use strict;
use Encode;
# Print the usage text stored after the __END__ marker of this script.
# The text is read from the DATA handle, decoded as cp1251 and re-encoded
# as cp866 before being written to standard output.
sub help {
	my $usage = join("", <DATA>);
	print encode("cp866", decode("cp1251", $usage));
}
# Print the usage text and quit when the first argument is "-h".
# The defined() guard keeps a run without arguments from comparing undef.
if (defined $ARGV[0] && $ARGV[0] eq "-h") {
	help();
	exit;
}
# Read a whitespace-separated file into a hash.
#
# Every line is split on runs of whitespace; the second field becomes the
# key and the third field the value. Lines whose second field is missing or
# shorter than five characters are skipped. A later line with the same
# second field overwrites the earlier value.
#
# Parameters:
#   $filename - path of the file to read.
# Returns:
#   a reference to the hash of second-field => third-field pairs.
# Dies:
#   when the file cannot be opened for reading.
sub parse_data {
	my ($filename) = @_;
	# Three-argument open: the name is never interpreted as a mode or a pipe.
	open my $fh, '<', $filename
		or die "Cannot open '$filename' for reading: $!\n";
	my %data;
	while (my $line = <$fh>) {
		my @fields = split ' ', $line;
		next if !defined $fields[1] || length($fields[1]) < 5;
		$data{$fields[1]} = $fields[2];
	}
	close $fh;
	return \%data;
}
# ---- Main script ----------------------------------------------------------
# Command-line arguments (all optional):
#   $ARGV[0] - base name of the function file (.\nloglcp\<name>.func.txt)
#   $ARGV[1] - base name of the terms file    (.\terms\<name>.terms.txt)
#   $ARGV[2] - chunk size: how far the window over the function list advances
#              on every step
# `||` is required for the defaults: the low-precedence `or` lets the
# assignment finish first, so the default would be silently discarded.
my $datafile = $ARGV[0] || "IE 0";
my $tfile = $ARGV[1] || "IE 0 3 1000";
my $terms = ".\\terms\\$tfile.terms.txt";

# Slurp the whole function file; it is treated as one comma-separated list.
my $funcfile = ".\\nloglcp\\$datafile.func.txt";
open my $func_fh, "<", $funcfile
	or die "Cannot open '$funcfile' for reading: $!\n";
# local keeps the record-separator change confined to this single read.
my $str = do { local $/; <$func_fh> };
close $func_fh;
$str = '' unless defined $str;
my @fcs = split ',',$str;

my $trms = parse_data($terms);
print "Terms is ".scalar(keys %$trms),"\n";
print "Functions is ".scalar(@fcs),"\n";
my %ok_terms;
my @data;
my $step = $ARGV[2] || 500000;
# A step that is not greater than zero would never advance the loop below.
die "step must be a positive number, got '$step'\n" unless $step > 0;

# Number of comma-separated fields in a term.
my $term_len = sub {
	my @parts = split ',', $_[0];
	return scalar @parts;
};

for (my $i=0; $i <= $#fcs; $i+=$step ){
	# The window keeps its inclusive upper bound ($i+$step), so neighbouring
	# windows share one element; it is clamped to the last index so the
	# joined text is not padded with empty fields past the end of the list.
	my $last = $i + $step;
	$last = $#fcs if $last > $#fcs;
	my $all = join ",",@fcs[$i..$last];
	%ok_terms=();
	for my $term (keys %$trms){
		# Substring test against the joined window; each hit is marked once.
		$ok_terms{$term}++ if index($all,$term) != -1;
	}
	push @data, scalar(keys %ok_terms);
	my @lens = sort {$b<=>$a} map { $term_len->($_) } keys %ok_terms;
	print "term_count on $i-".($i+$step).": ".scalar(keys %ok_terms)." lens:",join(",",@lens),"\n";
}

# NOTE(review): %ok_terms is emptied at the start of every window, so this
# listing reflects only the last window processed - confirm that is intended.
print "\nTerm datas:\n";
for my $term (sort { $term_len->($b) <=> $term_len->($a) } keys %ok_terms){
	print $term_len->($term)." ".$term.' '.$ok_terms{$term},"\n";
}
print "\nTerms counts:\n";
print join "\n",@data;
__END__
CountTermsInSequence.pl -         
	CountTermsInSequence.pl funcfile termfile step
	funcfile -     nloglcp   ,      
	termfile -     terms   ,     
	step - ,   ,      