use strict;
use Encode;
# Print the usage text stored in the DATA section (after __END__),
# decoding it as cp1251 and re-encoding it as cp866 before output.
sub help {
	my $usage_raw  = join("", <DATA>);
	my $usage_text = decode("cp1251", $usage_raw);
	print encode("cp866", $usage_text);
}
# Show the usage text and stop when the first command-line argument is -h.
if (@ARGV > 0 and $ARGV[0] eq "-h") {
	help();
	exit;
}
# Arithmetic mean of the numbers in the referenced array.
#
# Argument: an array reference.
# Returns:  the mean of its elements, or 0 when the array is empty.
sub avg {
	my ($values) = @_;
	my $count = scalar @$values;
	return 0 if $count == 0;
	my $total = 0;
	$total += $_ for @$values;
	return $total / $count;
}
# Pearson correlation coefficient of two numeric series.
#
# Arguments: two array references. Elements are paired by index; when the
#            arrays differ in length only the common prefix is used.
# Returns:   the coefficient, or 0 when there are no pairs or when either
#            series has zero variance over the paired elements.
sub Correlation {
	my ($first, $second) = @_;
	# Number of index positions present in both series.
	my $n = @$first < @$second ? scalar(@$first) : scalar(@$second);
	return 0 if $n == 0;
	# The means must be taken over exactly the paired elements. Averaging the
	# whole of a longer array would bias its deviations, so truncate first.
	my ($xs, $ys) = ($first, $second);
	$xs = [ @{$first}[0 .. $n - 1] ]  if @$first > $n;
	$ys = [ @{$second}[0 .. $n - 1] ] if @$second > $n;
	my ($mean_x, $mean_y) = (avg($xs), avg($ys));
	my ($cov, $var_x, $var_y) = (0, 0, 0);
	for my $i (0 .. $n - 1) {
		my $dx = $xs->[$i] - $mean_x;
		my $dy = $ys->[$i] - $mean_y;
		$cov   += $dx * $dy;
		$var_x += $dx * $dx;
		$var_y += $dy * $dy;
	}
	# A constant series has no defined correlation; report 0 instead of
	# dividing by zero.
	return 0 if $var_x == 0 || $var_y == 0;
	return $cov / sqrt($var_x * $var_y);
}
# Split a sequence into all overlapping windows ("blocks") of a fixed length.
#
# Arguments:
#   $t_calls   - array reference with the sequence elements
#   $block_len - number of consecutive elements per block (must be >= 1)
# Returns a two-element list:
#   - reference to an array of the blocks in order of appearance, each block
#     being its elements joined with "-"
#   - reference to a hash mapping each distinct block string to the number of
#     times it occurs
# Dies when $block_len is not positive or the sequence is shorter than one
# block.
sub SplitOnBlocks{
	my ($t_calls,$block_len)=@_;
	if (!defined($block_len) || $block_len < 1){
		die "Block length must be at least 1\r\n";
	}
	# A sequence of exactly $block_len elements still holds one block, so only
	# strictly shorter input is rejected.
	my $total = scalar(@$t_calls);
	if ($total < $block_len){
		die "Block length ($block_len) is greater than the number of elements ($total)\r\n";
	}
	my @blocks=();
	my %substrs = ();
	for my $start (0 .. $total - $block_len){
		my $key = join("-", @{$t_calls}[$start .. $start + $block_len - 1]);
		push @blocks, $key;
		# Post-increment creates a missing entry, so no defined() test is needed.
		$substrs{$key}++;
	}
	return (\@blocks,\%substrs);
}
# ---------------------------------------------------------------------------
# Main script.
#
# Reads two comma-separated lists from nloglcp\<file>.func.txt, cuts each list
# into consecutive windows of range1 / range2 elements, and for every pair of
# windows correlates the occurrence counts of the blocks (see SplitOnBlocks)
# that appear in both. The coefficients are printed to STDOUT and written as
# an HTML table to result.htm.
# ---------------------------------------------------------------------------

# Read nloglcp\<name>.func.txt and return its comma-separated entries.
sub _read_func_list {
	my ($name) = @_;
	my $path = "nloglcp\\$name.func.txt";
	open my $fh, "<", $path or die "Cannot open '$path': $!\n";
	# Slurp the whole file; `local` confines the change of $/ to this block.
	my $content = do { local $/; <$fh> };
	close $fh;
	$content = "" unless defined $content;
	my @entries = split ",", $content;
	return @entries;
}

# Convert a correlation value into the 0..255 colour component of a table
# cell. Values outside 0..1 (negative correlations in particular) are clamped
# so the component always formats as exactly two hex digits.
sub _cell_shade {
	my ($cor) = @_;
	my $shade = int(255 * (1 - $cor));
	$shade = 0   if $shade < 0;
	$shade = 255 if $shade > 255;
	return $shade;
}

# Command-line arguments and their defaults. `||` is required here: the
# low-precedence `or` binds looser than `=`, so `my $x = $ARGV[0] or "dflt"`
# would never apply the default.
my $file1     = $ARGV[0] || "IE 0";
my $file2     = $ARGV[1] || "Mozilla 0";
my $block_len = $ARGV[2] || 10;
my $range1    = $ARGV[3] || 50000;
my $range2    = $ARGV[4] || 50000;

# A non-positive range would keep the window loops below from terminating.
for my $option ([blocklen => $block_len], [range1 => $range1], [range2 => $range2]) {
	my ($opt_name, $opt_value) = @$option;
	die "$opt_name must be a positive integer, got '$opt_value'\n"
		unless $opt_value =~ /^[0-9]+$/ && $opt_value > 0;
}

my @funcs1 = _read_func_list($file1);
my @funcs2 = _read_func_list($file2);
print scalar(@funcs1)." funcs in $file1\n";
print scalar(@funcs2)." funcs in $file2\n";

# $cor_table[row][col] holds the formatted coefficient for window `row` of
# the first list against window `col` of the second.
my @cor_table = ();
for (my $i = 0; $i + $range1 <= $#funcs1; $i += $range1) {
	# The first list's window does not depend on $j, so split it once per row.
	my @ffuncs = @funcs1[$i .. $i + $range1];
	my (undef, $fcounts) = SplitOnBlocks(\@ffuncs, $block_len);
	for (my $j = 0; $j + $range2 <= $#funcs2; $j += $range2) {
		my @sfuncs = @funcs2[$j .. $j + $range2];
		my (undef, $scounts) = SplitOnBlocks(\@sfuncs, $block_len);
		# Pair up the counts of every block that occurs in both windows.
		my (@fdata, @sdata);
		for my $block (keys %$scounts) {
			next unless defined $fcounts->{$block};
			push @fdata, $fcounts->{$block};
			push @sdata, $scounts->{$block};
		}
		my $cor = Correlation(\@fdata, \@sdata);
		print "corr($i x $j) =\t$cor\n";
		$cor_table[$i / $range1][$j / $range2] = sprintf "%.4f", $cor;
	}
}

my $result_path = "result.htm";
open my $res, ">", $result_path
	or die "Cannot open '$result_path' for writing: $!\n";
print {$res} "<table border='1' style='color: red; border-collapse: collapse; border: 1px solid black;'>\r\n";
# NOTE(review): the caption appears to have lost its descriptive words; only
# the numeric parameters remain. Confirm against the original text.
print {$res} "<caption>        $range1 $range2    $block_len</caption>\r\n";

# Header row: one column per window of the second list.
print {$res} "<tr><td>Numbers</td>";
for (my $j = 0; $j + $range2 <= $#funcs2; $j += $range2) {
	print {$res} "<td>".$j."-".($j + $range2)."</td>";
}
print {$res} "</tr>\r\n";

# One table row per window of the first list.
for my $row (0 .. $#cor_table) {
	my $row_start = $row * $range1;
	print {$res} "\t<tr><td>".$row_start."-".($row_start + $range1)."</td>\r\n";
	for my $cor (@{ $cor_table[$row] }) {
		print {$res} sprintf("<td style='background-color:#af%.2xaf'>%s</td>\r\n",
			_cell_shade($cor), $cor);
	}
	print {$res} "</tr>\r\n";
}
# Buffered write errors only surface when the handle is closed.
close $res or die "Cannot finish writing '$result_path': $!\n";
__END__
FlowThreadCompare.pl -         .
	FlowThreadCompare.pl file1 file2 blocklen range1 range2
	file1 -      nloglcp   
	file2 -      nloglcp   
	blocklen -     
	range1 -     
	range2 -     