6 print "perl $0 <soap file name> <insert size result> <frequence statistics> <drop rate(0~1)>\n";
10 my ($sum,$count,$sum_2,@insertdata,%f);
11 my $soapfile = $ARGV[0];
12 my $insert = $ARGV[1];
13 my $frequence = $ARGV[2];
15 #open SOAP, "<", "$soapfile";
16 open( SOAP
,"-|","gzip -dc $soapfile");
17 open INSERT
, ">", "$insert";
18 open FREQUENCE
, ">", "$frequence";
21 my $filetotal = `ls -l $soapfile`;
22 my @tmp = split/\s+/,$filetotal;
25 print "\nProcessing......\n\n";
26 while(my $line1 = <SOAP
>)
28 my @arry1 = split/\s+/,$line1;
30 my @arry2 = split/\s+/,$line2;
31 unless($arry1[0] eq $arry2[0] && $arry1[7] eq $arry2[7])
34 @arry2 = split/\s+/,$line2;
38 my $result1 = $arry1[8]+$arry1[5]-$arry2[8];
39 print INSERT
$result1,"\n";
41 $sum_2 += ($result1**2);
46 my $result2 = $arry2[8]+$arry2[5]-$arry1[8];
47 print INSERT
$result2,"\n";
49 $sum_2 += ($result2**2);
55 &prograss_bar
((tell SOAP
),$filetotal);
62 my ($i,$total) = ($_[0],$_[1]);
64 print "\r [".("=" x
int(($i/$total)*25)).">".(" " x (24 - int(($i/$total)*25)))."]";
65 printf("%2.1f %%",$i/$total*100);
70 foreach (sort keys %f)
72 if($f{$_} > $maxfr[1])
77 print FREQUENCE
$_,"\t";
78 print FREQUENCE
$f{$_},"\n";
82 print "Process Done!\n\n";
83 my $average = $sum/$count;
84 print "The average value is : ", $average,".\n";
85 print "The number of sample is : ", $count,".\n";
86 my $sd = sqrt(($sum_2-$count*($average**2))/($count-1));
87 print "The SD value is : ", $sd,".\n\n";
89 print "The Max Frequence Value is : ", $maxfr[0], " ", $maxfr[1], "\n";
90 #print "Now Please Input the presentage you want to drop (0~1): ";
91 #chomp(my $drop = <STDIN>);
93 $sum = $sum_2 = $count = 0;
94 open INSERT
, "<", "$insert";
101 foreach (sort keys %f)
103 if($f{$_} <= $maxfr[1]*$drop)
108 foreach (@insertdata)
121 my @a = (0.0000677106,-0.0003442342,0.0015397681,-0.0024467480,
122 0.0109736958,-0.0002109075,0.0742379071,0.0815782188,
123 0.4118402518,0.4227843370,1.0);
126 print "x must greater than 0!\n";
131 $t = 1/($x1*($x1+1));
163 my ($a_gam,$x_gam) = ($_[0],$_[1]);
164 my ($n_gam,$p_gam,$q_gam,$d_gam,$s_gam,$s1_gam,$p0_gam,$q0_gam,$p1_gam,$q1_gam,$qq_gam);
165 if(($a_gam <= 0)||($x_gam < 0))
169 print "err**a<=0!\n";
185 $q_gam = $a_gam*log($x_gam);
186 $qq_gam = exp($q_gam);
187 if($x_gam < 1+$a_gam)
190 $s_gam = $d_gam = 1/$a_gam;
191 for($n_gam = 1; $n_gam <= 100; $n_gam++)
194 $d_gam = $d_gam*$x_gam/$p_gam;
196 if(abs($d_gam) < abs($s_gam)*1e-7)
198 $s_gam=$s_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
209 for($n_gam = 1; $n_gam <= 100; $n_gam++)
211 $p0_gam = $p1_gam+($n_gam-$a_gam)*$p0_gam;
212 $q0_gam = $q1_gam+($n_gam-$a_gam)*$q0_gam;
213 $p_gam = $x_gam*$p0_gam+$n_gam*$p1_gam;
214 $q_gam = $x_gam*$q0_gam+$n_gam*$q1_gam;
217 $s1_gam = $p_gam/$q_gam;
220 if(abs(($s1_gam-$s_gam)/$s1_gam) < 1e-7)
222 $s_gam = $s1_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
231 print "a too large !\n";
232 $s_gam = 1-$s_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
241 $y_erf=&gam2
(0.5,$x_erf**2);
245 $y_erf=-&gam2
(0.5,$x_erf);
249 my $average_drop = $sum/$count;
250 print "The average value after modification is : ", $average_drop,".\n";
251 print "The number of sample after modification is : ", $count,".\n";
252 my $pa = 3.1415926536;
253 my $variance = ($sum_2-$count*($average_drop**2))/($count-1);
254 print "The SD value after modification is : ",sqrt($variance), "\n";
255 my $sd_drop = sqrt($variance*&erf
(sqrt((-1)*log($drop)))/(&erf(sqrt((-1)*log($drop)))-2*$drop*sqrt((-1)*log($drop))/sqrt
($pa)));
256 print "The estimated SD value based on the modified sample is : ", $sd_drop,".\n";