6 print "perl $0 base\n";
15 my @a = (0.0000677106,-0.0003442342,0.0015397681,-0.0024467480,
16 0.0109736958,-0.0002109075,0.0742379071,0.0815782188,
17 0.4118402518,0.4227843370,1.0);
19 print "x must greater than 0!\n";
46 my ($a_gam,$x_gam) = ($_[0],$_[1]);
47 my ($n_gam,$p_gam,$q_gam,$d_gam,$s_gam,$s1_gam,$p0_gam,$q0_gam,$p1_gam,$q1_gam,$qq_gam);
48 if(($a_gam <= 0)||($x_gam < 0)) {
63 $q_gam = $a_gam*log($x_gam);
64 $qq_gam = exp($q_gam);
65 if($x_gam < 1+$a_gam) {
67 $s_gam = $d_gam = 1/$a_gam;
68 for($n_gam = 1; $n_gam <= 100; $n_gam++) {
70 $d_gam = $d_gam*$x_gam/$p_gam;
72 if(abs($d_gam) < abs($s_gam)*1e-7) {
73 $s_gam=$s_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
82 for($n_gam = 1; $n_gam <= 100; $n_gam++) {
83 $p0_gam = $p1_gam+($n_gam-$a_gam)*$p0_gam;
84 $q0_gam = $q1_gam+($n_gam-$a_gam)*$q0_gam;
85 $p_gam = $x_gam*$p0_gam+$n_gam*$p1_gam;
86 $q_gam = $x_gam*$q0_gam+$n_gam*$q1_gam;
87 if(abs($q_gam) != 0) {
88 $s1_gam = $p_gam/$q_gam;
91 if(abs(($s1_gam-$s_gam)/$s1_gam) < 1e-7) {
92 $s_gam = $s1_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
101 print "a too large !\n";
102 $s_gam = 1-$s_gam*exp(-$x_gam)*$qq_gam/&gam1
($a_gam);
109 $y_erf=&gam2
(0.5,$x_erf**2);
111 $y_erf=-&gam2
(0.5,$x_erf);
119 if ($href and %$href) {
121 $m = (sort {$a<=>$b} keys %$href)[-1];
124 push @str,join(':',$_,$$href{$_}||0);
126 return \
join(',',@str);
127 } else {return \'.';}
132 if ($href and %$href) {
134 for (sort {$a<=>$b} keys %$href) {
135 push @str,join(':',$_,$$href{$_});
137 return \join(',',@str);
138 } else {return \'.';}
141 sub getRealpos
($$$$) {
142 my ($len,$strand,$realpos,$trim)=@_;
143 if ($strand eq '-') { # Negative
144 $realpos += $len; # should be $len-1. So, starting 0. (+ & -, never meets.)
145 if ($trim =~ /(\d+)S$/) {
146 $realpos += $1; # $1 only reset after next /()/
148 } elsif ($strand eq '+') { # Positive
149 if ($trim =~ /^(\d+)S/) {
158 my ($Pairs,$Paired,$Singled,$Reads,$Alignment);
159 open LOG
,'<',"$soapbase.log" or die "[x]Error opening $soapbase.log: $!\n";
161 $Pairs = (split)[-2] if /^Total Pairs:/;
162 $Paired = (split)[1] if /^Paired:/;
163 $Singled = (split)[1] if /^Singled:/;
164 $Reads = (split)[-1] if /^Total Reads/;
165 $Alignment = (split)[1] if /^Alignment:/;
169 open NFO
,'>',"$soapbase.nfo" or die "[x]Error opening $soapbase.nfo: $!\n";
171 print NFO
"#fmtS\tTotal_Pairs\tPaired\tSingled\n";
172 print NFO
"Summary\t",join("\t",$Pairs,$Paired,$Singled),"\n";
173 #open(OUTS,"-|","gzip -dc $soapbase.soap.gz $soapbase.single.gz") or die "Error: $!\n";
174 open(TP
,"-|","gzip -dc $soapbase.soap.gz") or die "Error: $!\n";
175 open(TS
,"-|","gzip -dc $soapbase.single.gz") or die "Error: $!\n";
177 my ($BadLines,$BPOut,$ReadsOut,$MisSum,$TrimedBP,$TrimedReads,%Hit9r,%Hit9bp,%misMatch,%Indel)=(0,0,0,0,0,0);
178 my (%chrBPOut,%chrReadsOut,%chrMisSum,%chrTrimedBP,%chrTrimedReads,%chrHit9r,%chrHit9bp,%chrmisMatch,%chrIndel);
179 # for 46999 ChrIDs, one hash took 12m RES for {}=$ and 125m VIRT for {}{10}=$, thus fine to work with scaffolds. ( 1 gb VIRT max ? )
180 #my (@lines,$hit,$len,$chr,$types,$trim,$mistr,$missed);
183 my ($hit,$len,$chr,$types,$trim,$mistr) = @
{$_[0]}[3,5,7,9,-2,-1];
184 my $missed=$mistr=~tr/ATCGatcg//;
185 $BPOut += $len; $chrBPOut{$chr} += $len;
186 ++$ReadsOut; ++$chrReadsOut{$chr};
187 ++$misMatch{$missed};
188 ++$chrmisMatch{$chr}{$missed};
190 $chrMisSum{$chr} += $missed;
192 $hit=4 if $hit>4; # max to count 3, then >=4. Ancient Chinese wisdom, and a bit more ...
194 ++$chrHit9r{$chr}{$hit};
195 $Hit9bp{$hit} += $len;
196 $chrHit9bp{$chr}{$hit} += $len;
198 ++$Indel{200-$types};
199 ++$chrIndel{$chr}{200-$types};
200 } elsif ($types > 100) {
201 ++$Indel{$types-100};
202 ++$chrIndel{$chr}{$types-100};
205 my @Trimed = $trim =~ /(\d+)S/;
208 ++$chrTrimedReads{$chr};
211 $chrTrimedBP{$chr} += $_;
217 my ($pairs,$lastpos,$line1,$line2,$pp,$pn,$calins,%insD,@lines)=(0);
218 while ($line1=<TP
>) {
221 @lines = split /\t/,$line1;
226 &getStatNfo
(\
@lines);
227 my ($id1, $n1, $len1, $f1, $chr1, $x1, $m1) = @lines[0,3,5,6,7,8,-2];
229 @lines = split /\t/,$line2;
234 my ($id2, $n2, $len2, $f2, $chr2, $x2, $m2) = @lines[0,3,5,6,7,8,-2];
235 #($soapid,$hit,$len,$strand,$chr,$pos,$trim) = (split(/\t/))[0,3,5,6,7,8,-2];
238 if ($id1 ne $id2){ # single
239 seek (TP
, $lastpos, 0);
242 &getStatNfo
(\
@lines);
243 next if $n1+$n2>2 or $chr1 ne $chr2;
250 if ($ins > 1500) { # FR => +.pos < -.pos; RF => -.pos < +.pos
252 } else { next if $pp > $pn; }
256 $line1=&getRealpos
($len1, $f1, $x1, $m1); # $len,$strand,$realpos,$trim
257 $line2=&getRealpos
($len2, $f2, $x2, $m2); # Well, $line{1,2} is recycled.
258 $calins=abs($line1-$line2); # -.starting=0
264 if (@lines > 10) { # soap2 output always more than 10 columes.
265 &getStatNfo
(\
@lines);
266 } else { # rare event ...
273 print NFO
"\n#fmtC\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads\tReads\@Hit\tBP\@Hit\tIndelReads\tBadLines\n";
274 print NFO
join("\t",'ALL',$ReadsOut,$BPOut,$MisSum,$TrimedReads,$TrimedBP,
275 ${&combineC
(\
%misMatch)},${&combineJ
(\
%Hit9r)},${&combineJ
(\
%Hit9bp)},${&combineJ
(\
%Indel)},$BadLines),"\n\n";
276 print NFO
"#fmtP\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads\tReads\@Hit\tBP\@Hit\tIndelReads\n";
277 print NFO
join("\t",";$_",$chrReadsOut{$_},$chrBPOut{$_},$chrMisSum{$_}||0,$chrTrimedReads{$_}||0,$chrTrimedBP{$_}||0,
278 ${&combineC
(\
%{$chrmisMatch{$_}})},${&combineJ
(\
%{$chrHit9r{$_}})},${&combineJ
(\
%{$chrHit9bp{$_}})},${&combineJ
(\
%{$chrIndel{$_}})}),"\n" for sort keys %chrReadsOut;
281 my ($n,$fqcmd,$tmpcmd,$avg,$std,$Lsd,$Rsd,$max_y,$max_x)=(0);
282 my ($sum,$sum2,$v)=(0,0);
283 open O
,'>',"$soapbase.insD";
284 for my $k (sort {$a <=> $b} keys %insD) {
292 $std = sqrt($sum2/$n-$avg*$avg);
293 print O
"# $avg ± $std\n";
294 ($max_y,$max_x)=(-1,0);
295 for my $k ($avg-$std .. $avg+$std) { # 68.27%
296 next unless $v=$insD{$k};
302 my $cutoff = $max_y / 1000;
303 $cutoff = 3 if $cutoff<3;
304 my ($diff, $Lc, $Rc);
305 for my $k (keys %insD) {
307 next if $v < $cutoff;
310 $Lsd += $v * $diff * $diff;
314 $Rsd += $v * $diff * $diff;
318 $Lsd = sqrt($Lsd/$Lc);
319 $Rsd = sqrt($Rsd/$Rc);
320 print O
"# +$Lsd -$Rsd\n";
324 #system("bzip2 -9 $opath$fqnames[0].unmap");