released/pIRS.old/bwasam/alistat.pl

   1 #!/bin/env perl
   2 =pod
   3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
   4 Version: 0.0.1 @ 20110803
   5 =cut
   6 #use lib "/ifs1/ST_ASMB/USER/huxuesong/public/lib";
   7 #use lib '/export/data0/gentoo/tmp';
   8 use strict;
   9 use warnings;
  10 use Time::HiRes qw ( gettimeofday tv_interval );
  11 use Galaxy::ShowHelp;
  12 use Data::Dump qw(dump);
  13
  14 $main::VERSION=0.0.2;
  15 our $opts='o:b';
  16 our($opt_o, $opt_b);
  17
  18 #our $desc='';
  19 our $help=<<EOH;
  20 \t-o output prefix (./stat).{info,insD}
  21 \t-b No pause for batch runs
  22 For SOAP, both soap/single files and STDERR dump are needed.
  23 For BWA , only insert size stat. is done currently.
  24 EOH
  25 our $ARG_DESC='soap/sam files{,.gz,.bz2}';
  26
  27 ShowHelp();
  28 $opt_o='./stat' if ! $opt_o;
  29 die "[x]No input files found !\n" unless @ARGV;
  30 #die "[!]Max 252 files supported.\n" if @ARGV>252;
  31
  32 print STDERR "From [@ARGV] to [$opt_o]\n";
  33 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <STDIN>;}
  34
  35 my $start_time = [gettimeofday];
  36 #BEGIN
  37 sub combineC($) {
  38         my $href=$_[0];
  39         if ($href and %$href) {
  40                 my (@str,$m);
  41                 $m = (sort {$a<=>$b} keys %$href)[-1];
  42                 for (1..$m) {
  43                         #$$href{$_} += 0;
  44                         push @str,join(':',$_,$$href{$_}||0);
  45                 }
  46                 return \join(',',@str);
  47         } else {return \'.';}
  48 }
  49
  50 sub combineJ($) {
  51         my $href=$_[0];
  52         if ($href and %$href) {
  53                 my @str;
  54                 for (sort {$a<=>$b} keys %$href) {
  55                         push @str,join(':',$_,$$href{$_});
  56                 }
  57                 return \join(',',@str);
  58         } else {return \'.';}
  59 }
  60
  61 sub getRealpos($$$$) {
  62         my ($len,$strand,$realpos,$trim)=@_;
  63         if ($strand eq '-') {   # Negative
  64                 $realpos += $len;       # should be $len-1. So, starting 0. (+ & -, never meets.)
  65                 if ($trim =~ /(\d+)S$/) {
  66                         $realpos += $1; # $1 only reset after next /()/
  67                 }
  68         } elsif ($strand eq '+') {      # Positive
  69                 if ($trim =~ /^(\d+)S/) {
  70                         $realpos -= $1;
  71                 }
  72         } else {
  73                 $realpos=-1;
  74         }
  75         return $realpos;
  76 }
  77 =pod
  78 case 'R': opt->FR = 0;
  79
  80 alnSeq[0] = mseqs->seqList+i;
  81 alnSeq[1] = mseqs->seqList+i+1;
  82 pe_aux.len = alnSeq[0]->len;
  83
  84 fprintf(stderr,"%c\n", "+-"[strain]);
  85
  86 int strain1 = (p->info >> 24)&1;
  87 int strain2 = (q->info >> 24)&1;
  88 if(o->FR){      # FR
  89         if(!strain1 && q->pos-p->pos + o->len >= o->min_ins && q->pos-p->pos + o->len <= o->max_ins) return TRUE;
  90         else if(strain1 && p->pos-q->pos+o->len >= o->min_ins && p->pos-q->pos+o->len <= o->max_ins) return TRUE;
  91         else{ return FALSE;};
  92 }
  93 else if(!o->FR){        # RF, (-R)
  94         if(strain1 && q->pos - p->pos >= o->min_ins && q->pos - p->pos + o->len <= o->max_ins) return TRUE;
  95         else if(!strain1 && p->pos-q->pos >= o->min_ins && p->pos-q->pos+o->len <= o->max_ins) return TRUE;
  96         else{ return FALSE;}
  97 }
  98
  99 Well, InsertSize should always be defined from the 2 terminals of a sequencing fragument.
 100 Thus, for FR, the same as what from the gel; for RF, should be less than the gel result by the length of what actually been sequenced.
 101 =cut
 102
 103 sub openfile($) {
 104     my ($filename)=@_;
 105     my $infile;
 106     if ($filename=~/.bz2$/) {
 107             open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
 108     } elsif ($filename=~/.gz$/) {
 109         open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
 110     } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
 111     return $infile;
 112 }
 113 sub checkfiletype($) {
 114     my $fhc=$_[0];
 115     my $type;
 116     my $head=<$fhc>;
 117     if ($head =~ /^\[bwa_sai2sam_pe_core\]/) {
 118         $type='samlog';
 119     } elsif ($head =~ /^\@SQ/) {
 120         $type='sam';
 121     } elsif ($head =~ /[ATCG]/) {
 122         $type='soap';
 123     } elsif ($head =~ /^$/) {
 124 =pod
 125 00000000  0a 42 65 67 69 6e 20 50  72 6f 67 72 61 6d 20 53  |.Begin Program S|
 126 00000010  4f 41 50 61 6c 69 67 6e  65 72 2f 73 6f 61 70 32  |OAPaligner/soap2|
 127 00000020  0a 46 72 69 20 4a 75 6c  20 31 35 20 30 39 3a 32  |.Fri Jul 15 09:2|
 128 00000030  39 3a 34 32 20 32 30 31  31 0a 52 65 66 65 72 65  |9:42 2011.Refere|
 129 00000040  6e 63 65 3a 20 2e 2f 72  65 66 2f 68 75 6d 61 6e  |nce: ./ref/human|
 130 00000050  2e 66 61 2e 69 6e 64 65  78 0a 51 75 65 72 79 20  |.fa.index.Query |
 131 00000060  46 69 6c 65 20 61 3a 20  2e 2f 30 6e 6f 6d 61 73  |File a: ./0nomas|
 132 =cut
 133         $type='soaplog';
 134     }
 135     return $type;
 136 }
 137
 138 sub sumsoapdata($$) {
 139     my ($hit,$len,$chr,$types,$trim,$mistr)=@{$_[0]};
 140     my $dathref=$_[1];
 141     my ($BPOut,$ReadsOut,$MisSum,$TrimedBP,$TrimedReads,%Hit9r,%Hit9bp,%misMatch,%Indel)=(0,0,0,0,0);
 142     #my (%chrBPOut,%chrReadsOut,%chrMisSum,%chrTrimedBP,%chrTrimedReads,%chrHit9r,%chrHit9bp,%chrmisMatch,%chrIndel);
 143     my (@trims,$missed);
 144                 $BPOut += $len;#        $chrBPOut{$chr} += $len;
 145                 ++$ReadsOut;#   ++$chrReadsOut{$chr};
 146                 $missed=$mistr=~tr/ATCGatcg//;
 147                 ++$misMatch{$missed};
 148                 #++$chrmisMatch{$chr}{$missed};
 149                 $MisSum += $missed;
 150                 #$chrMisSum{$chr} += $missed;
 151
 152                 $hit=4 if $hit>4;       # max to count 3, then >=4. Ancient Chinese wisdom, and a bit more ...
 153                 ++$Hit9r{$hit};
 154                 #++$chrHit9r{$chr}{$hit};
 155                 $Hit9bp{$hit} += $len;
 156                 #$chrHit9bp{$chr}{$hit} += $len;
 157 =pod
 158                 if ($types < 100) {
 159                         #++$misMatch{$types};
 160                         #++$chrmisMatch{$chr}{$types};
 161                 } elsif ($types < 200) {        # '3S33M9D39M', '32M1D14M29S' exists ?
 162                         ++$Indel{$types-100};
 163                         ++$chrIndel{$chr}{$types-100};
 164                 } else {
 165                         ++$Indel{200-$types};
 166                         ++$chrIndel{$chr}{200-$types};
 167                 }
 168 =cut
 169                 if ($types > 200) {
 170                         ++$Indel{200-$types};
 171                         #++$chrIndel{$chr}{200-$types};
 172                 } elsif ($types > 100) {
 173                         ++$Indel{$types-100};
 174                         #++$chrIndel{$chr}{$types-100};
 175                 }
 176
 177                 @trims = $trim =~ /(\d+)S/;
 178                 if (@trims) {
 179                         ++$TrimedReads;
 180                         #++$chrTrimedReads{$chr};
 181                         for ( @trims ) {
 182                                 $TrimedBP += $_;
 183                                 #$chrTrimedBP{$chr} += $_;
 184                         }
 185                 }
 186     $dathref->{'BPOut'} += $BPOut;
 187     $dathref->{'ReadsOut'} += $ReadsOut;
 188     $dathref->{'MisSum'} += $MisSum;
 189     $dathref->{'TrimedReads'} += $TrimedReads;
 190     $dathref->{'TrimedBP'} += $TrimedBP;
 191     $dathref->{'misMatch'}{$_} += $misMatch{$_} for keys %misMatch;
 192     $dathref->{'Indel'}{$_} += $Indel{$_} for keys %Indel;
 193     $dathref->{'Hit9r'}{$_} += $Hit9r{$_} for keys %Hit9r;
 194     $dathref->{'Hit9bp'}{$_} += $Hit9bp{$_} for keys %Hit9bp;
 195 }
 196 sub statsoap($) {
 197     my $fh=$_[0];
 198     my (%datsum);
 199     $datsum{'PEuniqPairs'}=0;
 200     my ($BadLines,$PESE,$pairs,$lastpos,$line1,$line2,$pp,$pn,$calins,%insD)=(0,'PE',0);
 201         while ($line1=<$fh>) {
 202 #print "[$line1]\n";
 203                 my ($id1, $n1, $len1, $f1, $chr1, $x1, $types1, $m1, $mistr1)
 204                  = (split "\t", $line1)[0,3,5,6,7,8,9,-2,-1];
 205                 unless (defined $types1) {    # soap2 output always more than 10 columes.
 206                     ++$BadLines;
 207                     last;
 208                 }
 209                 &sumsoapdata([$n1, $len1, $chr1, $types1, $m1, $mistr1],\%datsum);
 210                 $line2=<$fh>;
 211                 last unless $line2;
 212                 my ($id2, $n2, $len2, $f2, $chr2, $x2, $types2, $m2, $mistr2)
 213                  = (split "\t", $line2)[0,3,5,6,7,8,9,-2,-1];
 214                 unless (defined $types2) {    # soap2 output always more than 10 columes.
 215                     ++$BadLines;
 216                     last;
 217                 }
 218                         #($soapid,$hit,$len,$strand,$chr,$pos,$trim) = (split(/\t/))[0,3,5,6,7,8,-2];
 219                         #        ($hit,$len,        $chr,$types,$trim,$mistr) = @lines[3,5,7,9,-2,-1];
 220                 $id1 =~ s/\/[12]$//;
 221                 $id2 =~ s/\/[12]$//;
 222                 &sumsoapdata([$n2, $len2, $chr2, $types2, $m2, $mistr2],\%datsum);
 223                 if (($PESE eq 'SE') or ($id1 ne $id2)){ # single
 224                         $PESE='SE';
 225                         next;
 226                 }
 227                 next if $n1+$n2>2 or $chr1 ne $chr2;
 228 =pod
 229                 if ($f1 eq '+') {
 230                         ($pp,$pn)=($x1,$x2);
 231                 } else {
 232                         ($pp,$pn)=($x2,$x1);
 233                 }
 234                 if ($ins > 1500) {      # FR => +.pos < -.pos; RF => -.pos < +.pos
 235                         next if $pp < $pn;
 236                 } else { next if $pp > $pn; }
 237 =cut
 238                 ++$pairs;
 239                 $line1=&getRealpos($len1, $f1, $x1, $m1);       # $len,$strand,$realpos,$trim
 240                 $line2=&getRealpos($len2, $f2, $x2, $m2);       # Well, $line{1,2} is recycled.
 241                 $calins=abs($line1-$line2);     # -.starting=0
 242                 ++$insD{$calins};
 243         }
 244         $datsum{'BadLines'}=$BadLines;
 245         if ($PESE eq 'PE') {
 246         $datsum{'PEuniqPairs'}=$pairs;
 247         }
 248         return [$PESE,[0,0,0],\%datsum,\%insD];
 249 }
 250 sub statsoaplog($) {
 251     my $fh=$_[0];
 252     my ($Pairs,$Paired,$Singled,$Reads,$Alignment)=(0,0,0,0,0);
 253     while(<$fh>) {
 254             $Pairs = (split)[-2] if /^Total Pairs:/;
 255             $Paired = (split)[1] if /^Paired:/;
 256             $Singled = (split)[1] if /^Singled:/;
 257             $Reads = (split)[-1] if /^Total Reads/;
 258             $Alignment = (split)[1] if /^Alignment:/;
 259     }
 260     my @RET;
 261     if ($Reads) {
 262         @RET=('SE',[$Reads,0,$Alignment]);
 263     } else {
 264         @RET=('PE',[$Pairs*2,$Paired*2,$Singled]);
 265     }
 266     return \@RET;
 267 =pod
 268 Total Pairs: 34776407 PE
 269 Paired:      17719335 (50.95%) PE
 270 Singled:     30467634 (43.81%) SE
 271
 272 IN:     34776407 reads x 2 fq file = 69552814 reads from _1 & _2.fq
 273 -o:     17719335 pairs = 17719335 x 2 =35438670 lines in .soap
 274 -2:     30467634 lines in .single
 275
 276 17719335/34776407 = 0.50952172833726037310294878939046
 277 30467634/69552814 = 0.43805034257851882168275750856033
 278
 279
 280 Total Reads: 25
 281 Alignment:   22 (88.00%)
 282 =cut
 283 }
 284
 285 sub sumsamdata($$) {
 286         #my ($QNAME,$FLAG,$RNAME,$POS,$MAPQ,$CIAGR,$MRNM,$MPOS,$ISIZE,$SEQ,$QUAL,$OPT)=@read1;
 287         #       0      1    2       3   4       5   6       7     8     9    10    11
 288     my ($FLAG,$RNAME,$POS,$CIAGR,$MRNM,$MPOS,$ISIZE)=@{$_[0]}[1,2,3,5,6,7,8];
 289     my $dathref=$_[1];
 290     my ($BPOut,$ReadsOut,$MisSum,$TrimedBP,$TrimedReads,%Hit9r,%Hit9bp,%misMatch,%Indel)=(0,0,0,0,0);
 291     my (@trims,@matches,@inserts,@deletes,$missed);
 292     my ($isPaired,$isSingled)=(0,0);
 293     unless ($CIAGR eq '*') {
 294         ++$ReadsOut;
 295     } else { return [0,0]; }
 296     if ($FLAG & 2) {
 297         $isPaired=1;
 298     } else {   #($FLAG & 8) and ($FLAG & 4 == 0)
 299         $isSingled=1;
 300 #print '[',join('|',@{$_[0]}),']' unless $FLAG & 8;
 301 #print $FLAG & 8,'|';
 302     }
 303         @matches = $CIAGR =~ /(\d+)M/;
 304         for ( @matches ) {
 305                 $BPOut += $_;
 306         }
 307         my ($Insertion,$Deletion)=(0,0);
 308         @inserts = $CIAGR =~ /(\d+)I/;
 309         for ( @inserts ) {
 310                 $BPOut += $_;
 311                 $Insertion += $_;
 312         }
 313         @deletes = $CIAGR =~ /(\d+)D/;
 314         for ( @deletes ) {
 315                 $Deletion -= $_;
 316         }
 317         if ($Insertion and $Deletion) {
 318             $Indel{$Insertion} += 0.5;
 319             $Indel{$Deletion} += 0.5;
 320         } elsif ($Insertion) {
 321             ++$Indel{$Insertion};
 322         } else {
 323             ++$Indel{$Deletion};
 324         }
 325         my $Alternativehits='';
 326         for ( @{$_[0]} ) {
 327         $Alternativehits = $1 if /^XA:Z:([\w,+-;]+)$/; #XA:Z:chrX,+1144912,100M,0;
 328             next unless /^XM:i:(\d+)$/;
 329             $MisSum += $1;
 330 #print '[',join('|',@{$_[0]}),']',"\t[$1,$MisSum]\n";
 331         }
 332         my @excigar=split(';',$Alternativehits);
 333         my $hit=1+scalar @excigar;
 334         $hit=4 if $hit>4;       # max to count 3, then >=4. Ancient Chinese wisdom, and a bit more ...
 335         ++$Hit9r{$hit};
 336         $Hit9bp{$hit} += $BPOut;
 337         ++$misMatch{$MisSum};
 338         @trims = $CIAGR =~ /(\d+)S/;
 339         if (@trims) {
 340                 ++$TrimedReads;
 341                 #++$chrTrimedReads{$chr};
 342                 for ( @trims ) {
 343                         $TrimedBP += $_;
 344                         #$chrTrimedBP{$chr} += $_;
 345                 }
 346         }
 347     $dathref->{'BPOut'} += $BPOut;
 348     $dathref->{'ReadsOut'} += $ReadsOut;
 349     $dathref->{'MisSum'} += $MisSum;
 350     $dathref->{'TrimedReads'} += $TrimedReads;
 351     $dathref->{'TrimedBP'} += $TrimedBP;
 352     $dathref->{'misMatch'}{$_} += $misMatch{$_} for keys %misMatch;
 353     $dathref->{'Indel'}{$_} += $Indel{$_} for keys %Indel;
 354     $dathref->{'Hit9r'}{$_} += $Hit9r{$_} for keys %Hit9r;
 355     $dathref->{'Hit9bp'}{$_} += $Hit9bp{$_} for keys %Hit9bp;
 356     return [$isPaired,$isSingled];
 357 }
 358 sub statsam($) {
 359     my $fh=$_[0];
 360     my ($PESE,$Reads,$toPaired,$toSingled,$BadLines,%datsum,%insD)=('PE',0,0,0,0);
 361     my ($Paired,$Singled)=(0,0);
 362     my ($line1,$line2,$calins,$sumret);
 363 =pod
 364        │Col │ Field │                       Description                        │
 365        ├────┼───────┼──────────────────────────────────────────────────────────┤
 366        │ 1  │ QNAME │ Query (pair) NAME                                        │
 367        │ 2  │ FLAG  │ bitwise FLAG                                             │
 368        │ 3  │ RNAME │ Reference sequence NAME                                  │
 369        │ 4  │ POS   │ 1-based leftmost POSition/coordinate of clipped sequence │
 370        │ 5  │ MAPQ  │ MAPping Quality (Phred-scaled)                           │
 371        │ 6  │ CIAGR │ extended CIGAR string                                    │
 372        │ 7  │ MRNM  │ Mate Reference sequence NaMe (`=' if same as RNAME)      │
 373        │ 8  │ MPOS  │ 1-based Mate POSistion                                   │
 374        │ 9  │ ISIZE │ Inferred insert SIZE                                     │
 375        │10  │ SEQ   │ query SEQuence on the same strand as the reference       │
 376        │11  │ QUAL  │ query QUALity (ASCII-33 gives the Phred base quality)    │
 377        │12  │ OPT   │ variable OPTional fields in the format TAG:VTYPE:VALUE   │
 378         #my ($QNAME,$FLAG,$RNAME,$POS,$MAPQ,$CIAGR,$MRNM,$MPOS,$ISIZE,$SEQ,$QUAL,$OPT)=@read1;
 379         #       0      1    2       3   4       5   6       7     8     9    10    11
 380 =cut
 381     while ($line1=<$fh>) {
 382         next if $line1=~/^@\w\w\t\w\w:/;
 383                 my @read1=split /\t/, $line1;
 384                 #print "[",join('|',@read1),"]\n";
 385                 #print scalar @read1,"|\n";
 386                 if (scalar @read1 < 11) {
 387                     ++$BadLines;
 388                     last;
 389                 }
 390                 ++$Reads;
 391                 $sumret=&sumsamdata(\@read1,\%datsum);
 392                 $Paired += $sumret->[0];
 393                 $Singled += $sumret->[1];
 394                 $line2=<$fh>;
 395                 last unless $line2;
 396                 my @read2=split /\t/, $line2;
 397                 if (scalar @read2 < 11) {
 398                     ++$BadLines;
 399                     last;
 400                 }
 401                 ++$Reads;
 402                 $sumret=&sumsamdata(\@read2,\%datsum);
 403                 $Paired += $sumret->[0];
 404                 $Singled += $sumret->[1];
 405                 if (($PESE eq 'SE') or ($read1[0] ne $read2[0])){       # single
 406                         $PESE='SE';
 407                         next;
 408                 }
 409                 $calins=abs($read2[8]);
 410                 ++$insD{$calins} if $sumret->[0] and $calins < 1500;
 411     }
 412     $datsum{'BadLines'}=$BadLines;
 413     return [$PESE,[$Reads,$Paired,$Singled],\%datsum,\%insD];
 414 }
 415 my %dostat=(
 416     'sam'     => \&statsam,
 417     'soap'    => \&statsoap,
 418 #    'samlog' => sub {},
 419     'soaplog' => \&statsoaplog,
 420 );
 421
 422 sub sumintohash($$) {
 423     my ($inhref,$intohref)=@_;
 424     for my $key (keys %{$inhref}) {
 425         if (ref($$inhref{$key}) eq 'HASH') {
 426             $$intohref{$key}={} unless exists $$intohref{$key};
 427             &sumintohash($$inhref{$key},$$intohref{$key});
 428         } else {
 429             $$intohref{$key} += $$inhref{$key};
 430         }
 431     }
 432 =pod
 433     print STDERR '-' x 80,"\n";
 434     dump($inhref);
 435     print STDERR 'to',"\n";
 436     dump($intohref);
 437     print STDERR '-' x 80,"\n";
 438 =cut
 439 }
 440
 441 my $files=0;
 442 my ($withPE,$InReads,$mapPair,$mapSingle,%DatSum,%InsD)=(0,0,0,0);
 443 while($_=shift @ARGV) {
 444     next unless -f $_;
 445     ++$files;
 446     my $infile;
 447     $infile=openfile($_);
 448     my $type=checkfiletype($infile);
 449     close $infile;
 450     print STDERR "$files\t[$type] $_ ...";
 451     $infile=openfile($_);
 452     if (exists $dostat{$type}) {
 453         my $ret=&{$dostat{$type}}($infile); # [$PESE,[$Reads,$Paired*2,$Singled],\%datsum,\%insD]
 454         $InReads += $ret->[1]->[0];
 455         $mapPair += $ret->[1]->[1];
 456         $mapSingle += $ret->[1]->[2];
 457         if ($ret->[0] eq 'PE') {
 458             $withPE=1;
 459             &sumintohash($ret->[3],\%InsD) if $ret->[3];
 460         }
 461         &sumintohash($ret->[2],\%DatSum) if $ret->[2];
 462         #print "\n[";dump($ret);print "]\n";
 463     } else {
 464         print STDERR "\b\b\bskipped."
 465     }
 466     close $infile;
 467     print STDERR "\n";
 468 }
 469 warn "[!]Files Read: $files\n";
 470
 471 my ($n,$v,$avg,$std,$Lsd,$Rsd,$max_y,$max_x,$sum,$sum2)=(0);
 472 if ($withPE) {
 473     open O,'>',"$opt_o.insD" or die "[x]Error opening $opt_o.insD: $!\n";
 474     for my $k (keys %InsD) {
 475             $v=$InsD{$k};
 476             $sum += $k * $v;
 477             $n += $v;
 478             $sum2 += $k*$k * $v;
 479     }
 480     $n=-1 unless $n;
 481     $avg = $sum/$n;
 482     $std = sqrt($sum2/$n-$avg*$avg);
 483     print O "# $avg ± $std\n";
 484     ($max_y,$max_x)=(-1,0);
 485     for my $k ($avg-$std .. $avg+$std) {        # 68.27%
 486             next unless $v=$InsD{$k};
 487             if ($max_y < $v) {
 488                     $max_x = $k;
 489                     $max_y = $v;
 490             }
 491     }
 492     my $cutoff = $max_y / 1000;
 493     $cutoff = 3 if $cutoff<3;
 494     my ($diff, $Lc, $Rc);
 495     for my $k (keys %InsD) {
 496             $v=$InsD{$k};
 497             next if $v < $cutoff;
 498             $diff = $k - $max_x;
 499             if ($diff < 0) {
 500                     $Lsd += $v * $diff * $diff;
 501                     $Lc += $v;
 502             }
 503             elsif ($diff > 0) {
 504                     $Rsd += $v * $diff * $diff;
 505                     $Rc += $v;
 506             }
 507     }
 508     $Lc=-1 unless $Lc;
 509     $Rc=-1 unless $Rc;
 510     $Lsd = sqrt($Lsd/$Lc);
 511     $Rsd = sqrt($Rsd/$Rc);
 512     print O "# +$Lsd -$Rsd\n#InsertSize\tCount\n";
 513     for my $k (sort {$a <=> $b} keys %InsD) {
 514             $v=$InsD{$k};
 515             print O "$k\t$v\n";
 516     }
 517     close O;
 518 }
 519
 520 open NFO,'>',"$opt_o.info" or die "[x]Error opening $opt_o.info: $!\n";
 521 if ($withPE) {
 522         my $p=sprintf "%.2f",100*$max_y/$n;
 523         $avg=int($avg*10+.5)/10;
 524         $Lsd=int($Lsd*100+.5)/100;
 525         $Rsd=int($Rsd*100+.5)/100;
 526         print NFO "#fmtS\tTotalReads\ttoPaired\ttoSingled\tModeIns(p%),Lsd,Rsd,InsAvg,STD\n";
 527         print NFO "Summary\t",join("\t",$InReads,$mapPair,$mapSingle,"$max_x($p %),$Lsd,$Rsd,$avg,$std"),"\n";
 528 } else {
 529         print NFO "#fmtS\tTotalReads\tAlignment\n";
 530         print NFO "Summary\t",join("\t",$InReads,$mapSingle),"\n";
 531 }
 532 print NFO "\n#fmtC\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads\tReads\@Hit\tBP\@Hit\tIndelReads\tBadLines\n";
 533 print NFO join("\t",'ALL',$DatSum{'ReadsOut'},$DatSum{'BPOut'},$DatSum{'MisSum'},$DatSum{'TrimedReads'},$DatSum{'TrimedBP'},
 534         ${&combineC($DatSum{'misMatch'})},${&combineJ($DatSum{'Hit9r'})},${&combineJ($DatSum{'Hit9bp'})},${&combineJ($DatSum{'Indel'})},$DatSum{'BadLines'}),"\n\n";# if exists $DatSum{'Hit9r'};
 535 #print NFO "#fmtP\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads\tReads\@Hit\tBP\@Hit\tIndelReads\n";
 536 #print NFO join("\t",";$_",$chrReadsOut{$_},$chrBPOut{$_},$chrMisSum{$_}||0,$chrTrimedReads{$_}||0,$chrTrimedBP{$_}||0,${&combineC(\%{$chrmisMatch{$_}})},${&combineJ(\%{$chrHit9r{$_}})},${&combineJ(\%{$chrHit9bp{$_}})},${&combineJ(\%{$chrIndel{$_}})}),"\n" for sort keys %chrReadsOut;
 537 close NFO;
 538
 539 #print "\n[";dump([$InReads,$mapPair,$mapSingle,\%DatSum,\%InsD]);print "]\n";
 540
 541 __END__
 542 [$InReads,$mapPair,$mapSingle,\%DatSum,\%InsD]
 543 [
 544   196657088,
 545   171314024,
 546   11148094,
 547   {
 548     BadLines    => 0,
 549     BPOut       => 396395,
 550     Hit9bp      => { 1 => 323074, 2 => 7900, 3 => 3063, 4 => 62358 },
 551     Hit9r       => { 1 => 3266, 2 => 79, 3 => 31, 4 => 624 },
 552     misMatch    => {
 553                      "0" => 2441,
 554                      "1" => 498,
 555                      "2" => 380,
 556                      "3" => 129,
 557                      "4" => 218,
 558                      "5" => 116,
 559                      "6" => 107,
 560                      "7" => 111,
 561                    },
 562     MisSum      => 4516,
 563     PEuniqPairs => 973,
 564     ReadsOut    => 4000,
 565     TrimedBP    => 3568,
 566     TrimedReads => 122,
 567   },
 568   {
 569     653 => 9,
 570     654 => 12,
 571     655 => 15,
 572     656 => 18,
 573     657 => 18,
 574     658 => 13,
 575     659 => 21,
 576     660 => 25,
 577     661 => 22,
 578     662 => 19,
 579     663 => 18,
 580     664 => 24,
 581     665 => 24,
 582     666 => 35,
 583     667 => 27,
 584     668 => 24,
 585     669 => 24,
 586     670 => 25,
 587     671 => 29,
 588     672 => 36,
 589     673 => 38,
 590     674 => 42,
 591     675 => 32,
 592     676 => 32,
 593     677 => 32,
 594     678 => 31,
 595     679 => 26,
 596     680 => 19,
 597     681 => 20,
 598     682 => 9,
 599     683 => 16,
 600   },
 601 ]
 602