4 use Data
::Dump
qw(dump ddx);
7 print "perl $0 <outprefix(.stat & .ins)> <stator info files>\n"; # soaps.nfo can store the file size of soap. Too late, useless.
11 my $statout = shift @ARGV;
12 my (%DATrbrf,%nfo,%insD);
14 my $Dathref={ALL
=> [0,0,0,0,0,{},{},{},{},0], Summary
=> [0,0,0], };
15 # "ReadsOut, BPOut, MisSum, TrimedReads, TrimedBP, misMatchReads, Reads@Hit, BP@Hit, IndelReads, BadLines"
16 # "Total_Reads(=Total_Pairs*2), readsPaired, Singled/Alignment"
21 if ($href and %$href) {
23 $m = (sort {$a<=>$b} keys %$href)[-1];
25 push @str,$$href{$_}||0;
27 return \
join('|',@str);
32 if ($href and %$href) {
34 for (sort {$a<=>$b} keys %$href) {
35 push @str,join(':',$_,$$href{$_});
37 return \join(',',@str);
42 return join "\t",@
{$$ref{Summary
}},@
{$$ref{ALL
}}[0..4],${&combineC
($$ref{ALL
}[5])},${&combineC
($$ref{ALL
}[6])},${&combineC
($$ref{ALL
}[7])},${&combineJ
($$ref{ALL
}[8])},$$ref{ALL
}[9];
46 return join ',',@
$ref[0..4],${&combineC
($$ref[5])},${&combineC
($$ref[6])},${&combineC
($$ref[7])},${&combineC
($$ref[8])};
50 return if $$stref eq '.';
51 my %new = map {split /:/} split(/,/,$$stref);
52 $$href{$_} += $new{$_} for (keys %new);
55 my ($PESE,$sum,$item)=@_;
56 my ($a,$m)=($$sum{Summary
},$$sum{ALL
});
57 my ($b,$n)=($$item{Summary
},$$item{ALL
});
59 $$a[0] += $$b[0];#+$$b[0];
66 $$m[-1] += $$n[-1]; # BadLines
67 for my $chr (keys %$item) {
68 next if $chr eq 'Summary';
69 #$$sum{$_}=[0,0,0,0,{},{},{},{}] unless $$sum{$_};
70 ($m,$n)=($$sum{$chr},$$item{$chr});
71 $$sum{$chr}=$m=[0,0,0,0,0,{},{},{},{}] unless $m;
72 $$m[$_] += $$n[$_] for (0..4);
73 &sumcsv
($$m[$_],\
$$n[$_]) for (5..8);
80 open INS
,'>',$statout.'.ins' or die "[x]Error opening $statout.ins: $!\n";
81 while(my $nfofpath=shift @ARGV) {
82 next unless -f
$nfofpath;
84 print STDERR
"Read [$nfofpath]\n";
85 print INS
"#[$nfofpath]\n";
86 open NFO
,'<',"$nfofpath" or (warn "[!]Error opening $nfofpath: $!\n" and next);
90 my ($key,@values)=split /\t/;
91 $key='ALL' if $key eq '_ALL_'; # for old format
95 &sumup
($PESE,$Dathref,\
%NFO);
96 push @InsStr,${$NFO{Summary
}}[3] if $PESE eq 'PE';
103 print INS
"\n#Mode(p%),Lsd,Rsd,InsAvg,STD\n";
104 print INS
"$_\n" for @InsStr;
108 open O
,'>',$statout.'.stat' or die "[x]Error opening $statout.stat: $!\n";
109 print O
"#Summary:\n#Total_Reads Aligned_Pairs,Aligned_Single\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads|ASC\tReads\@Hit|ASC\tBP\@Hit|ASC\tIndelReads|ASC\tBadLines\n";
110 my $RFL=&combineLineA
($Dathref);
116 对
./to9311/2soap
/soaps
.stat:
118 #Summary SubItemOrder: Total_Reads,Aligned_Pairs,Aligned_Single,ReadsOut,BPOut,TrimedReads,TrimedBP,misMatchReads|ASC,Reads@Hit|ASC,BP@Hit|ASC,IndelReads|ASC,BadLines
119 按个体统计:
grep ^ALL to9311
/2soap/soaps.stat | awk
'{print $2"\t"$3}' |sort|uniq
120 按文库统计:
grep ^ALL to9311
/2soap/soaps.stat | awk
'{print $4"\t"$5}' |sort|uniq
121 按lane统计:
grep ^ALL to9311
/2soap/soaps.stat | awk
'{print $6"\t"$7}'
124 #ByChr SubItemOrder: ReadsOut,BPOut,TrimedReads,TrimedBP,misMatchReads|ASC,Reads@Hit|ASC,BP@Hit|ASC,IndelReads|ASC
125 按个体统计:
grep ^Chr01 to9311
/2soap/soaps.stat | awk
'{print $2"\t"$3}' |sort|uniq
126 按文库统计:
grep ^Chr01 to9311
/2soap/soaps.stat | awk
'{print $4"\t"$5}' |sort|uniq
127 按lane统计:
grep ^Chr01 to9311
/2soap/soaps.stat | awk
'{print $6"\t"$7}'
129 以Chr01为例,其他染色体替换。或用“Chr”提取全部。
132 “
300347|184857|0|222”,mismatch
=1的有
300347,mismatch
=2的有
184857,没有mismatch
=3的,mismatch
=4的有
222。
135 BP
@Hit|ASC与Reads
@Hit|ASC类似,但只分为
1..9和
>=10共计
10种类别。
136 根据抽取一个soap结果统计结果,不同hit数的分布是类似双曲线趋势下降的。