modified: pixi.toml
[GalaxyCodeBases.git] / released / pIRS.old / bwasam / alimerge.pl
blob1edd874bfd23504285bc68f7084cfff4a09d66e5
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(dump ddx);
6 unless (@ARGV){
7 print "perl $0 <outprefix(.stat & .ins)> <stator info files>\n"; # soaps.nfo can store the file size of soap. Too late, useless.
8 exit;
11 my $statout = shift @ARGV;
12 my (%DATrbrf,%nfo,%insD);
14 my $Dathref={ALL => [0,0,0,0,0,{},{},{},{},0], Summary => [0,0,0], };
15 # "ReadsOut, BPOut, MisSum, TrimedReads, TrimedBP, misMatchReads, Reads@Hit, BP@Hit, IndelReads, BadLines"
16 # "Total_Reads(=Total_Pairs*2), readsPaired, Singled/Alignment"
17 my @InsStr;
19 sub combineC($) {
20 my $href=$_[0];
21 if ($href and %$href) {
22 my (@str,$m);
23 $m = (sort {$a<=>$b} keys %$href)[-1];
24 for (1..$m) {
25 push @str,$$href{$_}||0;
27 return \join('|',@str);
28 } else {return \'.';}
30 sub combineJ($) {
31 my $href=$_[0];
32 if ($href and %$href) {
33 my @str;
34 for (sort {$a<=>$b} keys %$href) {
35 push @str,join(':',$_,$$href{$_});
37 return \join(',',@str);
38 } else {return \'.';}
40 sub combineLineA($) {
41 my $ref=$_[0];
42 return join "\t",@{$$ref{Summary}},@{$$ref{ALL}}[0..4],${&combineC($$ref{ALL}[5])},${&combineC($$ref{ALL}[6])},${&combineC($$ref{ALL}[7])},${&combineJ($$ref{ALL}[8])},$$ref{ALL}[9];
44 sub combineLine($) {
45 my $ref=$_[0];
46 return join ',',@$ref[0..4],${&combineC($$ref[5])},${&combineC($$ref[6])},${&combineC($$ref[7])},${&combineC($$ref[8])};
48 sub sumcsv ($$) {
49 my ($href,$stref)=@_;
50 return if $$stref eq '.';
51 my %new = map {split /:/} split(/,/,$$stref);
52 $$href{$_} += $new{$_} for (keys %new);
54 sub sumup ($$$) {
55 my ($PESE,$sum,$item)=@_;
56 my ($a,$m)=($$sum{Summary},$$sum{ALL});
57 my ($b,$n)=($$item{Summary},$$item{ALL});
58 if ($PESE eq 'PE') {
59 $$a[0] += $$b[0];#+$$b[0];
60 $$a[1] += $$b[1];
61 $$a[2] += $$b[2];
62 } else {
63 $$a[0] += $$b[0];
64 $$a[2] += $$b[1];
66 $$m[-1] += $$n[-1]; # BadLines
67 for my $chr (keys %$item) {
68 next if $chr eq 'Summary';
69 #$$sum{$_}=[0,0,0,0,{},{},{},{}] unless $$sum{$_};
70 ($m,$n)=($$sum{$chr},$$item{$chr});
71 $$sum{$chr}=$m=[0,0,0,0,0,{},{},{},{}] unless $m;
72 $$m[$_] += $$n[$_] for (0..4);
73 &sumcsv($$m[$_],\$$n[$_]) for (5..8);
74 #warn "$chr\n";
78 my %NFO;
79 my $PESE='PE';
80 open INS,'>',$statout.'.ins' or die "[x]Error opening $statout.ins: $!\n";
81 while(my $nfofpath=shift @ARGV) {
82 next unless -f $nfofpath;
83 %NFO=();
84 print STDERR "Read [$nfofpath]\n";
85 print INS "#[$nfofpath]\n";
86 open NFO,'<',"$nfofpath" or (warn "[!]Error opening $nfofpath: $!\n" and next);
87 while (<NFO>) {
88 next if /^(#|$)/;
89 chomp;
90 my ($key,@values)=split /\t/;
91 $key='ALL' if $key eq '_ALL_'; # for old format
92 $NFO{$key}=\@values;
94 close NFO;
95 &sumup($PESE,$Dathref,\%NFO);
96 push @InsStr,${$NFO{Summary}}[3] if $PESE eq 'PE';
97 #ddx \%NFO;
99 #ddx \%DATrbrf;
100 ddx $Dathref;
101 ddx \@InsStr;
103 print INS "\n#Mode(p%),Lsd,Rsd,InsAvg,STD\n";
104 print INS "$_\n" for @InsStr;
105 close INS;
106 #__END__
107 my %Chr;
108 open O,'>',$statout.'.stat' or die "[x]Error opening $statout.stat: $!\n";
109 print O "#Summary:\n#Total_Reads Aligned_Pairs,Aligned_Single\tReadsOut\tBPOut\tMisSum\tTrimedReads\tTrimedBP\tmisMatchReads|ASC\tReads\@Hit|ASC\tBP\@Hit|ASC\tIndelReads|ASC\tBadLines\n";
110 my $RFL=&combineLineA($Dathref);
111 print O $RFL,"\n";
112 close O;
113 warn "[!] Done !\n";
115 __END__
116 ./to9311/2soap/soaps.stat
117 上半部分:
118 #Summary SubItemOrder: Total_Reads,Aligned_Pairs,Aligned_Single,ReadsOut,BPOut,TrimedReads,TrimedBP,misMatchReads|ASC,Reads@Hit|ASC,BP@Hit|ASC,IndelReads|ASC,BadLines
119 按个体统计:grep ^ALL to9311/2soap/soaps.stat | awk '{print $2"\t"$3}' |sort|uniq
120 按文库统计:grep ^ALL to9311/2soap/soaps.stat | awk '{print $4"\t"$5}' |sort|uniq
121 按lane统计:grep ^ALL to9311/2soap/soaps.stat | awk '{print $6"\t"$7}'
123 下半部分:
124 #ByChr SubItemOrder: ReadsOut,BPOut,TrimedReads,TrimedBP,misMatchReads|ASC,Reads@Hit|ASC,BP@Hit|ASC,IndelReads|ASC
125 按个体统计:grep ^Chr01 to9311/2soap/soaps.stat | awk '{print $2"\t"$3}' |sort|uniq
126 按文库统计:grep ^Chr01 to9311/2soap/soaps.stat | awk '{print $4"\t"$5}' |sort|uniq
127 按lane统计:grep ^Chr01 to9311/2soap/soaps.stat | awk '{print $6"\t"$7}'
129 以Chr01为例,其他染色体替换。或用“Chr”提取全部。
131 misMatchReads|ASC示例:
132 300347|184857|0|222”,mismatch=1的有300347,mismatch=2的有184857,没有mismatch=3的,mismatch=4的有222
133 IndelReads|ASC 同上。
135 BP@Hit|ASC与Reads@Hit|ASC类似,但只分为1..9>=10共计10种类别。
136 根据抽取一个soap结果统计结果,不同hit数的分布是类似双曲线趋势下降的。