3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 0.1.0 @ 20110803
6 #use lib "/ifs1/ST_ASMB/USER/huxuesong/public/lib";
7 #use lib '/export/data0/gentoo/tmp';
10 use Time
::HiRes qw
( gettimeofday tv_interval
);
12 use Data
::Dump
qw(dump ddx);
20 \t-o output prefix (./matrixsum).{err2mis}
21 \t-b No pause for batch runs
23 our $ARG_DESC='matrix_count_file';
26 $opt_o='./matrixsum' if ! $opt_o;
27 my $input=shift @ARGV;
28 die "[!]Error @ [$input]\n" unless (-s
$input);
30 print STDERR
"From [$input] to [$opt_o]\n";
31 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <STDIN
>;}
33 my $start_time = [gettimeofday
];
35 open IN
,'<',$input or die "Error: $!\n";
38 my ($TotalReads,$TotalBase,$MisBase,%BaseCountTypeRef)=(0,0,0);
39 my ($mapBase,$mapReads,$QBbase)=(0,0,0);
41 my %Stat; # $Stat{Ref}{Cycle}{Read}{Quality}
42 my %MismatchBYQ; # $MismatchBYQ{Read:A,T,C,G,All}{$Q}->[mismatch,match]
45 if (/^#Input \[(\w+)\] file of mapped Reads: (\d+) , mapped Bases (\d+) \(no base stat for sam files\)$/) {
46 $type=$1 if $type ne 'sam';
48 $mapBase += $3 if $type ne 'sam';
50 if (/^#Total statistical Bases: (\d+) , Reads: (\d+) of ReadLength (\d+)$/) {
51 print " >$input| Reads:$2 ReadLen:$3 Bases:$1\n";
53 $READLEN = $3 if $READLEN < $3;
55 if (/^#Ref\tCycle\t/) {
58 (undef,undef,@BQHeader)=split /\t/;
59 pop @BQHeader if $BQHeader[-1] eq 'RowSum';
61 /(\w)[^\d]?(\d+)$/ or die "[x]BQHeader wrong:[$_].\n";
65 if (/^#Dimensions:.+?Quality_number (\d+)$/) {
66 $Qcount = $1 if $Qcount<$1;
68 if (/^#Mismatch_base: (\d+)/) {
71 if (/^#QB_Bases: (\d+)/) {
77 my ($ref,$cycle,@BQ)=split /\t/;
78 #print "$ref,$cycle,@BQ\n";
79 next unless $ref =~ /^[ATCG]$/;
80 #die "[$_]\n$ref,$cycle,[@BQ]\n$#BQ < $#BQHeader " if $#BQ < $#BQHeader;
81 for my $key (@BQHeader) {
83 my ($kbase,$kQ)=@
$key;
84 $Stat{$ref}{$cycle}{$kbase}{$kQ}+=$value;
86 $MismatchBYQ{$kbase}{$kQ}->[1] +=$value;
87 $MismatchBYQ{'_All'}{$kQ}->[1] +=$value;
89 $MismatchBYQ{$kbase}{$kQ}->[0] +=$value;
90 $MismatchBYQ{'_All'}{$kQ}->[0] +=$value;
92 $BaseCountTypeRef{$ref}+=$value;
94 #print "{$ref}{$cycle}{$key}$value\n";
99 open OA
,'>',$opt_o.'.err2mis' or die "Error: $!\n";
100 print OA
"#Read\tQ\tErrRate\tMismatchRate\terrbar\n";
101 for my $read (sort keys %MismatchBYQ) {
102 for my $Q (sort {$a<=>$b} keys %{$MismatchBYQ{$read}}) {
103 my ($mismatch,$match)=@
{$MismatchBYQ{$read}{$Q}};
105 print OA
"$read\t$Q\t",10**(-$Q/10),"\t",$mismatch/($mismatch+$match),"\t",1/($mismatch+$match),"\n";
110 for my $ref (keys %Stat) {
111 for my $cycle (keys %{$Stat{$ref}}) {
112 for my $base (keys %{$Stat{$ref}{$cycle}}) {
113 for my $Q (keys %{$Stat{$ref}{$cycle}{$base}}) {
114 print "{$ref}{$cycle}{$base}{$Q} = $Stat{$ref}{$cycle}{$base}{$Q}\n";
122 my $stop_time = [gettimeofday
];
124 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";