modified: myjupyterlab.sh
[GalaxyCodeBases.git] / released / pIRS.old / bwasam / matrixsummer.pl
blob22a49f664582a838b72ece1c8d8eb2f2f4b8b2c0
1 #!/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 0.1.0 @ 20110803
5 =cut
6 #use lib "/ifs1/ST_ASMB/USER/huxuesong/public/lib";
7 #use lib '/export/data0/gentoo/tmp';
8 use strict;
9 use warnings;
10 use Time::HiRes qw ( gettimeofday tv_interval );
11 use Galaxy::ShowHelp;
12 use Data::Dump qw(dump ddx);
14 $main::VERSION=0.1.0;
15 our $opts='o:b';
16 our($opt_o, $opt_b);
18 #our $desc='';
19 our $help=<<EOH;
20 \t-o output prefix (./matrixsum).{err2mis}
21 \t-b No pause for batch runs
22 EOH
23 our $ARG_DESC='matrix_count_file';
25 ShowHelp();
26 $opt_o='./matrixsum' if ! $opt_o;
27 my $input=shift @ARGV;
28 die "[!]Error @ [$input]\n" unless (-s $input);
30 print STDERR "From [$input] to [$opt_o]\n";
31 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <STDIN>;}
33 my $start_time = [gettimeofday];
34 #BEGIN
35 open IN,'<',$input or die "Error: $!\n";
36 my $READLEN=0;
37 my $Qcount=41;
38 my ($TotalReads,$TotalBase,$MisBase,%BaseCountTypeRef)=(0,0,0);
39 my ($mapBase,$mapReads,$QBbase)=(0,0,0);
40 my $type='N/A';
41 my %Stat; # $Stat{Ref}{Cycle}{Read}{Quality}
42 my %MismatchBYQ; # $MismatchBYQ{Read:A,T,C,G,All}{$Q}->[mismatch,match]
43 my @BQHeader;
44 while (<IN>) {
45 if (/^#Input \[(\w+)\] file of mapped Reads: (\d+) , mapped Bases (\d+) \(no base stat for sam files\)$/) {
46 $type=$1 if $type ne 'sam';
47 $mapReads += $2;
48 $mapBase += $3 if $type ne 'sam';
50 if (/^#Total statistical Bases: (\d+) , Reads: (\d+) of ReadLength (\d+)$/) {
51 print " >$input| Reads:$2 ReadLen:$3 Bases:$1\n";
52 $TotalReads += $2;
53 $READLEN = $3 if $READLEN < $3;
55 if (/^#Ref\tCycle\t/) {
56 #s/^#//;
57 chomp;
58 (undef,undef,@BQHeader)=split /\t/;
59 pop @BQHeader if $BQHeader[-1] eq 'RowSum';
60 for (@BQHeader) {
61 /(\w)[^\d]?(\d+)$/ or die "[x]BQHeader wrong:[$_].\n";
62 $_=[$1,$2];
65 if (/^#Dimensions:.+?Quality_number (\d+)$/) {
66 $Qcount = $1 if $Qcount<$1;
68 if (/^#Mismatch_base: (\d+)/) {
69 $MisBase += $1;
71 if (/^#QB_Bases: (\d+)/) {
72 $QBbase += $1;
74 next if /^#/;
75 next if /^$/;
76 chomp;
77 my ($ref,$cycle,@BQ)=split /\t/;
78 #print "$ref,$cycle,@BQ\n";
79 next unless $ref =~ /^[ATCG]$/;
80 #die "[$_]\n$ref,$cycle,[@BQ]\n$#BQ < $#BQHeader " if $#BQ < $#BQHeader;
81 for my $key (@BQHeader) {
82 my $value=shift @BQ;
83 my ($kbase,$kQ)=@$key;
84 $Stat{$ref}{$cycle}{$kbase}{$kQ}+=$value;
85 if ($ref eq $kbase) {
86 $MismatchBYQ{$kbase}{$kQ}->[1] +=$value;
87 $MismatchBYQ{'_All'}{$kQ}->[1] +=$value;
88 } else {
89 $MismatchBYQ{$kbase}{$kQ}->[0] +=$value;
90 $MismatchBYQ{'_All'}{$kQ}->[0] +=$value;
92 $BaseCountTypeRef{$ref}+=$value;
93 $TotalBase+=$value;
94 #print "{$ref}{$cycle}{$key}$value\n";
97 close IN;
98 #ddx \%MismatchBYQ;
99 open OA,'>',$opt_o.'.err2mis' or die "Error: $!\n";
100 print OA "#Read\tQ\tErrRate\tMismatchRate\terrbar\n";
101 for my $read (sort keys %MismatchBYQ) {
102 for my $Q (sort {$a<=>$b} keys %{$MismatchBYQ{$read}}) {
103 my ($mismatch,$match)=@{$MismatchBYQ{$read}{$Q}};
104 next unless $match;
105 print OA "$read\t$Q\t",10**(-$Q/10),"\t",$mismatch/($mismatch+$match),"\t",1/($mismatch+$match),"\n";
108 close OA;
109 =pod
110 for my $ref (keys %Stat) {
111 for my $cycle (keys %{$Stat{$ref}}) {
112 for my $base (keys %{$Stat{$ref}{$cycle}}) {
113 for my $Q (keys %{$Stat{$ref}{$cycle}{$base}}) {
114 print "{$ref}{$cycle}{$base}{$Q} = $Stat{$ref}{$cycle}{$base}{$Q}\n";
119 =cut
121 #END
122 my $stop_time = [gettimeofday];
124 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
125 __END__