modified: myjupyterlab.sh
[GalaxyCodeBases.git] / released / pIRS.old / bwasam / matrixmerge.pl
blob1fd89b7f9eb16eb2da8143a5834010298712d4e1
1 #!/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 0.1.1 @ 20110819
5 =cut
6 #use lib "/ifs1/ST_ASMB/USER/huxuesong/public/lib";
7 use strict;
8 use warnings;
9 use Time::HiRes qw ( gettimeofday tv_interval );
10 use Galaxy::ShowHelp;
12 $main::VERSION=0.1.1;
13 our $opts='o:b';
14 our($opt_o, $opt_b);
16 #our $desc='';
17 our $help=<<EOH;
18 \t-o output prefix (./allmatrix).{count,ratio}.matrix
19 \t-b No pause for batch runs
20 EOH
21 our $ARG_DESC='matrix_count_files';
23 ShowHelp();
24 $opt_o='./allmatrix' if ! $opt_o;
26 print STDERR "From [@ARGV] to [$opt_o]\n";
27 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <STDIN>;}
29 my $start_time = [gettimeofday];
30 #BEGIN
31 my $READLEN=0;
32 my $Qcount=41;
33 my ($TotalReads,$TotalBase,$MisBase,%BaseCountTypeRef)=(0,0,0);
34 my ($mapBase,$mapReads,$QBbase,$QBmis)=(0,0,0,0);
35 my $type='N/A';
36 my %Stat; # $Stat{Ref}{Cycle}{Read-Quality}
37 my @BQHeader;
38 while (<>) {
39 if (/^#Input \[(\w+)\] file of mapped Reads: (\d+) , mapped Bases (\d+) \(no base stat for sam files\)$/) {
40 $type=$1 if $type ne 'sam';
41 $mapReads += $2;
42 $mapBase += $3 if $type ne 'sam';
44 if (/^#Total statistical Bases: (\d+) , Reads: (\d+) of ReadLength (\d+)$/) {
45 print " >$ARGV| Reads:$2 ReadLen:$3 Bases:$1\n";
46 $TotalReads += $2;
47 $READLEN = $3 if $READLEN < $3;
49 if (/^#Ref\tCycle\t/) {
50 #s/^#//;
51 chomp;
52 (undef,undef,@BQHeader)=split /\t/;
53 pop @BQHeader if $BQHeader[-1] eq 'RowSum';
55 if (/^#Dimensions:.+?Quality_number (\d+)$/) {
56 $Qcount = $1 if $Qcount<$1;
58 if (/^#Mismatch_base: (\d+)/) {
59 $MisBase += $1;
61 if (/^#QB_Bases: (\d+), QB_Mismatches: (\d+)/) {
62 $QBbase += $1;
63 $QBmis += $2;
65 next if /^#/;
66 next if /^$/;
67 chomp;
68 my ($ref,$cycle,@BQ)=split /\t/;
69 #print "$ref,$cycle,@BQ\n";
70 next unless $ref =~ /^[ATCG]$/;
71 #die "[$_]\n$ref,$cycle,[@BQ]\n$#BQ < $#BQHeader " if $#BQ < $#BQHeader;
72 for my $key (@BQHeader) {
73 my $value=shift @BQ;
74 $Stat{$ref}{$cycle}{$key}+=$value;
75 $BaseCountTypeRef{$ref}+=$value;
76 $TotalBase+=$value;
77 #print "{$ref}{$cycle}{$key}$value\n";
80 #print $TotalReads,"\t",$READLEN,"\n";
81 #print join("\t",@BQHeader),"\n";
82 open OA,'>',$opt_o.'.count.matrix' or die "Error: $!\n";
83 open OB,'>',$opt_o.'.ratio.matrix' or die "Error: $!\n";
84 my $tmp;
85 chomp(my $user=`id -nru`);
86 @ARGV=('/etc/passwd');
87 chomp(my @passwd=<>);
88 ($_)=grep /$user/,@passwd;
89 $tmp=(split /:/)[4];
90 my $mail='';
91 $mail=" <$tmp>" unless $tmp =~ /^\s*$/;
92 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
93 my $date=sprintf "%02d:%02d:%02d,%4d-%02d-%02d",$hour,$min,$sec,$year+1900,$mon+1,$mday;
94 my $Cycle=2*$READLEN;
95 my $MisRate=100*$MisBase/$TotalBase;
96 $tmp="#Generate @ $date by ${user}$mail
97 #Input [$type] file of Reads: $mapReads , Bases $mapBase (no base stat for sam files)
98 #Total statistical Bases: $TotalBase , Reads: $TotalReads of ReadLength $READLEN
99 #Dimensions: Ref_base_number 4, Cycle_number $Cycle, Seq_base_number 4, Quality_number $Qcount
100 #Mismatch_base: $MisBase, Mismatch_rate: $MisRate %
101 #QB_Bases: $QBbase, QB_Mismatches: $QBmis (bases with quality <= 2)
102 #Reference Base Ratio in reads: ";
103 my @BaseOrder=sort keys %BaseCountTypeRef; # qw{A T C G};
104 for (@BaseOrder) {
105 $tmp .= $_.' '. int(0.5+100*1000*$BaseCountTypeRef{$_}/$TotalBase)/1000 .' %; ';
108 $tmp .= "\n#".join("\t",'Ref','Cycle',@BQHeader);
109 print OA $tmp;
110 print OB $tmp;
111 print OA "\tRowSum";
112 print OB "\n";
113 my ($count,$countsum);
114 for my $ref (@BaseOrder) {
115 print OA "\n";
116 for my $cycle (sort {$a<=>$b} keys %{$Stat{$ref}}) {
117 $tmp="$ref\t$cycle\t";
118 print OA $tmp; print OB $tmp;
119 my (@Counts,@Rates)=();
120 for my $bq (@BQHeader) {
121 push @Counts,$Stat{$ref}{$cycle}{$bq};
123 #print "[",join("|",@Counts),"\n";
124 $countsum=0;
125 $countsum += $_ for @Counts;
126 push @Rates,$_/$countsum for @Counts;
127 print OA join("\t",@Counts,$countsum),"\n";
128 print OB join("\t",@Rates),"\n";
131 close OA;
132 close OB;
134 #END
135 my $stop_time = [gettimeofday];
137 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
138 __END__