3 Author: Hu Xuesong @ PKU <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120530
8 #use Data::Dump qw(ddx);
10 die "Usage: $0 <sam1> <sam2> <out prefix>\n" if @ARGV<2;
17 my ($infile,$lastline);
18 if ($filename=~/.bz2$/) {
19 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
20 } elsif ($filename=~/.gz$/) {
21 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
22 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
24 while (defined($line=<$infile>)) {
25 if ($line =~ /^@\w\w\t\w\w:/) {
29 my @items = split /\t/,$line;
30 #warn "[",join(" | ",@items),"]\n";
31 if ($items[1] & 128) { # second read
34 return [$infile,\
@items];
39 my $samin1 = openfile
($in1);
40 my $samin2 = openfile
($in2);
41 open O
,'|-',"gzip -9c >$outp.cmpsam.gz" or die "Error opening $outp.cmpsam.gz with gzip: $!\n";
42 open L
,'>',$outp.'.log' or die "Error opening $outp.log: $!\n";
46 print L
"From [$in1,$in2] to [$outp.cmpsam.gz]\n";
47 my ($Total,$Out,$notOut)=(0,0,0);
54 while (defined($line = readline($$FHa[0]))) {
56 my @items = split /\t/,$line;
57 if ($items[1] & 128) { # second read
65 my ($StatSum,$arr1,$arr2,%Stat,$XT1,$XT2)=(0);
67 $arr1 = getNextRead
($samin1) or last;
68 $arr2 = getNextRead
($samin2) or last;
69 die "$$arr1[0] ne $$arr2[0]" if $$arr1[0] ne $$arr2[0];
71 $diff |= 1 if (($$arr1[1] & 16) != ($$arr2[1] & 16)); # strand of the query
72 $diff |= 2 if ($$arr1[2] ne $$arr2[2]); # Reference sequence NAME
73 $diff |= 4 if ($$arr1[3] ne $$arr2[3]); # POS
74 $diff |= 8 if (($$arr1[4]>0) != ($$arr2[4]>0)); # MAPQ == 0 or not.
75 $diff |= 16 if ($$arr1[5] ne $$arr2[5]); # CIAGR
77 if ($$arr1[2] eq '*') {
80 $XT1 = (grep(/^XT:/,@
$arr1))[0];
82 $XT1 = (split(':',$XT1))[2];
83 $diff |= 64 if $XT1 eq 'U';
85 #warn "---[",join(" | ",@$arr1),"]\n";
89 if ($$arr2[2] eq '*') {
92 $XT2 = (grep(/^XT:/,@
$arr2))[0];
94 $XT2 = (split(':',$XT2))[2];
95 $diff |= 128 if $XT2 eq 'U';
97 #warn "---[",join(" | ",@$arr2),"]\n";
101 $diff |= 32 if ( $XT1 and $XT2 and ($XT1 ne $XT2) );
102 print O
"[$#$arr1] [$#$arr2] $$arr1[0] - $diff\n";
106 print L
'# ',join(',',reverse qw
/Strand Ref Pos MapQ>0 CIAGR XT 1=U 2=U 1unmap 2unmap 1noXT 2noXT/),"\n";
111 print L
"Stat of [$StatSum] Read_1s:\n";
112 for (sort {$Stat{$b}<=>$Stat{$a}} keys %Stat) {
113 print L
"$_\t$Stat{$_}\t";
114 printf L
"%.6f\t%#x\t%#014b\n",$Stat{$_}/$StatSum,($_) x
2;
120 grep -P
'0b[01]{5}1[01]{6}' PD2M
-LSJ
-BHX011
.log|awk
'BEGIN{sum=0}{sum+=$2}END{print sum}'