new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / combBfq.pl
blob223863e0a407f5faf8ea4ae775769386288c954c
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 die "Usage: $0 <ori-fq> <B-masked-fq> <outprefix>.(fq.xz|log)\n" if @ARGV <2;
6 my ($inA,$inB,$out)=@ARGV;
7 $out=$inA.'.merge' unless $out;
8 warn "From [$inA]&[$inB] to [$out].(fq.xz|log)\n";
10 sub openfile($) {
11 my ($filename)=@_;
12 my $infile;
13 if ($filename=~/.bz2$/) {
14 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
15 } elsif ($filename=~/.gz$/) {
16 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
17 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
18 return $infile;
20 sub getFQitem($) {
21 my $fh=$_[0];
22 my ($a,$b,$c,$d);
23 defined($a=<$fh>) or return 0;
24 defined($b=<$fh>) or return 0;
25 defined($c=<$fh>) or return 0;
26 defined($d=<$fh>) or return 0;
27 return [$a,$b,$c,$d]
29 sub CnM($$) {
30 my ($a,$b)=@_;
31 my ($id,$seq)=($$a[0],$$a[1]);
32 if ($id ne $$b[0]) {
33 chomp $id;
34 $id .= "\t".$$b[0];
36 if ($seq ne $$b[1]) {
37 chomp $seq;
38 $seq .= "\t".$$b[1];
40 return "${id}${seq}$$a[3]$$b[3]";
43 my ($Count1,$Count2,$CountPairs)=(0,0,0);
44 my $fhA=openfile($inA);
45 my $fhB=openfile($inB);
46 $|=1;
47 open(OUT,"|-","xz -c9e > \"${out}.fq.xz\"") or die "Error opening $out:$!\n";
48 my ($dat1,$dat2);
49 while (1) {
50 $dat1=getFQitem($fhA);
51 $dat2=getFQitem($fhB);
52 if ($dat1 && $dat2) {
53 print OUT CnM($dat1,$dat2);
54 ++$CountPairs;
55 } elsif ($dat1) {
56 print OUT join('',@$dat1);
57 ++$Count1;
58 } elsif ($dat2) {
59 print OUT join('',@$dat2);
60 ++$Count2;
61 } else {
62 last;
66 close $fhA;
67 close $fhB;
68 close OUT;
69 open LOG,'>',"${out}.log" or die "Error opening $out.log:$!\n";
70 my $str = "Out Pairs: $CountPairs\nFQ1 over hang: $Count1\nFQ2 over hang:$Count2\n";
71 print $str;
72 print LOG "From [$inA]&[$inB] to [$out.fq.xz]\n$str";
73 close LOG;
74 #print $Count{1}+$Count{-1}," ,RF:$CountRF\t+$Count{1},-$Count{-1},z$Count{0}\t$Reads\n";
75 __END__
76 find . -name '*.r'|xargs -n1 ./samrstat.pl
77 find . -name '*.ro'|xargs -n1 cat|sort -n