modified: Makefile
[GalaxyCodeBases.git] / perl / etc / radseq / fqdebarcode.pl
blob201d9c7229de84149625fb51c5a8d7eedfedeb7a
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120330
5 =cut
6 use strict;
7 use warnings;
8 #use Data::Dump qw(ddx);
10 die "Usage: $0 <bar code> <fq1> <fq2> <outprefix1> <outprefix2>\n" if @ARGV<2;
11 my $bar=shift;
12 my $fq=shift;
13 my $fqb=shift;
14 my $out=shift;
15 my $outb=shift;
17 my $Eseq="CTGCAG";
18 $Eseq = "TGCAG";
19 #my $EcutAt=5;
21 sub openfile($) {
22 my ($filename)=@_;
23 my $infile;
24 if ($filename=~/.bz2$/) {
25 open( $infile,"-|","bzip2 -dc $filename") or die "Error opening $filename: $!\n";
26 } elsif ($filename=~/.gz$/) {
27 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
28 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
29 return $infile;
32 my $fqfh=openfile($fq);
33 my $fqfh2=openfile($fqb);
34 my $prefix = $bar.$Eseq;
35 my $barLen = length $bar;
36 open O,'|-',"gzip -9c >$out.fq.gz" or die "Error opening $out.fq.gz with gzip: $!\n";
37 open P,'|-',"gzip -9c >$outb.fq.gz" or die "Error opening $outb.fq.gz with gzip: $!\n";
38 open L,'>',$out.'.log' or die "Error opening $out.log: $!\n";
39 select(L);
40 $|=1;
41 print L "Barcode:[$bar]($barLen), Enzyme:[$Eseq].\n => Prefix:[$prefix]\nFrom [$fq],[$fqb]\nTo [$out.fq.gz],[$outb.fq.gz]\n\n";
43 my ($fq1,$fq2,$fq3,$fq4);
44 my ($fqb1,$fqb2,$fqb3,$fqb4);
45 my ($tot,$have,$nothave)=(0,0,0);
46 while (defined ($fq1=<$fqfh>)) {
47 $fq2=<$fqfh>;
48 $fq3=<$fqfh>;
49 $fq4=<$fqfh>;
51 $fqb1=<$fqfh2>;
52 $fqb2=<$fqfh2>;
53 $fqb3=<$fqfh2>;
54 $fqb4=<$fqfh2>;
56 if ($fq2 =~ /^$prefix/) {
57 $fq2 = substr $fq2,$barLen;
58 $fq4 = substr $fq4,$barLen;
59 ++$have;
60 print O join('',$fq1,$fq2,$fq3,$fq4);
61 print P join('',$fqb1,$fqb2,$fqb3,$fqb4);
62 } else {
63 #$fq2 = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n";
64 #$fq4 = "################################\n";
65 ++$nothave;
67 ++$tot;
68 #print O join('',$fq1,$fq2,$fq3,$fq4);
71 close $fqfh;
72 print L "Total Read Pairs: $tot\nWith Prefix: $have , ",$have/$tot,"\nWithout pfx: $nothave , ",$nothave/$tot,"\n";
73 close L;
74 close O;
75 close P;