modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / soap / rmdup / qredupmerge.pl
blob2908dab37a19d61aa9ee85ddab2bf902db031326
1 #!/usr/bin/perl -w
2 #use threads;
3 use strict;
4 use warnings;
5 #use DBI;
6 use Time::HiRes qw ( gettimeofday tv_interval );
7 use Galaxy::ShowHelp;
9 $main::VERSION=0.0.1;
10 my $bin='/share/raid010/resequencing/resequencing/tmp/resequencing/animal/panda20090805/redup/doredupmerge.pl';
12 our $opts='i:o:s:c:bv';
13 our($opt_i, $opt_s, $opt_o, $opt_v, $opt_b, $opt_c);
15 our $desc='SoapSort library PCR PE Duplication Remover & Merger (Qsub Edition)';
16 our $help=<<EOH;
17 \t-i SOAP result path (./soap) with ./PE and ./SE
18 \t-s sample.list (sample.list) in format: /^sample\\tlib\\t?.*\$/
19 \t-c chromosome list (chrorder) in format: /^chr_name\\n\$/
20 \t-o Merge output path (./merged), will mkdir if not exist
21 \t-v show verbose info to STDOUT
22 \t-b No pause for batch runs
23 EOH
25 ShowHelp();
27 $opt_i='./soap' if ! $opt_i;
28 $opt_s='sample.list' if ! $opt_s;
29 $opt_o='./merged' if ! $opt_o;
30 $opt_c='chrorder' if ! $opt_c;
32 print STDERR "From [$opt_i] to [$opt_o] refer to [$opt_s][$opt_c]\n";
33 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
35 my $start_time = [gettimeofday];
36 #BEGIN
37 my (%sample,$tmppath,@PEp,@PEs,@SE,@Chromosome);
38 opendir INDIR,$opt_i.'/PE' or warn "[!]Error opening [${opt_i}/PE]: $!\n";
39 for my $file (readdir INDIR) {
40 if ($file =~ /\_\d\.fq\.soap$/) {push @PEp,$file;}
41 elsif ($file =~ /\_\d\.fq\.single$/) {push @PEs,$file;}
43 closedir INDIR;
44 opendir INDIR,$opt_i.'/SE' or warn "[!]Error opening [${opt_i}/SE]: $!\n";
45 for my $file (readdir INDIR) {
46 if ($file =~ /\.fq\.soap$/) {push @SE,$file;}
48 closedir INDIR;
49 my @files=(@PEp,@PEs,@SE);
50 die "[x]No porper SOAP output files found. Quit.\n" if $#files==-1;
52 if ($opt_v) {
53 print "[$_]\n" for @files;
56 open SAMPLE,'<',$opt_s or die "Error opening $opt_s: $!\n";
57 while (<SAMPLE>) {
58 chomp;
59 s/\r//g;
60 my ($sample,$lib)=split /\t/;
61 print "[$sample]\t[$lib]\n" if $opt_v;
62 push @{$sample{$sample}},$lib;
64 close SAMPLE;
65 open CHR,'<',$opt_c or die "Error opening $opt_c: $!\n";
66 @Chromosome=<CHR>;
67 chomp @Chromosome;
68 close CHR;
69 system('mkdir','-p',$opt_o);
71 for my $sample (keys %sample) {
72 $tmppath=$opt_o.'/'.$sample;
73 mkdir $tmppath,0775;
74 open OUT,'>',$tmppath.'/'.$sample.'_lst.soaplist';
75 my $filecount=1;
76 for my $lib (@{$sample{$sample}}) {
77 for (grep /$lib/,@PEp ) {
78 print OUT "PE\t${opt_i}/PE/$_\t$filecount\n";
79 ++$filecount;
81 for (grep /$lib/,@PEs ) {
82 print OUT "SE\t${opt_i}/PE/$_\t$filecount\n";
83 ++$filecount;
85 for (grep /$lib/,@SE ) {
86 print OUT "SE\t${opt_i}/SE/$_\t$filecount\n";
87 ++$filecount;
90 close OUT;
91 for (@Chromosome) {
92 open OUT,'>',$tmppath.'/'.$sample.'_'.$_.'.sh';
93 print OUT "#!/bin/bash\nsource /home/huxuesong/works/run.sh\n
94 echo running \@ \`hostname\` >'${tmppath}/${sample}_${_}.err'
95 date >>'${tmppath}/${sample}_${_}.err'
96 perl '$bin' -b -c '$_' -i '${tmppath}/${sample}_lst.soaplist' -o '${tmppath}/$sample' >'${tmppath}/${sample}_${_}.log' 2>>'${tmppath}/${sample}_${_}.err'
97 date >>'${tmppath}/${sample}_${_}.err'\n";
98 #" # to balance the poor highlighting of gedit, >_<
99 close OUT;
103 # Well, we can auto qsub with caltulated vf. Adding soon, maybe.
105 #END
106 my $stop_time = [gettimeofday];
108 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
109 print "\nPlease use the following command to batch qsub:\033[32;1m
110 find ${opt_o}/ -name '*.sh' | while read ll; do qsub -l vf=2G -cwd \$ll; done\n\033[0;0m\n";
111 __END__
112 GP26 PANwkgRATDXAAPEI 75
113 GP30 PANwkgRAXDXAAPEI 75