6 use Time
::HiRes qw
( gettimeofday tv_interval
);
10 my $bin='/share/raid010/resequencing/resequencing/tmp/resequencing/animal/panda20090805/redup/doredupmerge.pl';
12 our $opts='i:o:s:c:bv';
13 our($opt_i, $opt_s, $opt_o, $opt_v, $opt_b, $opt_c);
15 our $desc='SoapSort library PCR PE Duplication Remover & Merger (Qsub Edition)';
17 \t-i SOAP result path (./soap) with ./PE and ./SE
18 \t-s sample.list (sample.list) in format: /^sample\\tlib\\t?.*\$/
19 \t-c chromosome list (chrorder) in format: /^chr_name\\n\$/
20 \t-o Merge output path (./merged), will mkdir if not exist
21 \t-v show verbose info to STDOUT
22 \t-b No pause for batch runs
27 $opt_i='./soap' if ! $opt_i;
28 $opt_s='sample.list' if ! $opt_s;
29 $opt_o='./merged' if ! $opt_o;
30 $opt_c='chrorder' if ! $opt_c;
32 print STDERR
"From [$opt_i] to [$opt_o] refer to [$opt_s][$opt_c]\n";
33 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
35 my $start_time = [gettimeofday
];
37 my (%sample,$tmppath,@PEp,@PEs,@SE,@Chromosome);
38 opendir INDIR
,$opt_i.'/PE' or warn "[!]Error opening [${opt_i}/PE]: $!\n";
39 for my $file (readdir INDIR
) {
40 if ($file =~ /\_\d\.fq\.soap$/) {push @PEp,$file;}
41 elsif ($file =~ /\_\d\.fq\.single$/) {push @PEs,$file;}
44 opendir INDIR
,$opt_i.'/SE' or warn "[!]Error opening [${opt_i}/SE]: $!\n";
45 for my $file (readdir INDIR
) {
46 if ($file =~ /\.fq\.soap$/) {push @SE,$file;}
49 my @files=(@PEp,@PEs,@SE);
50 die "[x]No porper SOAP output files found. Quit.\n" if $#files==-1;
53 print "[$_]\n" for @files;
56 open SAMPLE
,'<',$opt_s or die "Error opening $opt_s: $!\n";
60 my ($sample,$lib)=split /\t/;
61 print "[$sample]\t[$lib]\n" if $opt_v;
62 push @
{$sample{$sample}},$lib;
65 open CHR
,'<',$opt_c or die "Error opening $opt_c: $!\n";
69 system('mkdir','-p',$opt_o);
71 for my $sample (keys %sample) {
72 $tmppath=$opt_o.'/'.$sample;
74 open OUT
,'>',$tmppath.'/'.$sample.'_lst.soaplist';
76 for my $lib (@
{$sample{$sample}}) {
77 for (grep /$lib/,@PEp ) {
78 print OUT
"PE\t${opt_i}/PE/$_\t$filecount\n";
81 for (grep /$lib/,@PEs ) {
82 print OUT
"SE\t${opt_i}/PE/$_\t$filecount\n";
85 for (grep /$lib/,@SE ) {
86 print OUT
"SE\t${opt_i}/SE/$_\t$filecount\n";
92 open OUT
,'>',$tmppath.'/'.$sample.'_'.$_.'.sh';
93 print OUT
"#!/bin/bash\nsource /home/huxuesong/works/run.sh\n
94 echo running \@ \`hostname\` >'${tmppath}/${sample}_${_}.err'
95 date >>'${tmppath}/${sample}_${_}.err'
96 perl '$bin' -b -c '$_' -i '${tmppath}/${sample}_lst.soaplist' -o '${tmppath}/$sample' >'${tmppath}/${sample}_${_}.log' 2>>'${tmppath}/${sample}_${_}.err'
97 date >>'${tmppath}/${sample}_${_}.err'\n";
98 #" # to balance the poor highlighting of gedit, >_<
103 # Well, we can auto qsub with caltulated vf. Adding soon, maybe.
106 my $stop_time = [gettimeofday
];
108 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";
109 print "\nPlease use the following command to batch qsub:\033[32;1m
110 find ${opt_o}/ -name '*.sh' | while read ll; do qsub -l vf=2G -cwd \$ll; done\n\033[0;0m\n";
112 GP26 PANwkgRATDXAAPEI
75
113 GP30 PANwkgRAXDXAAPEI
75