modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / popuSNP / pipeline / popipe.pl
blob2e8168ba39416c160e918ff0cce108a6ab04ec37
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use lib '/share/raid010/resequencing/soft/lib';
5 use Time::HiRes qw ( gettimeofday tv_interval );
6 use File::Basename;
7 use Galaxy::ShowHelp;
9 $main::VERSION=0.0.4;
10 my $SCRIPTS='/panfs/GAG/huxuesong/scripts';
11 my $POPSPLIT=1000000;
13 =pod
14 Note:
15 If a PE Lib come with only one .adapter.list file, all will use that one. If more than 2, only (sort)[0,-1] is used as _1 and _2.
17 ./0rawfq , the path to fq file cannot contain more than 1 "_{$LibName}", since it is searched directly on fullpath.
19 $ perl -MTree::Suffix -e 'my $tree = Tree::Suffix->new(qw(stringxxxssx stringyx1xxssx axxxssxstring));my @lcs = $tree->lcs;print "[$_]\n" for @lcs;'
20 [string]
22 $ perl -MTree::Suffix -e 'my $tree = Tree::Suffix->new(qw(zzzzzzxxaaaaaastringaxxssx tyaaaaaastringzzzzzzyaxxssx zzzzzz1aaaaaaaaxxssxstring));my @lcs = $tree->lcs;print "[$_]\n" for @lcs;'
23 [zzzzzz]
24 [aaaaaa]
25 [axxssx]
26 [string]
28 =cut
29 our $opts='i:o:s:l:c:r:f:m:x:bvqd';
30 our($opt_i, $opt_s, $opt_o, $opt_m, $opt_v, $opt_b, $opt_c, $opt_q, $opt_f, $opt_r, $opt_d, $opt_x);
32 our $desc='1.filter fq, 2.soap, 3.rmdup';
33 our $help=<<EOH;
34 \t-i FASTAQ file path (./0rawfq) with *.adapter.list and *.fq
35 \t-s Sample list (sample.lst) in format: /^Sample\\tLib\\tReadLen\\tInsertSize\$/
36 \t-c Chromosome length list (chr.len) in format: /^ChrName\\s+ChrLen\\s?.*\$/
37 \t-m monoploid Chromosome names ('') in format: 'ChrID1,ChrID2'
38 \t-r Reference Genome for Soap2 (./Ref) with *.index.bwt
39 \t-f faByChr path (./faByChr) with ChrID.fa\(s\)
40 \t-o Project output path (.), will mkdir if not exist
41 \t-q run qsub automatically
42 \t-x lib regex for Simulation Mode, undef for Normal Mode (undef)
43 \t-v show verbose info to STDOUT
44 \t-b No pause for batch runs
45 \t-d Debug Mode, for test only
46 EOH
48 ShowHelp();
50 $opt_i='./0rawfq' if ! $opt_i;
51 $opt_s='sample.lst' if ! $opt_s;
52 $opt_o='.' if ! $opt_o;
53 $opt_c='chr.len' if ! $opt_c;
54 $opt_r='./Ref' if ! $opt_r;
55 $opt_f='./faByChr' if ! $opt_f;
57 # `readlink -f` will be blank if target not exists.
58 system('mkdir','-p',$opt_o);
59 my ($lopt_i,$lopt_r,$lopt_f);
61 $lopt_i=`readlink -nf $opt_i`;
62 $opt_o=`readlink -nf $opt_o`;
63 $lopt_r=`readlink -nf $opt_r`;
64 $lopt_f=`readlink -nf $opt_f`;
66 warn "[x]-i $opt_i not exists !\n" unless $lopt_i;
67 warn "[x]-r $opt_r not exists !\n" unless $lopt_r;
68 warn "[x]-f $opt_f not exists !\n" unless $lopt_f;
69 die "\n" unless $lopt_i and $lopt_r and $lopt_f;
71 my @t=`find $lopt_r -name '*.index.bwt'`;
72 $t[0] =~ /(.+\.index)\.\w+$/;
73 $lopt_r = $1;
74 my %Monoploid;
75 if ($opt_m) {
76 my @Monoploid=split /,/,$opt_m;
77 ++$Monoploid{$_} for @Monoploid;
80 print STDERR "From [$lopt_i] to [$opt_o] refer to [$opt_s][$opt_c]\nRef:[$lopt_r][$lopt_f]\n";
81 print STDERR "Monoploid Chr(s):[",join(',',sort keys %Monoploid),"]\n" if %Monoploid;
82 print STDERR "DEBUG Mode on !\n" if $opt_d;
83 print STDERR "Simulation Mode on with [$opt_x]!\n" if $opt_x;
84 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
86 my $start_time = [gettimeofday];
87 #BEGIN
88 my (%SampleLib,%LibSample,%SampleMaxReadLen,%LibInsSize,%ChrLen,%Info);
89 open SAMPLE,'<',$opt_s or die "Error opening $opt_s: $!\n";
90 while (<SAMPLE>) {
91 chomp;
92 s/\r//g;
93 my ($sample,$lib,$len,$min,$max)=split /\t/;
94 unless (defined $max) {
95 $max=int(0.5+$min*1.08);
96 $min=int(0.5+$min*0.92);
98 #print "Sample[$sample]\tLib[$lib]\n" if $opt_v;
99 push @{$SampleLib{$sample}},$lib;
100 #$LibSample{$lib}=$sample;
101 $Info{$lib}=[$len,$min,$max];
102 push @{$LibSample{$lib}},$sample;
103 $SampleMaxReadLen{$sample}=0;
105 close SAMPLE;
106 for (keys %LibSample) {
107 my $t = scalar @{$LibSample{$_}};
108 die "[!]Sample with Lib conflict($t) for Lib:[$_]. Check [$opt_s] !\n" if $t != 1; # Check if a lib -> more samples
110 if ($opt_v) {
111 for my $k (sort keys %SampleLib) {
112 print "Sample:[$k]\tLib:[",join('],[',@{$SampleLib{$k}}),"]\n"
114 for my $k (sort keys %LibInsSize) {
115 print "Lib:[$k]\tInsize:[",join(',',@{$LibInsSize{$k}}),"]\n";
118 open CHRLEN,'<',$opt_c or die "Error opening $opt_c: $!\n";
119 while (<CHRLEN>) {
120 chomp;
121 s/\r//g;
122 my ($chr,$len)=split /\s+/;
123 #print "Chr:[$chr]\tLen:[$len]\n" if $opt_v;
124 $ChrLen{$chr}=$len;
126 close CHRLEN;
127 if ($opt_v) {
128 for my $k (sort keys %ChrLen) {
129 print "Chr:[$k]\tLen:[$ChrLen{$k}]\n"
132 system('mkdir','-p',$opt_o);
133 my $opath;
134 ### 1.filter fq
135 $opath=$opt_o.'/1fqfilted';
136 system('mkdir','-p',$opath);
137 my @fq = `find $lopt_i -name '*.fq'`; # no need to sort
138 chomp @fq;
139 my (%fqfile2rawfp,%fq1,%fq2,%fqse,%fqpe); # no ext.
140 #@fqfiles = map {[fileparse($_, qr/\.fq/)]} @fq;
141 for (@fq) {
142 my ($file, $path, $ext) = fileparse($_, qr/\.fq/);
143 next if $file =~ /IndexPooling\d+(_[12])?$/i;
144 $fqfile2rawfp{$file}=$path; # $path is enough. But, who cares? Me!
145 my ($lib,$ab);
146 unless ($opt_x) {
147 $file =~ /_([^_]+)(_[12])?$/; # $1, $2 is local, thus un-useable outside this `unless`.
148 $lib = $1; $ab=$2;
149 } else {
150 $file =~ /($opt_x).*?(_[12])?$/;
151 $lib = $1; $ab=$2;
153 unless ($ab) {
154 push @{$fqse{$lib}},[$file]; # well, same API is better ?
155 #print "-[$lib] $file\t[$ab]\n" if $opt_v;
156 next;
158 $fq1{$file}=$lib if $ab eq '_1';
159 $fq2{$file}=$lib if $ab eq '_2';
161 for my $file (keys %fq1) {
162 my $file2=$file;
163 $file2=~s/_1$/_2/;
164 my $lib=$fq1{$file};
165 delete $fq1{$file};
166 if (defined $fq2{$file2}) {
167 push @{$fqpe{$lib}},[$file,$file2];
168 delete $fq2{$file2};
169 } else {
170 warn "[!][$file.fq] is not paired with _2 !\n";
171 push @{$fqse{$lib}},[$file];
174 for my $file (keys %fq2) {
175 my $lib=$fq2{$file};
176 delete $fq2{$file};
177 warn "[!][$file.fq] is not paired with _1 !\n";
178 push @{$fqse{$lib}},[$file];
180 %fq1=%fq2=(); # useless now
181 my %fqbylib;#=%fqpe; If just copy, the value of hash will be the same pointer, thus conflict.
182 push @{$fqbylib{$_}},@{$fqpe{$_}} for (keys %fqpe);
183 push @{$fqbylib{$_}},@{$fqse{$_}} for (keys %fqse);
184 if ($opt_v) {
185 for my $k (sort keys %fqbylib) {
186 #print "\n[$k]\n[",join("]\n[",@{$fqbylib{$k}}),"]\n"
187 print "\n[$k]\n[";
188 for (@{$fqbylib{$k}}) {
189 print join(']-[',@$_);
191 print "]\n";
194 sub qsub($$) {
195 my ($opath,$file)=@_;
196 my @sh = `find $opath/ -name '$file'`; # $opath may be symlink
197 chomp @sh;
198 for (@sh) {
199 print STDERR "[$_]\n" if $opt_v;
200 system("qsub $_");
203 #my ($skip,$count,$copy,$lstcount,$adapter,%copy)=(0,0,0,0);
204 sub callfqfilter($$$$$$$$$) {
205 my ($k,$file,$dir,$adapter,$path,$skip,$count,$copy,$lstcount,$copy_ref)=@_;
206 my $fq="${path}$file.fq";
207 # existance check, old version
208 if (-s "${dir}/${file}.nfo" or -s "${dir}/${file}.fq.bz2") { # We may package the data and mix *.nfo then?
209 ++$skip;
210 system("mv -f ${dir}/${file}_filte.sh ${dir}/${file}_filte.oldsh") if ( (! $adapter) and -e "${dir}/${file}_filte.sh");
211 return [$skip,$count,$copy,$lstcount,$copy_ref];
213 # existance check, old version
214 unless ($adapter) {
215 ++${$copy_ref}{$path};
216 ++$copy;
217 #system('mkdir','-p',$dir);
218 system('cp','-s',$fq,"${dir}/${file}.fq");
219 #warn $fq,"${dir}/${file}.fq";
220 open STAT,'>',"${dir}/${file}_filte.sh" || die "$!\n";
221 print STAT "#!/bin/sh
222 #\$ -N \"s_$k\"
223 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
224 #\$ -cwd -r y -l vf=9M
225 #\$ -o /dev/null -e /dev/null
226 #\$ -S /bin/bash
227 perl $SCRIPTS/fqstat.pl $fq 2>${dir}/${file}.nfo
229 close STAT;
230 return [$skip,$count,$copy,$lstcount,$copy_ref];
232 chomp $adapter;
233 #print "[$dir] [$file]\t[$adapter]\n";
234 system('mkdir','-p',$dir);
235 my $cmd="$adapter $fq >$dir/${file}.fq 2>$dir/${file}.nfo\n";
236 print OUT $cmd;
237 ++$lstcount;
238 return [$skip,$count,$copy,$lstcount,$copy_ref];
241 my ($sample,$cmd,@sh);
242 my (%fqfile2fp); # new path
243 for my $k (keys %fqbylib) {
244 my $withPE=0;
245 $sample=$LibSample{$k}->[0];
246 next unless $sample;
247 my ($len,$min,$max)=@{$Info{$k}};
248 my $dir = $opath."/$sample/$k";
249 system('mkdir','-p',$dir);
250 open OUT,'>',$dir.'.cmd' || die "$!\n";
251 open LST,'>',$dir.'.lst' || die "$!\n";
252 #open SH,'>',$dir.'_filte.sh' || die "$!\n";
253 #open LOG,'>',$dir.'.log' || die "$!\n";
254 my ($skip,$count,$copy,$lstcount,%copy)=(0,0,0,0);
255 my $copy_ref=\%copy;
256 for (@{$fqbylib{$k}}) {
257 my ($fq1,$fq2)=@$_;
258 ++$count;
259 my $path=$fqfile2rawfp{$fq1};
260 my @adapter = `find $path -name '*.list'`;
261 @adapter=sort @adapter; # if _1 & _2, _1 should be first after sort.
262 $fqfile2fp{$fq1}=$dir;
263 ($skip,$count,$copy,$lstcount,$copy_ref)=@{&callfqfilter($k,$fq1,$dir,$adapter[0],$path,$skip,$count,$copy,$lstcount,$copy_ref)};
264 if ($fq2) { # PE
265 $withPE=1;
266 print LST "PE\t${dir}/${fq1}.fq\t${dir}/${fq2}.fq\n";
267 $fqfile2fp{$fq2}=$dir;
268 my $path=$fqfile2rawfp{$fq2};
269 my @adapter = `find $path -name '*.list'`;
270 @adapter=sort @adapter;
271 if ($adapter[0]) {
272 ($skip,$count,$copy,$lstcount,$copy_ref)=@{&callfqfilter($k,$fq2,$dir,$adapter[-1],$path,$skip,$count,$copy,$lstcount,$copy_ref)};
273 } else {
274 ($skip,$count,$copy,$lstcount,$copy_ref)=@{&callfqfilter($k,$fq2,$dir,$adapter[0],$path,$skip,$count,$copy,$lstcount,$copy_ref)};
275 # in case no list file, @adapter will be empty, '-1' will die "Modification of non-creatable array value attempted, subscript -1"
277 } else { # SE
278 print LST "SE\t${dir}/${fq1}.fq\n";
281 %copy=%$copy_ref;
282 close LST;
283 close OUT;
284 open SH,'>',$dir.'_filte.sh' || die "$!\n";
285 print SH "#!/bin/sh
286 #\$ -N \"f_$k\"
287 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
288 #\$ -cwd -r y -l vf=276M
289 #\$ -o /dev/null -e /dev/null
290 #\$ -S /bin/bash -t 1-$lstcount
291 SEEDFILE=${dir}.cmd
292 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
293 eval perl $SCRIPTS/fqfilter.pl \$SEED
294 " if $lstcount > 0;
295 close SH;
296 if ($lstcount == 0) {
297 unlink $dir.'_filte.sh';
298 unlink $dir.'.cmd';
300 open LOG,'>',$dir.'.log' || die "$!\n";
301 print LOG "Of $count file(s),$skip skipped, $copy with no adapter.list.\n";
302 if ($copy) {
303 print LOG "\nThe following path with no adapter.list\n";
304 for (sort keys %copy) {
305 print LOG $_,"\t",$copy{$_}," time(s)\n";
308 close LOG;
309 ## ReadLen
310 if (-s "${dir}.ReadLen") {
311 system("mv -f ${dir}_redlen.sh ${dir}_redlen.oldsh") if (-e "${dir}_redlen.sh");
312 } else {
313 open SH,'>',$dir.'_redlen.sh' || die "$!\n";
314 print SH "#!/bin/sh
315 #\$ -N \"len_$k\"
316 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
317 #\$ -cwd -r y -l vf=1M
318 #\$ -hold_jid f_$k,s_$k
319 #\$ -o /dev/null -e /dev/null
320 #\$ -S /bin/bash
321 grep '# MaxReadLen' $dir/*.nfo | perl -F'\\t' -lane 'BEGIN {\$c=99999999} END { print \"\$b\\n\$c\" } \$b=\$F[-1] if \$b<\$F[-1];\$c=\$F[-1] if \$c>\$F[-1];' > $dir.ReadLen
323 # grep '# MaxReadLen' /panfs/GAG/huxuesong/panda/1fqfilted/GP1/PANwskRAADJBAPEI/*.nfo | perl -F'\t' -lane 'BEGIN {$c=999999} END { print "$b\n$c" } $b=$F[-1] if $b<$F[-1];$c=$F[-1] if $c>$F[-1];'
324 close SH;
326 ## InsertSizing
327 open O,'>',"${dir}.insize";
328 print O "$min\t$max\n";
329 close O;
330 ### DEBUE CODE ###
331 if (-s "${dir}.insize") {
332 system("mv -f ${dir}_insize.sh ${dir}_insize.oldsh") if (-e "${dir}_insize.sh");
333 } else {
334 open SH,'>',$dir.'_insize.sh' || die "$!\n";
335 print SH "#!/bin/sh
336 #\$ -N \"size_$k\"
337 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
338 #\$ -cwd -r y -l vf=4.1G,s_core=4
339 #\$ -hold_jid len_$k
340 #\$ -o /dev/null -e /dev/null
341 #\$ -S /bin/bash
342 perl $SCRIPTS/instsize.pl ${dir}.lst ${dir}.ReadLen $lopt_r $dir
344 close SH;
347 } # End for my $k (keys %fqbylib)
348 ## Qsub
349 if ($opt_q) {
350 print STDERR '-' x 75,"\n";
351 &qsub($opath,'*_filte.sh');
352 &qsub($opath,'*_redlen.sh');
353 &qsub($opath,'*_insize.sh');
355 ### 2.soap
356 # %fqpe,%fqse
357 # /share/raid010/resequencing/resequencing/tmp/bin/pipeline/SNPcalling/subBin/soap2.20 -p 4 -a 090811_I58_FC42C7AAAXX_L8_PANwkgRBMDXAAPEI_1.fq -b 090811_I58_FC42C7AAAXX_L8_PANwkgRBMDXAAPEI_2.fq -D Panda.merge.fa.index -o 090811_I58_FC42C7AAAXX_L8_PANwkgRBMDXAAPEI_1.fq.soap -2 090811_I58_FC42C7AAAXX_L8_PANwkgRBMDXAAPEI_1.fq.single -m 100 -x 400 -t -s 40 -l 32 -v 3
358 my $lastopath=$opath;
359 $opath=$opt_o.'/2soap';
360 system('mkdir','-p',$opath);
361 my %SoapCount;
362 system('touch',"$opath/_.soaplst");
363 system("find $opath/ -name '*.soaplst' | xargs rm"); # rm dies unless input
364 for my $k (keys %fqse) {
365 $sample=$LibSample{$k}->[0];
366 next unless $sample;
367 my $dir = $opath."/$sample/$k";
368 system('mkdir','-p',$dir);
369 open LST,'>>',$dir.'.soaplst' || die "$!\n";
370 open OUT,'>',$dir.'.secmd' || die "$!\n";
371 my $lstcount=0;
372 for (@{$fqse{$k}}) {
373 my $fq=$$_[0];
374 my $path=$lastopath."/$sample/$k";
375 ++$SoapCount{$sample};
376 print LST "SE\t",$SoapCount{$sample},"\t$dir/$fq.se\n";
377 unless (-s "${dir}/${fq}.nfo" or -s "${dir}/${fq}.se.bz2") {
378 print OUT "${path}.ReadLen ${path}/${fq}.fq $lopt_r $dir/$fq\n";
379 ++$lstcount;
382 close OUT;
383 close LST;
384 if ($lstcount > 0) {
385 open SH,'>',$dir.'_soapse.sh' || die "$!\n";
386 print SH "#!/bin/sh
387 #\$ -N \"se_$k\"
388 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
389 #\$ -cwd -r y -l vf=4.1G,s_core=5
390 #\$ -hold_jid len_$k
391 #\$ -o /dev/null -e /dev/null
392 #\$ -S /bin/bash -t 1-$lstcount
393 SEEDFILE=${dir}.secmd
394 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
395 eval perl $SCRIPTS/soapse.pl \$SEED
397 close SH;
398 } else {
399 unlink $dir.'.secmd';
400 unlink $dir.'_soapse.sh';
403 for my $k (keys %fqpe) {
404 $sample=$LibSample{$k}->[0];
405 next unless $sample;
406 my $dir = $opath."/$sample/$k";
407 system('mkdir','-p',$dir);
408 open LST,'>>',$dir.'.soaplst' || die "$!\n";
409 open OUT,'>',$dir.'.pecmd' || die "$!\n";
410 my $lstcount=0;
411 for (@{$fqpe{$k}}) {
412 my ($fq1,$fq2)=@$_;
413 my $path=$lastopath."/$sample/$k";
414 ++$SoapCount{$sample};
415 print LST "PE\t",$SoapCount{$sample},"\t$dir/$fq1.soap\n";
416 ++$SoapCount{$sample};
417 print LST "SE\t",$SoapCount{$sample},"\t$dir/$fq1.single\n";
418 unless (-s "${dir}/${fq1}.nfo" or -s "${dir}/${fq1}.soap.bz2") {
419 print OUT "${path}.insize ${path}.ReadLen ${path}/$fq1.fq ${path}/$fq2.fq $lopt_r $dir/$fq1\n";
420 ++$lstcount;
421 #die $fq1 unless $fq2;
424 close OUT;
425 if ($lstcount > 0) {
426 open SH,'>',$dir.'_soappe.sh' || die "$!\n";
427 print SH "#!/bin/sh
428 #\$ -N \"pe_$k\"
429 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
430 #\$ -cwd -r y -l vf=4.1G,s_core=5
431 #\$ -hold_jid len_$k,size_$k
432 #\$ -o /dev/null -e /dev/null
433 #\$ -S /bin/bash -t 1-$lstcount
434 SEEDFILE=${dir}.pecmd
435 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
436 eval perl $SCRIPTS/soappe.pl \$SEED
438 close SH;
439 } else {
440 unlink $dir.'.pecmd';
441 unlink $dir.'_soappe.sh';
444 ## Qsub
445 if ($opt_q) {
446 print STDERR '-' x 75,"\n";
447 &qsub($opath,'*_soapse.sh');
448 &qsub($opath,'*_soappe.sh');
450 ### 3.rmdupmerge
451 $lastopath=$opath;
452 $opath=$opt_o.'/3rmdupmerge';
453 system('mkdir','-p',$opath);
454 system('touch',"$opath/_.mglst");
455 system("find $opath/ -name '*.mglst' | xargs rm");
456 for my $k (keys %fqbylib) {
457 $sample=$LibSample{$k}->[0];
458 next unless $sample;
459 my $dir = $opath."/$sample/$k";
460 system('mkdir','-p',$dir);
461 system('mkdir','-p',"$opath/$sample/lst");
462 open CMD,'>',$dir.'.rdcmd' || die "$!\n";
463 my $lstcount=0;
464 for my $chr (keys %ChrLen) {
465 open LST,'>>',$opath."/$sample/lst/${sample}_${chr}.mglst" || die "$!\n";
466 print LST "$dir/$k.$chr\n" or warn "${sample}_${chr}";
467 unless (-s "$dir/$k.$chr" and -s "$dir/${chr}_$k.log") {
468 print CMD "-bi $lastopath/$sample/$k.soaplst -c $chr -o $dir/$k >$dir/${chr}_$k.log 2>$dir/${chr}_$k.err\n";
469 ++$lstcount;
471 #close LST; # Let the open close it.
473 close CMD;
474 close LST;
475 if ($lstcount > 0) {
476 open SH,'>',$dir.'_rmdup.sh' || die "$!\n";
477 print SH "#!/bin/sh
478 #\$ -N \"rmdup_${sample}_$k\"
479 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
480 #\$ -cwd -r y -l vf=280M
481 #\$ -hold_jid \"pe_$k,se_$k\"
482 #\$ -o /dev/null -e /dev/null
483 #\$ -S /bin/bash -t 1-$lstcount
484 SEEDFILE=${dir}.rdcmd
485 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
486 eval perl $SCRIPTS/rmdupbylib.pl -d \$SEED
487 "; # add -d for debug
488 close SH;
489 } else {
490 unlink $dir.'.rdcmd';
491 unlink $dir.'_rmdup.sh';
495 for my $k (keys %SampleLib) {
496 my $dir = $opath."/$k";
497 my $lib=$SampleLib{$k}->[0];
498 next unless defined $fqbylib{$lib};
499 open CMD,'>',$dir."/$k.mgcmd" || die "$!\n";
500 my $lstcount=0;
501 for my $chr (keys %ChrLen) {
502 unless (-s "$dir/${k}_${chr}.sp" and -s "$dir/${k}_${chr}.log") {
503 print CMD "$dir/lst/${k}_${chr}.mglst $dir/${k}_${chr}\n";
504 ++$lstcount;
507 close CMD;
508 if ($lstcount > 0) {
509 open SH,'>',$dir."/${k}_merge.sh" || die "$!\n";
510 print SH "#!/bin/sh
511 #\$ -N \"merge_$k\"
512 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
513 #\$ -cwd -r y -l vf=12M
514 #\$ -hold_jid \"rmdup_${k}_*\"
515 #\$ -o /dev/null -e /dev/null
516 #\$ -S /bin/bash -t 1-$lstcount
517 SEEDFILE=${dir}/$k.mgcmd
518 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
519 eval perl $SCRIPTS/merge.pl \$SEED
521 close SH;
522 } else {
523 unlink $dir."/${k}.mgcmd";
524 unlink $dir."/${k}_merge.sh";
527 ## Qsub
528 if ($opt_q) {
529 print STDERR '-' x 75,"\n";
530 &qsub($opath,'*_rmdup.sh');
531 &qsub($opath,'*_merge.sh');
533 ### 4.GLF
534 $lastopath=$opath;
535 $opath=$opt_o.'/4GLF';
536 my $dir = $opath.'/matrix';
537 system('mkdir','-p',$dir);
538 ## Matrix
539 if (-s "$dir/all.matrix") {
540 system("mv -f ${dir}/all_matrix.sh ${dir}/all_matrix.oldsh") if (-e "${dir}/all_matrix.sh");
541 } else {
542 open SH,'>',$dir."/all_matrix.sh" || die "$!\n";
543 print SH "#!/bin/sh
544 #\$ -N \"All_Matrix\"
545 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
546 #\$ -cwd -r y -l vf=266M
547 #\$ -hold_jid \"merge_*\"
548 #\$ -o /dev/null -e /dev/null
549 #\$ -S /bin/bash
550 f=`find $lastopath/ -name '*.sp'|xargs ls -lH|awk '{print \$5,\$9}'|sort -nrk1|head -n1|awk '{print \$2}'`
551 perl $SCRIPTS/matrix.pl \$f $lopt_f $opt_o/1fqfilted $dir/all
553 close SH;
555 ## Qsub
556 if ($opt_q) {
557 print STDERR '-' x 75,"\n";
558 &qsub($dir,'all_matrix.sh');
560 ## SoapSNP
561 for my $chr (keys %ChrLen) {
562 my $dir = $opath."/$chr";
563 system('mkdir','-p',$dir);
564 unless ($opt_d) {
565 open LST,'>',$dir.'.glflst' || die "$!\n";
566 open CMD,'>',$dir."/$chr.glfcmd" || die "$!\n";
567 my $lstcount=0;
568 for my $k (keys %SampleLib) {
569 print LST "$dir/${k}_$chr.glf\n";
570 unless (-s "$dir/${k}_$chr.tag" or -s "$dir/${k}_$chr.glf") { # 'tag' not working since soapsnp return non-0 on exit
571 my $Mono=' d';
572 $Mono=' m' if $Monoploid{$chr};
573 print CMD "$lastopath/$k/${k}_$chr.sp $dir/${k}_${chr}$Mono\n";
574 ++$lstcount;
577 close CMD;
578 close LST;
579 if ($lstcount > 0) {
580 open SH,'>',$dir.'_glf.sh' || die "$!\n";
581 print SH "#!/bin/sh
582 #\$ -N \"GLF_$chr\"
583 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
584 #\$ -cwd -r y -l vf=280M,s_core=1
585 #\$ -hold_jid \"All_Matrix\"
586 #\$ -o /dev/null -e /dev/null
587 #\$ -S /bin/bash -t 1-$lstcount
588 SEEDFILE=${dir}/$chr.glfcmd
589 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
590 eval perl $SCRIPTS/callglf.pl $opath/matrix/all.matrix $lopt_f $opt_o/1fqfilted \$SEED
592 close SH;
593 } else {
594 unlink $dir."/$chr.glfcmd";
595 unlink $dir.'_glf.sh';
597 } # unless ($opt_d)
599 ## Qsub
600 if ($opt_q) {
601 print STDERR '-' x 75,"\n";
602 &qsub($opath,'*_glf.sh');
605 ### 5.popSNP
606 $lastopath=$opath;
607 $opath=$opt_o.'/5popSNP';
608 $dir = $opath.'/all';
609 system('mkdir','-p',$dir);
610 for my $chr (keys %ChrLen) {
611 my $dir = $opath."/$chr";
612 system('mkdir','-p',$dir);
613 my $lst=$lastopath."/$chr.glflst";
614 my $len=$ChrLen{$chr};
615 open LST,'>',$dir.'.psnplst' || die "$!\n";
616 open CMD,'>',$dir."/$chr.popcmd" || die "$!\n";
617 my $lstcount=0;
618 my ($i,$j,$exit)=(1,$POPSPLIT,0);
619 while ($exit != -1) {
620 ++$exit;
621 if ($j > $len) {
622 $j=$len;
623 $exit=-1;
625 print LST "$dir/${chr}_$exit.psnp\n";
626 unless (-s "$dir/${chr}_$exit.tag") {
627 print CMD "$i $j $lst $dir/${chr}_$exit.psnp >$dir/${chr}_$exit.tag 2>$dir/${chr}_$exit.log\n";
628 ++$lstcount;
630 $i += $POPSPLIT;
631 $j += $POPSPLIT;
633 close CMD;
634 close LST;
635 if ($lstcount > 0) {
636 open SH,'>',$dir.'_popsnp.sh' || die "$!\n";
637 print SH "#!/bin/sh
638 #\$ -N \"PSNP_$chr\"
639 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
640 #\$ -cwd -r y -l vf=70M,s_core=1
641 #\$ -hold_jid GLF_$chr
642 #\$ -o /dev/null -e /dev/null
643 #\$ -S /bin/bash -t 1-$lstcount
644 SEEDFILE=${dir}/$chr.popcmd
645 SEED=\$(sed -n -e \"\$SGE_TASK_ID p\" \$SEEDFILE)
646 eval python $SCRIPTS/GLFmulti.py \$SEED
648 close SH;
649 open SH,'>',$dir.'_wcsnp.sh' || die "$!\n";
650 print SH "#!/bin/sh
651 #\$ -N \"WSNP_$chr\"
652 #\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
653 #\$ -cwd -r y -l vf=60M
654 #\$ -hold_jid PSNP_$chr
655 #\$ -o /dev/null -e /dev/null
656 #\$ -S /bin/bash
657 cat $dir.psnplst|xargs wc -l > $dir.psnpwc
658 rm -f $dir.psnp
659 cat $dir.psnplst|xargs cat >> $dir.psnp
660 wc -l $dir.psnp >> $dir.psnpwc
661 "; # whenever >> , remember to rm first !
662 close SH;
663 } else {
664 unlink $dir."/$chr.popcmd";
665 unlink $dir.'_popsnp.sh';
666 unlink $dir.'_wcsnp.sh';
669 ## Qsub
670 if ($opt_q) {
671 print STDERR '-' x 75,"\n";
672 &qsub($opath,'*_popsnp.sh');
673 &qsub($opath,'*_wcsnp.sh');
676 #END
677 my $stop_time = [gettimeofday];
679 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
680 #print "\nPlease use the following command to batch qsub:\033[32;1m
681 #find ${opt_o}/ -name '*.sh' | while read ll; do qsub -l vf=2G -cwd \$ll; done\n\033[0;0m\n";
682 __END__