limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / popuSNP / qsnp.pl
bloba2bc64deab1126239970a70f05b53c0b1f8dd19c
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use Time::HiRes qw ( gettimeofday tv_interval );
5 use Galaxy::ShowHelp;
6 use Galaxy::SGE::MakeJobSH 0.06;
8 $main::VERSION=0.0.3;
9 my $bin='/share/raid010/resequencing/user/huxs/release/popSNP/GLFmulti.py';
11 our $opts='i:o:c:f:s:bv';
12 our ($opt_i, $opt_c, $opt_o, $opt_f, $opt_s, $opt_v, $opt_b);
14 our $desc='GLF2SNP Job file Maker';
15 our $help=<<EOH;
16 \t-i GLF path (./GLF) with SampleName_ChrID.glf, may not exists now
17 \t-c chrlen file (./chrlen) in format: /^ChrID\\tLen\\t?.*\$/
18 \t-f fragments length (1000000)
19 \t-s sample.list (sample.lst) in format: /^Sample\\t?.*\$/
20 \t-o popSNP output path (./popSNP), will mkdir if not exist
21 \t-v show verbose info to STDOUT
22 \t-b No pause for batch runs
23 EOH
25 ShowHelp();
27 $opt_i='./GLF' if ! $opt_i;
28 $opt_c='./chrlen' if ! $opt_c;
29 $opt_f='1000000' if ! $opt_f;
30 $opt_f='1000000' if $opt_f<10;
31 $opt_s='sample.lst' if ! $opt_s;
32 $opt_o='./popSNP' if ! $opt_o;
33 $opt_i =~ s/\/$//;
34 $opt_o =~ s/\/$//;
35 print STDERR "From [$opt_i] with [$opt_c][$opt_s] frag. [$opt_f] to [$opt_o]\n";
36 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <STDIN>;}
37 --$opt_f;
39 my $start_time = [gettimeofday];
40 #BEGIN
41 #unless (-s $opt_c) {die "[x]chrlen file [$opt_c] is nothing !\n";}
42 my (%ChrLen,@Samples);
43 open CHRLEN,'<',$opt_c or die "[x]Error opening $opt_c: $!\n";
44 while (<CHRLEN>) {
45 chomp;
46 s/\r//g;
47 my ($chrid,$len)=split /\s+/; # I need a program to make chrlen ...
48 next if ($chrid =~ /total/i);
49 print "[$chrid]\t[$len]\n" if $opt_v;
50 $ChrLen{$chrid}=$len;
52 close CHRLEN;
53 open SAMPLE,'<',$opt_s or die "[x]Error opening $opt_s: $!\n";
54 while (<SAMPLE>) {
55 chomp;
56 s/\r//g;
57 my ($sample)=split /\t/;
58 print "[$sample]\n" if $opt_v;
59 push @Samples,$sample;
61 close SAMPLE;
62 system('mkdir','-p',"$opt_o/list");
63 system('mkdir','-p',"$opt_o/merge");
64 my ($pos1,$pos2,$part,$cmd,$chrlen,@parts,$partfile);
65 for my $chr (keys %ChrLen) {
66 my $listfile="$opt_o/list/$chr.list";
67 open LIST,'>',$listfile or die "[x]Error opening $listfile: $!\n";
68 print LIST "$opt_i/$_/${_}_$chr.glf\n" for @Samples;
69 close LIST;
70 system('mkdir','-p',"$opt_o/$chr");
71 $chrlen=$ChrLen{$chr};
72 $part=$pos1=1;
73 $pos2 = $pos1 + $opt_f;
74 @parts=();
75 while ($chrlen >= $pos2) {
76 $partfile="$opt_o/$chr/${chr}_$part";
77 push @parts,$partfile;
78 $cmd="python $bin $pos1 $pos2 $listfile $partfile ERR";
79 my $sh=Galaxy::SGE::MakeJobSH->new(vf=>'66M',cmd=>$cmd,name=>"${part}${chr}pSNP");
80 $sh->markopt("-i $partfile");
81 open SH,'>',"$opt_o/$chr/${chr}-$part.sh";
82 print SH $sh->format;
83 close SH;
84 ++$part;
85 $pos1 = 1+$pos2;
86 $pos2 = $pos1 + $opt_f;
88 if ($pos2 > $chrlen) {
89 $partfile="$opt_o/$chr/${chr}_$part";
90 push @parts,$partfile;
91 $cmd="python $bin $pos1 $chrlen $listfile $partfile ERR";
92 my $sh=Galaxy::SGE::MakeJobSH->new(vf=>'66M',cmd=>$cmd,name=>"${part}${chr}fpSNP");
93 $sh->markopt("-i $partfile");
94 open SH,'>',"$opt_o/$chr/${chr}-$part.sh";
95 print SH $sh->format;
96 close SH;
98 open SH,'>',"$opt_o/merge/${chr}-merge.sh";
99 $cmd='cat '.join(' ',@parts)." > $opt_o/merge/${chr} LOG\nwc -l $opt_o/$chr/${chr}_* LOG\nwc -l $opt_o/merge/${chr} LOG";
100 my $sh=Galaxy::SGE::MakeJobSH->new(vf=>'9M',cmd=>$cmd,name=>"${chr}mixpSNP");
101 my $partsfile="$opt_o/merge/.${chr}-merge.list";
102 open L,'>',$partsfile;
103 print L "$_\n" for @parts;
104 close L;
105 $sh->waitopt("-li $partsfile");
106 $sh->markopt("-i $opt_o/merge/${chr}");
107 print SH $sh->format;
108 close SH;
109 print '.'; # dots every sample.
111 print "\nAll done !\n";
115 #END
116 my $stop_time = [gettimeofday];
118 print STDERR "\nTime Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s).\n";
119 print "\nPlease use the following command to batch qsub:\033[32;1m
120 find ${opt_o}/ -name '*.sh' |sort| while read ll; do sh \$ll; done\n\033[0;0m\n";
121 __END__