4 use lib
'/nas/RD_09C/resequencing/soft/lib';
5 use Time
::HiRes qw
( gettimeofday tv_interval
);
6 use GalaxyXS
::ChromByte
1.02;# ':all';
11 our $opts='i:o:v:r:l:c:p:bd';
12 our($opt_i, $opt_o, $opt_v, $opt_b, $opt_d, $opt_l, $opt_c, $opt_p);
15 \t-i soaps.lst (2soap/soaps.lst)
16 \t-l block file lst (./block.lst)
17 \t-c Chromosome NFO file (chr.nfo) in format: /^ChrID\\s+ChrLen\\s?.*\$/
18 \t-o Output path (./cfq) for [Sample/F_L_Lib.dat.gz], will mkdir if not exist
20 \t-v Verbose level (undef=0)
21 \t-b No pause for batch runs
22 \t-d Debug Mode, for test only
27 $opt_i='./2soap/soaps.lst' if ! $opt_i;
28 die "[x]-i $opt_i not exists !\n" unless -f
$opt_i;
29 $opt_l='./block.lst' if ! $opt_l;
30 $opt_o='./ffq' if ! $opt_o;
31 $opt_c='chr.nfo' if ! $opt_c;
40 print STDERR
"From [$opt_i],$opt_p [$opt_l] to [$opt_o] refer to [$opt_c]\n";
41 print STDERR
"DEBUG Mode on !\n" if $opt_d;
42 print STDERR
"Verbose Mode [$opt_v] !\n" if $opt_v;
43 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
45 my $start_time = [gettimeofday
];
48 open ChrNFO
,'<',$opt_c or die "[x]Error opening $opt_c: $!\n";
51 my ($chrid,$len)=split /\t/;
56 system('mkdir','-p',$opt_o);
58 open LST
,'<',$opt_l or die "[x]Error opening $opt_l: $!\n";
59 my (%BlockD,%BlockS,%BlockDO);
62 my ($chrid,$block)=split /\t/;
63 open B
,'<',$block or die "[x]Error opening $block: $!\n";
64 print STDERR
"[!] Block $chrid: ";
67 my @Sample=split /\s+/;
68 system('mkdir','-p',$opt_o.'/'.$_) for @Sample;
69 $BlockS{$chrid}=\
@Sample;
72 my ($pos,@Dat)=split /\s+/;
73 next if $pos !~ /\d+/;
74 $BlockD{$chrid}{$pos}=join('',@Dat);
77 my @PosOrder=sort {$a<=>$b} keys %{$BlockD{$chrid}};
78 $BlockDO{$chrid}=\
@PosOrder;
79 $ChrMem{$chrid}=initchr
($ChrLen{$chrid});
80 warn "$ChrLen{$chrid}.\n";
93 for my $chrid (keys %BlockS) {
94 my $sp=0; # Starts from 0
95 for (@
{$BlockS{$chrid}}) {
96 last if $_ eq $sample;
99 setbases
($ChrMem{$chrid},0,$ChrLen{$chrid},0); # Zero it
101 for my $pos (@
{$BlockDO{$chrid}}) {
102 my $CurGT=substr($BlockD{$chrid}{$pos},$sp,1);
104 $v=$varToGT{$CurGT} if exists $varToGT{$CurGT};
105 setbases
($ChrMem{$chrid},$lastPos,$pos,$v);
112 return 0 unless exists $ChrMem{$chrid};
113 return 0 if $pos > $ChrLen{$chrid} or $pos <= 0;
114 return getbase
($ChrMem{$chrid},$pos);
120 my ($PESE,$sample,$lib,$FL)=@
$fileNFO;
122 open $FH,'-|',"gzip -dc $file.gz" or die "[x]Error opening $PESE [$file.gz] with gzip: $!\n";
123 print STDERR
'[!]',join(', ',$sample,$lib,$FL,"$file.gz"),"\n";
125 open $FH,'<',$file or die "[x]Error opening $PESE [$file]: $!\n";
126 print STDERR
'[!]',join(', ',$sample,$lib,$FL,$file),"\n";
133 my ($FH,$Dat,$CurtLine,$NextLine)=@
{$FDH};
134 my ($line,$ID, $Chr, $Pos, $len);
137 ($ID, $Chr, $Pos, $len)=(split /\t/,$line)[0,7,8,5];
138 $NextLine=[$ID, $Chr, $Pos, $len];
140 $NextLine=[-2,'',0,0];
142 $FDH->[2] = $CurtLine;
143 $FDH->[3] = $NextLine;
147 my ($FH,$Dat,$CurtLine,$NextLine)=@
{$FDH};
148 if ( $NextLine->[0] == -2 or $CurtLine->[0] < $NextLine->[0] ) {
149 unless ($NextLine->[0] == -2) {
150 $Dat=[$CurtLine->[0],$CurtLine->[1],$CurtLine->[2],$CurtLine->[2]+$CurtLine->[3]-1,'',0,0];
153 $Dat=[-2,'',0,0,'',0,0];
155 } elsif ( $CurtLine->[0] == $NextLine->[0] ) {
156 $Dat=[$CurtLine->[0],$CurtLine->[1],$CurtLine->[2],$CurtLine->[2]+$CurtLine->[3]-1,$NextLine->[1],$NextLine->[2],$NextLine->[2]+$NextLine->[3]-1];
159 } else { die "[x]Not a raw SOAP2 file !"; }
164 my ($FDH,$lastID)=@_;
166 FILES
: for my $fdh (@
$FDH) {
167 while ($fdh->[1]->[0] <= $lastID) {
168 if ($fdh->[1]->[0] == -2) {
172 &nextPair
($fdh); # Sync
177 @nFDH = sort { $a->[1]->[0] <=> $b->[1]->[0] } @nFDH; # ASC
179 return $nFDH[0]->[1];
181 return [-2,'',0,0,'',0,0];
185 open LST
,'<',$opt_i or die "[x]Error opening $opt_i: $!\n";
190 @LSTdat=($LSTdat[$opt_p]);
194 my ($PESE,$sample,$lib,$FL,$ReadLen,$nfofpath)=split /\t/;
195 readGTtoMEM
($sample);
196 $fileNFO=[$PESE,$sample,$lib,$FL];
197 $nfofpath =~ s/\.nfo$//;
200 @Files=("$nfofpath.soap","$nfofpath.single");
202 @Files=("$nfofpath.soap");
205 my $shfile="$nfofpath.archive";
207 open SH
,'<',$shfile or die "[x]Error opening $shfile: $!\n";
208 } elsif (-s
"$shfile.0") {
209 open SH
,'<',"$shfile.0" or die "[x]Error opening $shfile.0: $!\n";
211 warn "[!]$shfile not found, skipped [$PESE,$sample,$lib,$FL].\n";
216 next unless / -[ab] /;
217 while (/ -([ab]) (\S+)\b/g) {
221 open OUT
,'|-',"gzip -9c - >$opt_o/$sample/${FL}_$lib.dat.gz" or die "[x]Error opening [$opt_o/$sample/${FL}_$lib.dat.gz] with gzip: $!\n";
222 print OUT
"#$FQ{'a'},$FQ{'b'}\n#$PESE,$sample,$lib,$FL,$ReadLen,$nfofpath\n";
224 my $tbuf=[-1,'',0,0];
226 push @FHD,[&getFH
($_),$tdat,$tbuf,$tbuf] for @Files;
227 # Handle, [id,chr1,pos1,chr2,pos2],bufferCur[id,chr,pos,ab],bufferNext[id,chr,pos,ab]
229 while ($lastID > -2) {
230 my ($id,$chr1,$pos1,$end1,$chr2,$pos2,$end2)=@
{&getSeq
(\
@FHD,$lastID)};
231 my $GTa=&getGT
($chr1,$pos1);
232 my $GTb=&getGT
($chr2,$pos2);
233 my $GTae=&getGT
($chr1,$end1);
234 my $GTbe=&getGT
($chr2,$end2);
235 print OUT
join("\t",$id,$GTa|$GTb|$GTae|$GTbe,"$GTa,$GTae;$GTb,$GTbe","$chr1,$pos1-$end1; $chr2,$pos2-$end2"),"\n" if $id >=0;
239 warn "[!]$opt_o/$sample/${FL}_$lib.dat done.\n",'-' x
75,"\n";
244 my $stop_time = [gettimeofday
];
246 print STDERR
"\nTime Elapsed:\t",tv_interval
( $start_time, $stop_time )," second(s).\n";
248 $ ./classifySeqbyBlock.pl -bi ./to9311g10a
/2soap/soaps.lst
250 less
-S ffq
/BI10-1/FC704U5AAXX_L5_ORYqzpRALDIAAPEI
-4.dat
251 gzip
-dc
./to9311g10a/2soap
/BI10-1/ORYqzpRALDIAAPEI
-4/100506_I328_FC704U5AAXX_L5_ORYqzpRALDIAAPEI
-4_1
.soap
.gz
| awk
'{print $1"\t"$8"\t"$9"\t"$5}' |less
-S
252 gzip
-dc
./to9311g10a/2soap
/BI10-1/ORYqzpRALDIAAPEI
-4/100506_I328_FC704U5AAXX_L5_ORYqzpRALDIAAPEI
-4_1
.single
.gz
| awk
'{print $1"\t"$8"\t"$9"\t"$5}' |less
-S
254 PR NI VIRT RES SHR S
%CPU %MEM TIME
+ COMMAND
255 16 0 681m
605m
1760 S
1 3.8 3:17.54 classifySeqbyBl
258 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
259 #$ -cwd -r y -l vf=950m
260 #$ -o /dev/null -e /dev/null
261 #$ -S /bin/bash -t 1-195
262 perl
./classifySeqbyBlock.pl -bi ./to9311g10a
/2soap/soaps.lst
-p
$SGE_TASK_ID -o
./ffq2 2> ./log/classifySeqbyBlock
.$SGE_TASK_ID.log