5 use lib
'E:/BGI/toGit/perlib/etc';
6 use lib
'/nas/RD_09C/resequencing/soft/lib';
11 our $opts='i:o:c:m:p:sbvne';
12 our($opt_i, $opt_o, $opt_c, $opt_v, $opt_b, $opt_n, $opt_e, $opt_m, $opt_p, $opt_s);
14 our $desc='Genome creater for Single analyse';
16 \t-c Consensus file with refbase
17 \t-p final SNP file (undef for directly from CNS)
18 \t-i filtered Indel file
19 \t-m merge.list for dividing mixed Chrs if any
20 \t-o Output file prefix for the sample, directories must exist
21 \t-n mask noncoverage bases to "n", otherwise little cased
22 \t-s change hete SNP to homo by subtracting Ref base
23 \t-e skip hete indels, otherwise both homo and hete(little cased) used
24 \t-v show verbose info to STDOUT
25 \t-b No pause for batch runs
30 our %IUB = ( A
=> [qw(A)],
49 warn "[x]Must specify -c xx.consensus.txt !\n" unless $opt_c;
50 warn "[x]Must specify -i xx.indel.txt.filter !\n" unless $opt_i;
51 warn "[x]Must specify -o ./xx !\n" unless $opt_o;
52 exit 1 unless $opt_i and $opt_o and $opt_c;
54 die "[x]Invalid merge.list: [$opt_m] !\n" unless (-s
$opt_m);
57 die "[x]Invalid SNP file: [$opt_p] !\n" unless (-s
$opt_p);
60 print STDERR
"From [$opt_c][$opt_i] to [$opt_o]\n ";
61 print STDERR
"merge.list is [$opt_m]\n " if $opt_m;
62 print STDERR
"SNP file is [$opt_p]\n " if $opt_p;
63 print STDERR
"noncoverage mask to n. " if $opt_n;
64 print STDERR
"hete indels will be skiped." if $opt_e;
66 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
68 my (%Genome,%Depth,%Merged,%SNP,$snpChr);
74 #my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:','','',\%attr) or die $DBI::errstr;
76 #CREATE TABLE IF NOT EXISTS merge
82 #$dbh->do($sql) or die $dbh->errstr;
84 #my $sthi = $dbh->prepare( 'INSERT INTO merge ( chrid,scaffold,start,end ) VALUES ( ?,?,?,? )' );
85 #my $stho = $dbh->prepare( 'SELECT DISTINCT scaffold,start,end FROM merge WHERE chrid=? AND ? BETWEEN start AND end' );
88 open M
,'<',$opt_m or die "Error opening $opt_m: $!\n";
91 my ($chrid,$scaffold,$start,$end)=split /\t/;
92 #$sthi->execute($chrid,$scaffold,$start,$end);
93 $Merged{$chrid}{$start}=[$scaffold,$end];
96 warn "\n[!]merge.list done.\n";
99 open M
,'<',$opt_p or die "Error opening $opt_p: $!\n";
102 my ($chrid,$pos,$ref,$cons)=split /\t/;
103 #$sthi->execute($chrid,$scaffold,$start,$end);
105 print "SNP:$chrid,$pos,$ref,$cons -> " if $opt_v;
107 my @bases=@{$IUB{$cons}};
115 print "$cons\n" if $opt_v;
117 $SNP{$chrid}{$pos}=$cons;
120 warn "\n[!]SNP file done.\n";
123 sub prograss_bar
($$) {
124 my ($i,$total) = ($_[0],$_[1]);
126 my $pos=int(($i/$total)*50);
127 print "\r [" , '=' x
$pos, ($pos<50)?
'>':'=', ' ' x
(49 - $pos), '] ';
128 printf("%2.1f %% ",$i/$total*100);
133 #open CNS,'<',$opt_c or die "Error opening $opt_c: $!\n";
134 if ($opt_c =~ /\.gz$/) {
135 open( CNS
,'-|',"gzip -dc $opt_c") or die "[x]Error opening $opt_c: $!\n";
136 } elsif ($opt_c =~ /\.bz2$/) {
137 open( CNS
,'-|',"bzip2 -dc $opt_c") or die "[x]Error opening $opt_c: $!\n";
138 } else {open( CNS
,'<',$opt_c) or die "[x]Error opening $opt_c: $!\n";}
143 my ($chr,$pos,$best,$depth,$ref,$indel,$bases,$homhet,$strand,$t,@bases);
146 ($chr,$pos,$ref,$best,undef,undef,undef,undef,undef,undef,undef,undef,undef,$depth) = split /\t/;
147 unless ($ref) { # if file not completed.
152 &prograss_bar
((tell CNS
),$FL) if $count%1000 == 0;
153 if ($opt_s and $best !~ /[ATCG]/) {
154 @bases=@
{$IUB{$best}};
164 unless ($opt_n) {$best=lc $ref;}
165 else {$best=$ref='n';}
166 } else {$Depth{$chr}->{$pos}=chr $depth;} # string is smaller than double
167 # WARING: Something might be wrong if $depth > 255 !
168 # However, people should not have such much money.
169 #$best = $ref; # DEBUG ONLY !!!
171 if ($SNP{$chr}{$pos}) {$Genome{$chr}->[$pos]=$best;}
172 else {$Genome{$chr}->[$pos]=$ref;}
173 #warn '.' if $best ne $SNP{$chr}{$pos};
174 } else { $Genome{$chr}->[$pos]=$best; }
175 #$Depth{$chr}->[$pos]=$depth;
179 warn "[!]CNS done.\n";
181 open INDEL
,'<',$opt_i or die "Error opening $opt_i: $!\n";
183 ($chr,$pos,$indel,$bases,$strand,$homhet,undef,$depth,$t) = split /\t/;
184 unless ($t) { # if file not completed.
188 next unless exists $Genome{$chr};
189 if ($homhet eq 'hete') {
190 unless ($opt_e) {$bases = lc $bases;}
193 if ($indel =~ /^([ID])(\d+)$/) {
195 $Genome{$chr}->[$pos] .= $bases; # if length($Genome{$chr}->[$t]) == 1
196 $Depth{$chr}{$pos} .= chr($depth) x
$2;
199 #++$pos; # deletion including it.
201 print "[D]$indel:${1}_${2}@","$pos $strand\t";
202 #print $Genome{$chr}->[$_] for ($pos..$pos+$i);
203 print(($t=$Genome{$chr}->[$_])?
$t:'.') for ($pos-5..$pos-1);
205 print(($t=$Genome{$chr}->[$_])?
$t:'.',' ') for ($pos..$pos+$i);
207 print(($t=$Genome{$chr}->[$_])?
$t:'.') for ($pos+$i+1..$pos+$i+5);
208 print ' - ',$bases,"\n";
210 for $t ($pos..$pos+$i) {
211 $Genome{$chr}->[$t]='';
212 delete $Depth{$chr}{$pos};# if exists $Depth{$chr}{$pos};
215 } else {print STDERR
'|';next;}
218 warn "[!]INDEL done.\n";
220 my ($i,$dep,$scaf,$name);
221 for $chr (keys %Genome) {
222 $Genome{$chr}->[0]=''; # no longer undef
225 unless (exists $Merged{$chr}) { # exists should faster than defined ?
226 $name=$opt_o.'.'.$chr;
227 open OUT
,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
229 open DEP
,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
233 for (@
{$Genome{$chr}}) {
238 $dep=$Depth{$chr}{$i} or $dep="\0";
239 $dep = join(' ',map ord,split //,$dep);
240 #print DEP '[',$dep,']';
245 } else {print DEP
$dep,' ';}
253 system('mkdir','-p',$t);
254 my ($end,$scafname,$toPrint)=(-1,'',0); # -1 to fix 'print() on unopened filehandle OUT at ./conasm.pl line 178 & 179 of elsif ($i == $end).'
256 for (@
{$Genome{$chr}}) {
258 if (exists $$scaf{$i}) { # Start point
259 ($scafname,$end)=@
{$$scaf{$i}};
260 $name=$opt_o.'.'.$chr.'/'.$scafname;
261 open OUT
,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
262 print OUT
">$scafname\n";
263 open DEP
,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
264 print DEP
">$scafname\n";
265 $toPrint=1; # when set to 0, no output for manmade junctions.
268 next if ($_ eq '' or $toPrint==0);
271 $dep=$Depth{$chr}{$i} or $dep="\0";
272 $dep = join(' ',map ord,split //,$dep);
273 #print DEP '[',$dep,']';
278 } else {print DEP
$dep,' ';}
282 $dep=$Depth{$chr}{$i} or $dep="\0";
283 $dep = join(' ',map ord,split //,$dep);
297 warn "[!]Output done.\n";
300 vf
=11.9g
for 1.3G Feb
20 07:23 consensus
/QRS9
.Gm03
.txt
, 47,781,076
302 find
./consensus/ -name
'*.txt'|perl
-lane
'$m="";$vf="14g";$a=(split /\//)[-1];$b=(split /\./,$a)[0];open O,">./sh/$a.sh";if ($a=~/SGm/) {$m="-m soybean.merge.list ";$vf="4.7g"};print O "#\$ -N \"C_$a\"\n#\$ -cwd -r y -l vf=$vf,p=1\n#\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#\$ -o /dev/null -e /dev/null\n./conasm.pl ${m}-i ./Indel/${b}.indel.txt.filter -c $_ -bno ./out/$b 2>./log/$a.log";close O'
304 $ cat sh
/QRS21
.Gm13
.txt
.sh
305 #$ -N "C_QRS21.Gm13.txt"
306 #$ -cwd -r y -l vf=14g,s_core=1
307 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
308 #$ -o /dev/null -e /dev/null
309 ./conasm.pl -i ./Indel
/QRS21.indel.txt.filter -c ./consensu
s/QRS21.Gm13.txt -bo ./out/QRS21
2>./log/QRS21
.Gm13
.txt
.log
311 ./conasmS.pl -c P142cns/P142
.chromosome01
.cns
-p P142snp
/P142.chromosome01.snp -i P142indel.list.filter -ne -o oP142j/m
313 cat chrorderNip
| perl
-lane
'$p="142";$a=$_;open O,">./sh/do$p$a.sh";print O "#\$ -N \"C_$a\"\n#\$ -cwd -r y -l vf=12g,p=1\n#\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#\$ -o /dev/null -e /dev/null\n./conasmS.pl -c P${p}cns/P${p}.$a.cns -p P${p}snp/P${p}.$a.snp -i P${p}indel.list.filter -nbe -o oP${p}j/m 2>./log/${p}_$a.log";close O'
315 cat chrorder9311
| perl
-lane
'$p="143";$a=$_;open O,">./sh/do$p$a.sh";print O "#\$ -N \"C_$a\"\n#\$ -cwd -r y -l vf=12g,p=1\n#\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#\$ -o /dev/null -e /dev/null\n./conasmS.pl -c P${p}cns/P${p}.$a.cns -p P${p}snp/P${p}.$a.snp -i P${p}indel.list.filter -nbe -o oP${p}i/m 2>./log/${p}_$a.log";close O'
317 ./conasm.pl -m soybean.merge.list -i ./Indel
/QRS29.indel.txt.filter -c ./consensu
s/QRS29.SGm1.txt -bno ./out/QRS29
2>./log/QRS29
.SGm1
.txt
.log
319 cat
../9311/chrorder
|while read a
; do n
="9308"; echo
-e
"#$ -N c$a\n#$ -cwd -r y -l vf=10.5g\n#$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#$ -o ./fa20100926/${n}_${a}.log -e ./fa20100926/${n}_${a}.err\nperl ./conasmS.pl -bi ./indel_f/$n.indel-result.filter -c ./3GLF/$a/${n}_${a}.cns -o ./fa20100926/$n\n" > ./sh/con
$n$a.sh
; done