9 our $opts='i:o:c:m:bvne';
10 our($opt_i, $opt_o, $opt_c, $opt_v, $opt_b, $opt_n, $opt_e, $opt_m);
12 our $desc='Genome creater for population analyse';
14 \t-c Consensus file with refbase
15 \t-i filtered Indel file
16 \t-m merge.list for dividing mixed Chrs if any
17 \t-o Output file prefix for the sample, directories must exist
18 \t-n mask noncoverage bases to "n", otherwise little cased
19 \t-e skip hete indels, otherwise both homo and hete(little cased) used
20 \t-v show verbose info to STDOUT
21 \t-b No pause for batch runs
26 warn "[x]Must specify -c xx.consensus.txt !\n" unless $opt_c;
27 warn "[x]Must specify -i xx.indel.txt.filter !\n" unless $opt_i;
28 warn "[x]Must specify -o ./xx !\n" unless $opt_o;
29 exit 1 unless $opt_i and $opt_o and $opt_c;
31 die "[x]Invalid merge.list: [$opt_m] !\n" unless (-s
$opt_m);
34 print STDERR
"From [$opt_c][$opt_i] to [$opt_o]\n ";
35 print STDERR
"merge.list is [$opt_m]\n " if $opt_m;
36 print STDERR
"noncoverage mask to n. " if $opt_n;
37 print STDERR
"hete indels will be skiped." if $opt_e;
39 unless ($opt_b) {print STDERR
'press [Enter] to continue...'; <>;}
41 my (%Genome,%Depth,%Merged);
47 #my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:','','',\%attr) or die $DBI::errstr;
49 #CREATE TABLE IF NOT EXISTS merge
55 #$dbh->do($sql) or die $dbh->errstr;
57 #my $sthi = $dbh->prepare( 'INSERT INTO merge ( chrid,scaffold,start,end ) VALUES ( ?,?,?,? )' );
58 #my $stho = $dbh->prepare( 'SELECT DISTINCT scaffold,start,end FROM merge WHERE chrid=? AND ? BETWEEN start AND end' );
61 open M
,'<',$opt_m or die "Error opening $opt_m: $!\n";
64 my ($chrid,$scaffold,$start,$end)=split /\t/;
65 #$sthi->execute($chrid,$scaffold,$start,$end);
66 $Merged{$chrid}{$start}=[$scaffold,$end];
69 warn "\n[!]merge.list done.\n";
72 open CNS
,'<',$opt_c or die "Error opening $opt_c: $!\n";
73 my ($chr,$pos,$best,$depth,$ref,$indel,$bases,$homhet,$strand,$t);
76 ($chr,$pos,undef,$best,undef,undef,$depth,$ref) = split /\t/;
77 unless ($ref) { # if file not completed.
82 unless ($opt_n) {$best=lc $ref;}
84 } else {$Depth{$chr}->{$pos}=chr $depth;} # string is smaller than double
85 # WARING: Something might be wrong if $depth > 255 !
86 # However, people should not have such much money.
87 #$best = $ref; # DEBUG ONLY !!!
88 $Genome{$chr}->[$pos]=$best;
89 #$Depth{$chr}->[$pos]=$depth;
92 warn "[!]CNS done.\n";
94 open INDEL
,'<',$opt_i or die "Error opening $opt_i: $!\n";
96 ($chr,$pos,$indel,$bases,$strand,$homhet,undef,$depth,$t) = split /\t/;
97 unless ($t) { # if file not completed.
101 next unless exists $Genome{$chr};
102 if ($homhet eq 'hete') {
103 unless ($opt_e) {$bases = lc $bases;}
106 if ($indel =~ /^([ID])(\d+)$/) {
108 $Genome{$chr}->[$pos] .= $bases; # if length($Genome{$chr}->[$t]) == 1
109 $Depth{$chr}{$pos} .= chr($depth) x
$2;
112 #++$pos; # deletion including it.
114 print "[D]$indel:${1}_${2}@","$pos $strand\t";
115 #print $Genome{$chr}->[$_] for ($pos..$pos+$i);
116 print(($t=$Genome{$chr}->[$_])?
$t:'.') for ($pos-5..$pos-1);
118 print(($t=$Genome{$chr}->[$_])?
$t:'.',' ') for ($pos..$pos+$i);
120 print(($t=$Genome{$chr}->[$_])?
$t:'.') for ($pos+$i+1..$pos+$i+5);
121 print ' - ',$bases,"\n";
123 for $t ($pos..$pos+$i) {
124 $Genome{$chr}->[$t]='';
125 delete $Depth{$chr}{$pos};# if exists $Depth{$chr}{$pos};
128 } else {print STDERR
'|';next;}
131 warn "[!]INDEL done.\n";
133 my ($i,$dep,$scaf,$name);
134 for $chr (keys %Genome) {
135 $Genome{$chr}->[0]=''; # no longer undef
138 unless (exists $Merged{$chr}) { # exists should faster than defined ?
139 $name=$opt_o.'.'.$chr;
140 open OUT
,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
142 open DEP
,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
146 for (@
{$Genome{$chr}}) {
151 $dep=$Depth{$chr}{$i} or $dep="\0";
152 $dep = join(' ',map ord,split //,$dep);
153 #print DEP '[',$dep,']';
158 } else {print DEP
$dep,' ';}
166 system('mkdir','-p',$t);
167 my ($end,$scafname,$toPrint)=(-1,'',0); # -1 to fix 'print() on unopened filehandle OUT at ./conasm.pl line 178 & 179 of elsif ($i == $end).'
169 for (@
{$Genome{$chr}}) {
171 if (exists $$scaf{$i}) { # Start point
172 ($scafname,$end)=@
{$$scaf{$i}};
173 $name=$opt_o.'.'.$chr.'/'.$scafname;
174 open OUT
,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
175 print OUT
">$scafname\n";
176 open DEP
,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
177 print DEP
">$scafname\n";
178 $toPrint=1; # when set to 0, no output for manmade junctions.
181 next if ($_ eq '' or $toPrint==0);
184 $dep=$Depth{$chr}{$i} or $dep="\0";
185 $dep = join(' ',map ord,split //,$dep);
186 #print DEP '[',$dep,']';
191 } else {print DEP
$dep,' ';}
195 $dep=$Depth{$chr}{$i} or $dep="\0";
196 $dep = join(' ',map ord,split //,$dep);
210 warn "[!]Output done.\n";
213 vf
=11.9g
for 1.3G Feb
20 07:23 consensus
/QRS9
.Gm03
.txt
, 47,781,076
215 find
./consensus/ -name
'*.txt'|perl
-lane
'$m="";$vf="14g";$a=(split /\//)[-1];$b=(split /\./,$a)[0];open O,">./sh/$a.sh";if ($a=~/SGm/) {$m="-m soybean.merge.list ";$vf="4.7g"};print O "#\$ -N \"C_$a\"\n#\$ -cwd -r y -l vf=$vf,p=1\n#\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#\$ -o /dev/null -e /dev/null\n./conasm.pl ${m}-i ./Indel/${b}.indel.txt.filter -c $_ -bno ./out/$b 2>./log/$a.log";close O'
217 $ cat sh
/QRS21
.Gm13
.txt
.sh
218 #$ -N "C_QRS21.Gm13.txt"
219 #$ -cwd -r y -l vf=14g,s_core=1
220 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
221 #$ -o /dev/null -e /dev/null
222 ./conasm.pl -i ./Indel
/QRS21.indel.txt.filter -c ./consensu
s/QRS21.Gm13.txt -bo ./out/QRS21
2>./log/QRS21
.Gm13
.txt
.log
224 ./conasm.pl -m soybean.merge.list -i ./Indel
/QRS29.indel.txt.filter -c ./consensu
s/QRS29.SGm1.txt -bno ./out/QRS29
2>./log/QRS29
.SGm1
.txt
.log