limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / popuSNP / conasmP.pl
blob53f3cd374bbcb55352d985cfd2ffda60c8de2c61
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 #use DBI;
5 use Galaxy::ShowHelp;
7 $main::VERSION=0.1.3;
9 our $opts='i:o:c:m:bvne';
10 our($opt_i, $opt_o, $opt_c, $opt_v, $opt_b, $opt_n, $opt_e, $opt_m);
12 our $desc='Genome creater for population analyse';
13 our $help=<<EOH;
14 \t-c Consensus file with refbase
15 \t-i filtered Indel file
16 \t-m merge.list for dividing mixed Chrs if any
17 \t-o Output file prefix for the sample, directories must exist
18 \t-n mask noncoverage bases to "n", otherwise little cased
19 \t-e skip hete indels, otherwise both homo and hete(little cased) used
20 \t-v show verbose info to STDOUT
21 \t-b No pause for batch runs
22 EOH
24 ShowHelp();
26 warn "[x]Must specify -c xx.consensus.txt !\n" unless $opt_c;
27 warn "[x]Must specify -i xx.indel.txt.filter !\n" unless $opt_i;
28 warn "[x]Must specify -o ./xx !\n" unless $opt_o;
29 exit 1 unless $opt_i and $opt_o and $opt_c;
30 if ($opt_m) {
31 die "[x]Invalid merge.list: [$opt_m] !\n" unless (-s $opt_m);
34 print STDERR "From [$opt_c][$opt_i] to [$opt_o]\n ";
35 print STDERR "merge.list is [$opt_m]\n " if $opt_m;
36 print STDERR "noncoverage mask to n. " if $opt_n;
37 print STDERR "hete indels will be skiped." if $opt_e;
38 warn "\n";
39 unless ($opt_b) {print STDERR 'press [Enter] to continue...'; <>;}
41 my (%Genome,%Depth,%Merged);
42 #my %attr = (
43 # RaiseError => 0,
44 # PrintError => 1,
45 # AutoCommit => 0
46 #);
47 #my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:','','',\%attr) or die $DBI::errstr;
48 #my $sql=q/
49 #CREATE TABLE IF NOT EXISTS merge
50 #( chrid TEXT,
51 # scaffold TEXT,
52 # start INTEGER,
53 # end INTEGER );
54 #/;
55 #$dbh->do($sql) or die $dbh->errstr;
56 #$dbh->commit;
57 #my $sthi = $dbh->prepare( 'INSERT INTO merge ( chrid,scaffold,start,end ) VALUES ( ?,?,?,? )' );
58 #my $stho = $dbh->prepare( 'SELECT DISTINCT scaffold,start,end FROM merge WHERE chrid=? AND ? BETWEEN start AND end' );
60 if ($opt_m) {
61 open M,'<',$opt_m or die "Error opening $opt_m: $!\n";
62 while (<M>) {
63 chomp;
64 my ($chrid,$scaffold,$start,$end)=split /\t/;
65 #$sthi->execute($chrid,$scaffold,$start,$end);
66 $Merged{$chrid}{$start}=[$scaffold,$end];
68 close M;
69 warn "\n[!]merge.list done.\n";
72 open CNS,'<',$opt_c or die "Error opening $opt_c: $!\n";
73 my ($chr,$pos,$best,$depth,$ref,$indel,$bases,$homhet,$strand,$t);
74 while (<CNS>) {
75 chomp;
76 ($chr,$pos,undef,$best,undef,undef,$depth,$ref) = split /\t/;
77 unless ($ref) { # if file not completed.
78 print STDERR '.';
79 next;
81 if ($depth==0) {
82 unless ($opt_n) {$best=lc $ref;}
83 else {$best='n';}
84 } else {$Depth{$chr}->{$pos}=chr $depth;} # string is smaller than double
85 # WARING: Something might be wrong if $depth > 255 !
86 # However, people should not have such much money.
87 #$best = $ref; # DEBUG ONLY !!!
88 $Genome{$chr}->[$pos]=$best;
89 #$Depth{$chr}->[$pos]=$depth;
91 close CNS;
92 warn "[!]CNS done.\n";
94 open INDEL,'<',$opt_i or die "Error opening $opt_i: $!\n";
95 while (<INDEL>) {
96 ($chr,$pos,$indel,$bases,$strand,$homhet,undef,$depth,$t) = split /\t/;
97 unless ($t) { # if file not completed.
98 print STDERR '\'';
99 next;
101 next unless exists $Genome{$chr};
102 if ($homhet eq 'hete') {
103 unless ($opt_e) {$bases = lc $bases;}
104 else {next;}
106 if ($indel =~ /^([ID])(\d+)$/) {
107 if ($1 eq 'I') {
108 $Genome{$chr}->[$pos] .= $bases; # if length($Genome{$chr}->[$t]) == 1
109 $Depth{$chr}{$pos} .= chr($depth) x $2;
110 } else {
111 my $i=$2-1;
112 #++$pos; # deletion including it.
113 if ($opt_v) {
114 print "[D]$indel:${1}_${2}@","$pos $strand\t";
115 #print $Genome{$chr}->[$_] for ($pos..$pos+$i);
116 print(($t=$Genome{$chr}->[$_])?$t:'.') for ($pos-5..$pos-1);
117 print '<';
118 print(($t=$Genome{$chr}->[$_])?$t:'.',' ') for ($pos..$pos+$i);
119 print '>';
120 print(($t=$Genome{$chr}->[$_])?$t:'.') for ($pos+$i+1..$pos+$i+5);
121 print ' - ',$bases,"\n";
123 for $t ($pos..$pos+$i) {
124 $Genome{$chr}->[$t]='';
125 delete $Depth{$chr}{$pos};# if exists $Depth{$chr}{$pos};
128 } else {print STDERR '|';next;}
130 close INDEL;
131 warn "[!]INDEL done.\n";
133 my ($i,$dep,$scaf,$name);
134 for $chr (keys %Genome) {
135 $Genome{$chr}->[0]=''; # no longer undef
136 $t=0;
137 $i=-1;
138 unless (exists $Merged{$chr}) { # exists should faster than defined ?
139 $name=$opt_o.'.'.$chr;
140 open OUT,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
141 print OUT ">$chr\n";
142 open DEP,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
143 print DEP ">$chr\n";
144 #$t=0;
145 #$i=-1;
146 for (@{$Genome{$chr}}) {
147 ++$i;
148 next if $_ eq '';
149 ++$t;
150 print OUT $_;
151 $dep=$Depth{$chr}{$i} or $dep="\0";
152 $dep = join(' ',map ord,split //,$dep);
153 #print DEP '[',$dep,']';
154 if ($t > 79) {
155 $t=0;
156 print OUT "\n";
157 print DEP "$dep\n";
158 } else {print DEP $dep,' ';}
160 print OUT "\n";
161 print DEP "\n";
162 close OUT;
163 close DEP;
164 } else {
165 $t=$opt_o.'.'.$chr;
166 system('mkdir','-p',$t);
167 my ($end,$scafname,$toPrint)=(-1,'',0); # -1 to fix 'print() on unopened filehandle OUT at ./conasm.pl line 178 & 179 of elsif ($i == $end).'
168 $scaf=$Merged{$chr};
169 for (@{$Genome{$chr}}) {
170 ++$i;
171 if (exists $$scaf{$i}) { # Start point
172 ($scafname,$end)=@{$$scaf{$i}};
173 $name=$opt_o.'.'.$chr.'/'.$scafname;
174 open OUT,'>',$name.'.fa' or die "Error opening ${name}.fa: $!\n";
175 print OUT ">$scafname\n";
176 open DEP,'>',$name.'.dep' or die "Error opening ${name}.dep: $!\n";
177 print DEP ">$scafname\n";
178 $toPrint=1; # when set to 0, no output for manmade junctions.
180 if ($i != $end) {
181 next if ($_ eq '' or $toPrint==0);
182 ++$t;
183 print OUT $_;
184 $dep=$Depth{$chr}{$i} or $dep="\0";
185 $dep = join(' ',map ord,split //,$dep);
186 #print DEP '[',$dep,']';
187 if ($t > 79) {
188 $t=0;
189 print OUT "\n";
190 print DEP "$dep\n";
191 } else {print DEP $dep,' ';}
192 } else { # End point
193 if ($_ ne '') {
194 print OUT $_,"\n";
195 $dep=$Depth{$chr}{$i} or $dep="\0";
196 $dep = join(' ',map ord,split //,$dep);
197 print DEP "$dep\n";
198 } else {
199 print OUT "\n";
200 print DEP "\n";
202 close OUT;
203 close DEP;
204 $t=$toPrint=0;
205 next;
210 warn "[!]Output done.\n";
212 __END__
213 vf=11.9g for 1.3G Feb 20 07:23 consensus/QRS9.Gm03.txt, 47,781,076
215 find ./consensus/ -name '*.txt'|perl -lane '$m="";$vf="14g";$a=(split /\//)[-1];$b=(split /\./,$a)[0];open O,">./sh/$a.sh";if ($a=~/SGm/) {$m="-m soybean.merge.list ";$vf="4.7g"};print O "#\$ -N \"C_$a\"\n#\$ -cwd -r y -l vf=$vf,p=1\n#\$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH\n#\$ -o /dev/null -e /dev/null\n./conasm.pl ${m}-i ./Indel/${b}.indel.txt.filter -c $_ -bno ./out/$b 2>./log/$a.log";close O'
217 $ cat sh/QRS21.Gm13.txt.sh
218 #$ -N "C_QRS21.Gm13.txt"
219 #$ -cwd -r y -l vf=14g,s_core=1
220 #$ -v PERL5LIB,PATH,PYTHONPATH,LD_LIBRARY_PATH
221 #$ -o /dev/null -e /dev/null
222 ./conasm.pl -i ./Indel/QRS21.indel.txt.filter -c ./consensus/QRS21.Gm13.txt -bo ./out/QRS21 2>./log/QRS21.Gm13.txt.log
224 ./conasm.pl -m soybean.merge.list -i ./Indel/QRS29.indel.txt.filter -c ./consensus/QRS29.SGm1.txt -bno ./out/QRS29 2>./log/QRS29.SGm1.txt.log