3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
8 use Data
::Dump
qw(ddx);
10 use Galaxy
::IO
::FASTAQ
qw(readfq getQvaluesFQ);
13 die "Usage: $0 <ec list> <bcgv bcf> <out>\n" if @ARGV<2;
20 my $minAlignLen = int($ReadLen * 0.6);
21 $minAlignLen = $minOKalnLen if $minAlignLen < $minOKalnLen;
22 die if $ReadLen < $minOKalnLen;
25 my $EseqLen = length $Eseq;
27 my $EseqL="CTGCA";#substr $Eseq,0,$EcutAt;
30 my $EfwdTerm5=1-$EcutAt+1; # -3
31 my $EfwdTerm3=1-$EcutAt+$ReadLen; # 92
33 my $ErevTerm5=$ErevTerm3-$ReadLen+1; # -95
34 # 12008 -> [12005,12099],[11914,12008]
35 my $Rfwd2Ec = -$EfwdTerm5; # 3
36 my $Rrev2Ec = -$ErevTerm3; # 0
39 my $PosLeft = -$EfwdTerm3; # -92
40 my $PosRight = -$ErevTerm5; # 95;
41 my $PosECsft = $Rfwd2Ec; # 3;
42 # - [x-$PosLeft,x+$PosECsft], Len=96
43 # + [x,x+$PosRight], Len=96
44 # A [x-$PosLeft,x+$PosRight], Len=95+92+1=186
48 open O
,'>',$outfs or die "Error opening $outfs : $!\n";
49 $t = "# EClst: [$eclst], Enzyme: [$Eseq], Cut after $EcutAt\n# Bams: [$bcfs]\n";
54 open L
,'<',$eclst or die;
57 my ($chr,$pos,$strand,$mark,$count,$samples) = split /\t/;
58 #warn "$chr,$pos,$strand,$samples,$mark\n";
60 push @
{$Markers{$chr}},[$pos,$strand];
64 my $th = openpipe
('bcftools view',$bcfs);
65 my (@Samples,@Parents);
69 my @data = split /\t/;
70 if ($data[0] eq '#CHROM') {
71 @Samples = map {my $t=(split /\//)[-1];$t=~s/_cut
//g;$t=~s/-/./g; $_=join('_',(split /\./,$t)[-5,-6]);} splice @data,9;
72 # ../5.bam_0000210210_merged/d1_4_merged.D4.JHH001.XTU.sort.rmdup.bam
73 @Parents = grep(!/^GZXJ/,@Samples);
77 print O
"# Samples: [",join('],[',@Samples),"]\n# Parents: [",join('],[',@Parents),"]\n";
78 warn "Samples:\n[",join("]\n[",@Samples),"]\nParents: [",join('],[',@Parents),"]\n";
100 my ($lastChr) = ('');
105 my ($CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, @data) = split /\t/;
107 my @groups = split(/\s*;\s*/, $INFO);
109 if ($groups[0] eq 'INDEL') {
110 $INFO{'Type'} = 'INDEL';
113 $INFO{'Type'} = 'SNP';
115 for my $group (@groups) {
116 my ($tag,$value) = split /=/,$group;
117 #warn "- $group -> $tag,$value\n";
118 my @values = split /,/,$value;
120 $INFO{$tag}=$values[0];
122 $INFO{$tag}=\
@values;
126 my @FMT = split /:/,$FORMAT;
127 for my $s (@Samples) {
128 my $dat = shift @data or die "Bam file error.";
129 my @dat = split /:/,$dat;
131 $GT{$s}{$i} = shift @dat;
136 if ($GT{$_}{'DP'} > 0 and $GT{$_}{'GQ'} > 17) {
137 ++$GTcnt{$GT{$_}{'GT'}};
145 ++$Stat{'GTcnt'}{$INFO{'FQ'} <=> 0}{scalar(keys %GTcnt)};
147 ddx $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO,\%INFO,\%GT if scalar(keys %GTcnt) > 1 and $INFO{'FQ'} < 0 and $SPcnt>2;
148 # rad2marker.pl:135: {
149 # -1 => { 1 => 2850, 2 => 526, 3 => 37 },
150 # 1 => { 1 => 8, 2 => 2507, 3 => 1792 },
153 #warn "$CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO\n";
154 if ($QUAL<20 or $INFO{'FQ'}<0 or scalar(keys %GTcnt)<2 or $SPcnt<3 or $INFO{'DP'}<6) {
155 ++$Stat{'VCF_Skipped'};
158 #ddx $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO,\%GTcnt,\%INFO,\%GT;
159 unless ($CHROM eq $lastChr) {
160 my ($mark,$flag,$sampleA) = @
{&deal_cluster
($CHROM,\
@items)};
164 push @items,[$POS, $REF, $ALT, $SPcnt, \
%INFO,\
%GT];
171 my ($REF, $ALT, $GT) = @_;
174 my ($Chr,$itemsA) = @_;
176 if ( (not exists $Markers{$Chr}) ) {
177 ++$Stat{'Cluster_err'} if $Chr ne '';
178 return [$mark,'',[]];
180 for (@
{$Markers{$Chr}}) {
181 my ($pos,$strand) = @
$_;
183 if ($strand eq '+') {
184 ($left,$right)=($pos,$pos+$PosRight);
185 } elsif ($strand eq '-') {
186 ($left,$right)=($pos-$PosLeft,$pos+$PosECsft);
188 ($left,$right)=($pos-$PosLeft,$pos+$PosRight);
190 print "$pos,$strand [$left,$right] ";
193 my ($POS, $REF, $ALT, $SPcnt, $pINFO,$pGT) = @
$_;
194 next if $POS < $left;
195 last if $POS > $right;
198 next if @thisDat == 0;
199 print "\n$pos,$strand [$left,$right]\n";
202 ++$Stat{'Cluster_cnt'};
209 zcat radseq
.bcgv
.vcf
.gz
|perl
-ne 'if (/^#CHROM/) {my @data = split /\t/;@Samples = map {my $t=(split /\//)[-1];$t=~s/_cut//g;$t=~s/-/./g; $_=join('_
',(split /\./,$t)[-5,-6]);} splice @data,9;splice @data,9,$#data,@Samples;print join("\t",@data),"\n"} else {print $_;}' > radseqA
.vcf
&
210 zcat radseq
.bcgv
.vcf
.gz
|perl
-lane
'BEGIN {my $i;} next if /^#/;++$i;$F[0]=~s/^\D+//;print "$F[0]\trs$i\t0\t$F[1]"' > radseqA
.map &
212 zcat radseq
.bcgv
.vcf
.gz
| perl
-ne 'my @data = split /\t/;if (/^#CHROM/) {@Samples = map {my $t=(split /\//)[-1];$t=~s/_cut//g;$t=~s/-/./g; $_=join('_
',(split /\./,$t)[-5,-6]);} splice @data,9;splice @data,9,$#data,@Samples;print join("\t",@data),"\n"} elsif (/^#/) {print $_;} else {print $_;}' > radseqA
.vcf
214 p
-link --tfile test
--reference
-allele test
.refallele
--pheno test
.pheno
--all
-pheno
--model
--cell
0 --fisher
217 B19 JHH001_D4
0 0 2 1
218 B19 GZXJ03_A1 BHX019_LSJ JHH001_D4
2 1
220 1001 rs1
0 5188 A T A T A T A T A T A T A T A T A T T T T T T T T T T T T T T T T T T T
221 1001 rs2
0 5193 A A A A A A A A A A A A A A A A A A A C A C A C A C
0 0 A C A C A C
0 0
222 $ head
-2 test
.refallele
229 p
-link --tfile test
--reference
-allele test
.refallele
--pheno test
.pheno
--all
-pheno
--model
--cell
0 --fisher
--out testO
231 -rw
-r
--r
-- 1 huxs users
2526 Aug
1 18:41 testO
.log
232 -rw
-r
--r
-- 1 huxs users
3672 Aug
1 18:41 testO
.sex
.model
233 -rw
-r
--r
-- 1 huxs users
3672 Aug
1 18:41 testO
.snow
.model
235 #--freq --missing --tdt