3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
8 use Data
::Dump
qw(dump ddx);
12 die "Usage: $0 <bcgv bcf> <out>\n" if @ARGV<2;
17 open O
,'>',$outfs.'.out' or die "Error opening $outfs.out : $!\n";
18 $t = "# In: [$bcfs], Out: [$outfs]\n";
22 my $th = openpipe
('bcftools view -I',$bcfs); # -I skip indels
23 my (@Samples,@Parents);
27 my @data = split /\t/;
28 if ($data[0] eq '#CHROM') {
30 if (/^(\w+)_all\.bam$/) {
33 my $t=(split /\//)[-1];$t=~s/_cut
//g;$t=~s/-/./g; $_=join('_',(split /\./,$t)[-5,-6]);
36 # ../5.bam_0000210210_merged/d1_4_merged.D4.JHH001.XTU.sort.rmdup.bam
37 @Parents = grep(!/^GZXJ/,@Samples);
41 print O
"# Samples: [",join('],[',@Samples),"]\n# Parents: [",join('],[',@Parents),"]\n";
42 warn "Samples:\n[",join("]\n[",@Samples),"]\nParents: [",join('],[',@Parents),"]\n";
47 my ($CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, @data) = split /\t/;
48 ++$Stat{'VCF_In'}; # also as rs#
49 my @Bases = split /\s*,\s*/,"$REF, $ALT";
50 my @groups = split(/\s*;\s*/, $INFO);
52 # if ($groups[0] eq 'INDEL') {
53 # $INFO{'Type'} = 'INDEL';
56 # $INFO{'Type'} = 'SNP';
58 for my $group (@groups) {
59 my ($tag,$value) = split /=/,$group;
60 #warn "- $group -> $tag,$value\n";
61 my @values = split /,/,$value;
63 $INFO{$tag}=$values[0];
69 my @FMT = split /:/,$FORMAT;
70 for my $s (@Samples) {
71 my $dat = shift @data or die "bcf file error.";
72 my @dat = split /:/,$dat;
74 $GT{$s}{$i} = shift @dat;
78 my (%GTitemCnt,$Mut,@plinkGT);
80 if ($GT{$_}{'DP'} > 0 and $GT{$_}{'GQ'} >= 20) {
81 my $gt = $GT{$_}{'GT'};
83 my @GT = split /[\/|]/,$gt;
85 ++$GTitemCnt{$_} for @GT;
86 $GT{$_}{'GTp'} = join ' ',map($Bases[$_], @GT);
89 $GT{$_}{'GTp'} = '0 0';
92 push @plinkGT,$GT{$_}{'GTp'};
94 ($Mut) = sort { $GTitemCnt{$b} <=> $GTitemCnt{$a} } keys %GTitemCnt;
95 next if (keys %GTitemCnt) > 1;
96 unless (defined $Mut) {
97 ++$Stat{'VCF_noAffInd_Skipped'};
101 ++$Stat{'VCF_noMUT_Skipped'};
104 #$Mut = $Bases[$Mut];
106 ++$Stat{'GTcnt'}{$INFO{'FQ'} <=> 0}{scalar(keys %GTcnt)};
108 ddx $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO,\%INFO,\%GT if scalar(keys %GTcnt) > 1 and $INFO{'FQ'} < 0 and $SPcnt>2;
109 # rad2marker.pl:135: {
110 # -1 => { 1 => 2850, 2 => 526, 3 => 37 },
111 # 1 => { 1 => 8, 2 => 2507, 3 => 1792 },
114 #warn "$CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO\n";
115 #ddx $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO,\%GTcnt,\%INFO,\%GT,\%GTitemCnt,$Mut;
116 if ($QUAL<20 or $SPcnt<18) {
117 ++$Stat{'VCF_Skipped'};
121 my $SNPid = "r".$Stat{'VCF_In'};
123 print STDERR
"[$SPcnt] $CHROM, $POS, $REF, $ALT, $QUAL, $INFO ",dump(\
%GT),"\n";
132 ./bcf4bb
.pl tigers
.bcgv
.bcf t
2>&1|tee t
.log
134 grep scaffold b17
.log |awk
'{print $1}'|uniq
|perl
-lane
's/\,//;print $_'|while read a
;do cat bychr
/$a.fa
>> bgene
.fa
;done
136 cat bgene
.chr|while read a
;do cat P_tigris
.gene
.gtf
|grep -P
"$a\t" >> bgene
.vcf
;done