5 use Data
::Dump
qw(ddx);
7 my $Usage = "Usage: $0 <vcf.gz>\n";
8 die $Usage if @ARGV < 1;
10 my ($filename) = @ARGV;
11 #my $cmd = "bcftools view -m3 -v snps $filename | bcftools query -f '%CHROM\t%POS\t%REF,%ALT\t%QUAL[\t%TGT:%AD:%GQ]\n'";
12 my $cmd = "bcftools view -U -m2 -i '%QUAL>=40 & MIN(FMT/GQ)>20 & GT[2]=\"het\"' -v snps $filename |bcftools view -e 'FMT/DP=\".\"'| bcftools query -f '%CHROM\t%POS\t%REF,%ALT\t%QUAL[\t%TGT:%AD:%GQ]\n'";
14 chr1 102951 C,T 1575.58 C/C/C/T:23,6:25 C/C/C/T:88,27:67 C/C/C/T:111,33:93
15 chr1 633963 C,T 6936.96 T/T/T/T:5,242:99 C/C/C/C:250,0:99 C/C/C/T:172,77:75
16 chr1 889636 A,G,T 1356.65 A/A/G/T:14,3,8:21 A/A/G/T:36,20,10:48 A/A/G/T:50,23,18:75
17 chr1 1099396 G,A,C 214.66 G/G/G/G:22,1,0:24 G/G/A/C:67,5,9:58 G/G/A/C:89,6,9:24
20 open(SID
,'-|',"bcftools query -l $filename") or die "Error opening [$filename]: $!\n";
27 # ["T10C", "T3C", "mixed"] => v,k,m
31 my @GTs = sort keys %{$hashref};
42 open(IN
,"-|",$cmd) or die "Error opening [$filename]: $!\n";
45 my ($Chrom,$Pos,$RefAlt,$Qual,@GTs) = split /\t/,$_;
46 next if $Chrom =~ /_/;
49 for my $i (0 .. $#SampleIDs) {
50 my ($sGT,$sAD,$sGQ) = split /:/,$GTs[$i];
51 my @aGT = split /[|\/]/,$sGT;
52 my @aAD = split /\,/,$sAD;
55 if ($SampleIDs[$i] eq 'mixed') {
56 ++$counter{$_} for @aGT;
58 ++$counter{$_} for @aGT[0,1];
64 #my @result = sort(keys %counter);
65 $GTs{$SampleIDs[$i]} = \
%counter;
67 $GTs{'__R'} = {%{$GTs{'mixed'}}};
68 for (keys %{$GTs{'T10C'}}) {
69 if (exists $GTs{'__R'}{$_}) {
70 delete $GTs{'__R'}{$_};
73 $GTs{'_V'} = mergeGT
($GTs{'T10C'});
74 $GTs{'_K'} = mergeGT
($GTs{'T3C'});
75 $GTs{'_M'} = mergeGT
($GTs{'mixed'});
76 $GTs{'_R'} = mergeGT
($GTs{'__R'});
78 my $str1 = $GTs{'_R'};
79 my $str2 = join('.+',split(//,$str1));
81 if (length($GTs{'_M'})==1) {
83 } elsif (length($GTs{'_M'})==2) {
85 if ($GTs{'_R'} eq '') {
87 if ($GTs{'_M'} eq $GTs{'_V'}) {
88 doCnt
('1R0e'); # M == V, het
92 } elsif (length($GTs{'_R'})==1) {
94 if (length($GTs{'_V'})==1) {
96 if ($GTs{'_R'} eq $GTs{'_K'}) {
98 } elsif ($GTs{'_K'} =~ /$GTs{'_R'}/) {
99 doCnt
('2LK'); # M-V ~= K
101 doCnt
('2x'); # V=K, M:het
116 if ($GTs{'_R'} eq '') {
118 } elsif ($GTs{'_R'} eq $GTs{'_K'}) {
119 ++$COUNTING{'1fulEQ'};
120 } elsif (length($GTs{'_R'})==1 and $GTs{'_K'} =~ /$GTs{'_R'}/) {
122 } elsif ($GTs{'_K'} =~ /$str2/) {
123 ++$COUNTING{'3incT'};