4 use Data
::Dump
qw(ddx);
8 use Term
::ANSIColor
qw(:constants);
10 die "Usage: $0 <tfam file> <bcgv bcf> <regions> <list,of,samples to exclude(none)> <Dominant/Recessive> <outprefix> [Hom1,Hom2:Het1,Het2]\n" if @ARGV<6;
17 my $HomHetFilter=shift;
19 $DomRec='D' if $DomRec =~ /^D/i;
20 $DomRec='R' if $DomRec =~ /^R/i;
22 open OUTT
,'>',$outfs.'.txt' or die $!;
25 if (defined $HomHetFilter) {
26 my ($Homs,$Hets) = split /:/,$HomHetFilter;
27 my @Hom = split /,/,$Homs;
28 my @Het = split /,/,$Hets;
29 $HomHetRequired{'Hom'} = {map { $_ => 1 } @Hom};
30 $HomHetRequired{'Het'} = {map { $_ => 1 } @Het};
31 print OUTT
"### HomHetFilter: Hom:[@Hom], Het:[@Het]\n";
32 warn "HomHetFilter: Hom:[@Hom], Het:[@Het]\n";
33 #ddx \%HomHetRequired,\($Homs,$Hets),$HomHetFilter;
37 my $cmd = 'bcftools query -f\'%CHROM\t%POS\t%REF,%ALT\t%QUAL\t%DP[\t%SAMPLE,%GT,%DP,%GQ]\n\'';
38 if ($regions ne 'all') {
39 $cmd .= " -r '$regions'";
40 my @t = split /,/,$samples;
41 %SkippedSamples = map { $_ => 1 } @t;
43 if ($samples ne 'none') {
44 $cmd .= ' -s^'.$samples;
45 my @t = split /,/,$samples;
46 %SkippedSamples = map { $_ => 1 } @t;
48 warn "$DomRec [$cmd $bcfs]\n";
50 my (@tfamSamples,%tfamDat,%tfamSamplePheno,%inFamily,@CaseS,@ControlS);
51 open L
,'<',$tfamfs or die;
56 my ($family,$ind,$P,$M,$sex,$pho) = split /\t/;
57 next if exists $SkippedSamples{$ind};
58 $tfamDat{$ind} = $_."\n";
59 if ($pho == 1 or $pho == 2) {
60 $tfamSamplePheno{$ind} = $pho; # disease phenotype (1=unaff/ctl, 2=aff/case, 0=miss)
66 } else { die; } # $pho can only be 1 or 2
67 push @tfamSamples,$ind;
68 push @
{$inFamily{$family}},$ind;
70 #ddx \%tfamSamplePheno,\%inFamily;
74 my $fh = openpipe
('bcftools query -l',$bcfs);
77 push @Samples,$_ unless exists $SkippedSamples{$_};
81 sub biggerKeyExcept
($$) {
82 my ($in,$except) = @_;
83 my @range1 = grep {$_ ne '.'} keys %{$in};
84 my @range2 = grep {$_ ne $except} @range1;
85 if (scalar @range2 == 0) {
87 } elsif (scalar @range2 == 1) {
89 } elsif (scalar @range2 > 1) {
90 my @order = sort { $in->{$b} <=> $in->{$a} || $a<=>$b } @range2;
92 } elsif (scalar @range1 == 1) {
98 my @order = sort { $in->{$b} <=> $in->{$a} || $a<=>$b } grep {$_ ne '.'} keys %{$in};
102 my ($gt,$array) = @_;
103 my @sGTs = split /\//,$gt;
104 my %GTs = map { $_ => 1 } @sGTs;
105 @sGTs = sort {$a cmp $b} keys %GTs;
109 $ret .= $array->[$_];
112 $ret = '.' if length $ret == 0;
115 sub TermColorGT
($$) {
116 my ($thisGT,$ref) = @_;
118 if (length $thisGT > 1) {
119 return GREEN
. $thisGT;
120 } elsif ($thisGT eq $ref) {
121 $ret = YELLOW
. $thisGT;
122 } elsif ($thisGT eq '.') {
123 $ret = WHITE
. $thisGT;
125 $ret = BLUE
. $thisGT;
130 print OUTT
"### $DomRec [$cmd $bcfs]\n";
131 print OUTT
"## Case_Samples: ",join(', ',@CaseS),"\n";
132 print OUTT
"## Control_Samples: ",join(', ',@ControlS),"\n";
135 print OUTT
join("\t",qw
/#Chr Pos Mutant/)," Case_Samples . Control_Samples\n";
137 $fh = openpipe
($cmd,$bcfs);
140 my ($chr,$pos,$refalt,$qual,$adp,@sampleDat) = split /\t/;
142 my @GTs = split /,/,$refalt;
148 next if $t>1; # skip indels
149 #print join(',',$chr,$pos,$qual,$adp),"\t@GTs\t@sampleDat\n";
150 # gi|753572091|ref|NC_018727.2|,152998276,999,136 A C FCAP055,1/1,5,18 FCAP056,1/1,8,27 FCAP059,1/1,3,12 FCAP066,1/1,2,9 FCAP067,1/1,6,21 FCAP069,1/1,6,21 FCAP072,1/1,16,51 FCAP075,0/1,7,57 FCAP084,0/1,6,50 FCAP085,1/1,3,12 FCAP086,0/0,6,9 FCAP087,1/1,8,27 FCAP088,1/1,8,27 FCAP089,1/1,5,18 FCAP090,1/1,4,15 FCAP110,1/1,7,24 FCAP111,0/1,8,54 FCAP112,0/1,8,12 FCAP113,0/1,4,30
151 my ($skipped,%SampleDat,%caseGT,%ctlGT)=(0);
152 die if @Samples != @sampleDat;
154 my ($id,$gt,$sdp,$squal) = split /,/,$_;
155 if ($sdp==0 or $squal<20) {
159 my @sGTs = split /\//,$gt;
160 if ( (exists $HomHetRequired{'Hom'}{$id}) and ($sGTs[0] ne $sGTs[1]) ) {
164 if (exists $HomHetRequired{'Het'} && exists $HomHetRequired{'Het'}{$id}) {
165 if ($sGTs[0] eq $sGTs[1]) {
170 $SampleDat{$id} = $gt;
171 if ($tfamSamplePheno{$id} == 2) { # 2=aff/case
172 ++$caseGT{$_} for @sGTs;
173 } elsif ($tfamSamplePheno{$id} == 1) {
174 ++$ctlGT{$_} for @sGTs;
177 #next if $skipped > 0.5 * scalar(@sampleDat);
178 next if $skipped > 0;
180 if ($DomRec eq 'D') {
181 my $t = biggerKey
(\
%ctlGT);
182 next unless defined $t;
183 $theGT = biggerKeyExcept
(\
%caseGT,$t);
184 } elsif ($DomRec eq 'R') {
185 $theGT = biggerKey
(\
%caseGT);
187 next unless defined $theGT;
188 #ddx \%caseGT,\%ctlGT,$theGT;
189 #print decodeGT($theGT,\@GTs);
190 print OUTT
join("\t",$chr,$pos,
191 join(' ',map {TermColorGT
(decodeGT
($_,\
@GTs),decodeGT
($theGT,\
@GTs));} ($theGT,@SampleDat{@CaseS},'.',@SampleDat{@ControlS}))
197 system("cat $outfs.txt | aha -b >$outfs.htm");
199 warn "[!]Use less -RS $outfs.txt or open $outfs.htm\n";
200 #ddx \@CaseS,\@ControlS;
203 ./vcfplot
.pl kinkcats
.tfam mpileup_20150403
.vcf
.filtered
.gz
'gi|753572091|ref|NC_018727.2|:151386958-153139134' FCAP114 D plotN114
204 ./vcfplot
.pl kinkcats
.tfam mpileup_20150403
.vcf
.filtered
.gz
'gi|753572091|ref|NC_018727.2|:151386958-153139134' none D plotN114a
206 ./vcfplot
.pl kinkcats
.tfam mpileup_20150403
.vcf
.filtered
.gz
'gi|753572091|ref|NC_018727.2|:151386958-153139134' FCAP114 D plotNh FCAP075
:FCAP072
207 ./vcfplot
.pl kinkcats
.tfam mpileup_20150403
.vcf
.filtered
.gz
'gi|753572091|ref|NC_018727.2|:151386958-153139134' none D plotNha FCAP075
:FCAP072
209 gi
|753572091|ref|NC_018727
.2
|:151586958-152939134