modified: Makefile
[GalaxyCodeBases.git] / perl / etc / radseq / vcfplot.pl
blobe162b6aa39e12c93c4fa6c6e4ed533884ca05f46
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
5 use Galaxy::IO;
6 #use Galaxy::SeqTools;
7 use Data::Dumper;
8 use Term::ANSIColor qw(:constants);
10 die "Usage: $0 <tfam file> <bcgv bcf> <regions> <list,of,samples to exclude(none)> <Dominant/Recessive> <outprefix> [Hom1,Hom2:Het1,Het2]\n" if @ARGV<6;
11 my $tfamfs=shift;
12 my $bcfs=shift;
13 my $regions=shift;
14 my $samples=shift;
15 my $DomRec=shift;
16 my $outfs=shift;
17 my $HomHetFilter=shift;
19 $DomRec='D' if $DomRec =~ /^D/i;
20 $DomRec='R' if $DomRec =~ /^R/i;
22 open OUTT,'>',$outfs.'.txt' or die $!;
24 my %HomHetRequired;
25 if (defined $HomHetFilter) {
26 my ($Homs,$Hets) = split /:/,$HomHetFilter;
27 my @Hom = split /,/,$Homs;
28 my @Het = split /,/,$Hets;
29 $HomHetRequired{'Hom'} = {map { $_ => 1 } @Hom};
30 $HomHetRequired{'Het'} = {map { $_ => 1 } @Het};
31 print OUTT "### HomHetFilter: Hom:[@Hom], Het:[@Het]\n";
32 warn "HomHetFilter: Hom:[@Hom], Het:[@Het]\n";
33 #ddx \%HomHetRequired,\($Homs,$Hets),$HomHetFilter;
36 my %SkippedSamples;
37 my $cmd = 'bcftools query -f\'%CHROM\t%POS\t%REF,%ALT\t%QUAL\t%DP[\t%SAMPLE,%GT,%DP,%GQ]\n\'';
38 if ($regions ne 'all') {
39 $cmd .= " -r '$regions'";
40 my @t = split /,/,$samples;
41 %SkippedSamples = map { $_ => 1 } @t;
43 if ($samples ne 'none') {
44 $cmd .= ' -s^'.$samples;
45 my @t = split /,/,$samples;
46 %SkippedSamples = map { $_ => 1 } @t;
48 warn "$DomRec [$cmd $bcfs]\n";
50 my (@tfamSamples,%tfamDat,%tfamSamplePheno,%inFamily,@CaseS,@ControlS);
51 open L,'<',$tfamfs or die;
52 while (<L>) {
53 next if /^(#|$)/;
54 #print OF $_;
55 chomp;
56 my ($family,$ind,$P,$M,$sex,$pho) = split /\t/;
57 next if exists $SkippedSamples{$ind};
58 $tfamDat{$ind} = $_."\n";
59 if ($pho == 1 or $pho == 2) {
60 $tfamSamplePheno{$ind} = $pho; # disease phenotype (1=unaff/ctl, 2=aff/case, 0=miss)
61 if ($pho == 2) {
62 push @CaseS,$ind;
63 } else {
64 push @ControlS,$ind;
66 } else { die; } # $pho can only be 1 or 2
67 push @tfamSamples,$ind;
68 push @{$inFamily{$family}},$ind;
70 #ddx \%tfamSamplePheno,\%inFamily;
71 close L;
73 my @Samples;
74 my $fh = openpipe('bcftools query -l',$bcfs);
75 while (<$fh>) {
76 chomp;
77 push @Samples,$_ unless exists $SkippedSamples{$_};
79 close $fh;
81 sub biggerKeyExcept($$) {
82 my ($in,$except) = @_;
83 my @range1 = grep {$_ ne '.'} keys %{$in};
84 my @range2 = grep {$_ ne $except} @range1;
85 if (scalar @range2 == 0) {
86 return undef;
87 } elsif (scalar @range2 == 1) {
88 return $range2[0];
89 } elsif (scalar @range2 > 1) {
90 my @order = sort { $in->{$b} <=> $in->{$a} || $a<=>$b } @range2;
91 return $order[0];
92 } elsif (scalar @range1 == 1) {
93 return $range1[0];
94 } else {die;}
96 sub biggerKey($) {
97 my $in = $_[0];
98 my @order = sort { $in->{$b} <=> $in->{$a} || $a<=>$b } grep {$_ ne '.'} keys %{$in};
99 return $order[0];
101 sub decodeGT($$) {
102 my ($gt,$array) = @_;
103 my @sGTs = split /\//,$gt;
104 my %GTs = map { $_ => 1 } @sGTs;
105 @sGTs = sort {$a cmp $b} keys %GTs;
106 my $ret = '';
107 for (@sGTs) {
108 if (/^\d+$/) {
109 $ret .= $array->[$_];
112 $ret = '.' if length $ret == 0;
113 return $ret;
115 sub TermColorGT($$) {
116 my ($thisGT,$ref) = @_;
117 my $ret;
118 if (length $thisGT > 1) {
119 return GREEN . $thisGT;
120 } elsif ($thisGT eq $ref) {
121 $ret = YELLOW . $thisGT;
122 } elsif ($thisGT eq '.') {
123 $ret = WHITE . $thisGT;
124 } else {
125 $ret = BLUE . $thisGT;
127 return $ret.' ';
130 print OUTT "### $DomRec [$cmd $bcfs]\n";
131 print OUTT "## Case_Samples: ",join(', ',@CaseS),"\n";
132 print OUTT "## Control_Samples: ",join(', ',@ControlS),"\n";
134 no warnings 'qw';
135 print OUTT join("\t",qw/#Chr Pos Mutant/)," Case_Samples . Control_Samples\n";
137 $fh = openpipe($cmd,$bcfs);
138 while (<$fh>) {
139 chomp;
140 my ($chr,$pos,$refalt,$qual,$adp,@sampleDat) = split /\t/;
141 next if $qual < 20;
142 my @GTs = split /,/,$refalt;
143 my $t;
144 for (@GTs) {
145 $t = length $_;
146 last if $t>1;
148 next if $t>1; # skip indels
149 #print join(',',$chr,$pos,$qual,$adp),"\t@GTs\t@sampleDat\n";
150 # gi|753572091|ref|NC_018727.2|,152998276,999,136 A C FCAP055,1/1,5,18 FCAP056,1/1,8,27 FCAP059,1/1,3,12 FCAP066,1/1,2,9 FCAP067,1/1,6,21 FCAP069,1/1,6,21 FCAP072,1/1,16,51 FCAP075,0/1,7,57 FCAP084,0/1,6,50 FCAP085,1/1,3,12 FCAP086,0/0,6,9 FCAP087,1/1,8,27 FCAP088,1/1,8,27 FCAP089,1/1,5,18 FCAP090,1/1,4,15 FCAP110,1/1,7,24 FCAP111,0/1,8,54 FCAP112,0/1,8,12 FCAP113,0/1,4,30
151 my ($skipped,%SampleDat,%caseGT,%ctlGT)=(0);
152 die if @Samples != @sampleDat;
153 for (@sampleDat) {
154 my ($id,$gt,$sdp,$squal) = split /,/,$_;
155 if ($sdp==0 or $squal<20) {
156 $gt = './.';
157 ++$skipped;
159 my @sGTs = split /\//,$gt;
160 if ( (exists $HomHetRequired{'Hom'}{$id}) and ($sGTs[0] ne $sGTs[1]) ) {
161 ++$skipped;
162 last;
164 if (exists $HomHetRequired{'Het'} && exists $HomHetRequired{'Het'}{$id}) {
165 if ($sGTs[0] eq $sGTs[1]) {
166 ++$skipped;
167 last;
170 $SampleDat{$id} = $gt;
171 if ($tfamSamplePheno{$id} == 2) { # 2=aff/case
172 ++$caseGT{$_} for @sGTs;
173 } elsif ($tfamSamplePheno{$id} == 1) {
174 ++$ctlGT{$_} for @sGTs;
175 } else {die;}
177 #next if $skipped > 0.5 * scalar(@sampleDat);
178 next if $skipped > 0;
179 my $theGT;
180 if ($DomRec eq 'D') {
181 my $t = biggerKey(\%ctlGT);
182 next unless defined $t;
183 $theGT = biggerKeyExcept(\%caseGT,$t);
184 } elsif ($DomRec eq 'R') {
185 $theGT = biggerKey(\%caseGT);
187 next unless defined $theGT;
188 #ddx \%caseGT,\%ctlGT,$theGT;
189 #print decodeGT($theGT,\@GTs);
190 print OUTT join("\t",$chr,$pos,
191 join(' ',map {TermColorGT(decodeGT($_,\@GTs),decodeGT($theGT,\@GTs));} ($theGT,@SampleDat{@CaseS},'.',@SampleDat{@ControlS}))
192 ),RESET,"\n";
194 close $fh;
195 close OUTT;
197 system("cat $outfs.txt | aha -b >$outfs.htm");
199 warn "[!]Use less -RS $outfs.txt or open $outfs.htm\n";
200 #ddx \@CaseS,\@ControlS;
202 __END__
203 ./vcfplot.pl kinkcats.tfam mpileup_20150403.vcf.filtered.gz 'gi|753572091|ref|NC_018727.2|:151386958-153139134' FCAP114 D plotN114
204 ./vcfplot.pl kinkcats.tfam mpileup_20150403.vcf.filtered.gz 'gi|753572091|ref|NC_018727.2|:151386958-153139134' none D plotN114a
206 ./vcfplot.pl kinkcats.tfam mpileup_20150403.vcf.filtered.gz 'gi|753572091|ref|NC_018727.2|:151386958-153139134' FCAP114 D plotNh FCAP075:FCAP072
207 ./vcfplot.pl kinkcats.tfam mpileup_20150403.vcf.filtered.gz 'gi|753572091|ref|NC_018727.2|:151386958-153139134' none D plotNha FCAP075:FCAP072
209 gi|753572091|ref|NC_018727.2|:151586958-152939134
210 -200k