limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / etc / filtervcf.pl
blob75bac5c59ba6e55b85f393f2af534b53f4e4ea8f
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
5 =cut
6 use strict;
7 use warnings;
8 use Vcf;
9 #use GTF;
10 use Data::Dump qw(ddx);
11 use Galaxy::IO::FASTAQ;
12 use Galaxy::SeqTools qw(translate revcom);
13 use Galaxy::Data;
15 die "Usage: $0 <vcf.gz> <minGQ> <out>\n" if @ARGV<3;
16 my $vcfs = shift;
17 my $qual = shift;
18 my $outfs = shift;
20 warn "From [$vcfs] with [minGQ=$qual] to [$outfs]\n";
22 open O,'>',$outfs."q$qual.snp.lst" or die;
23 open OI,'>',$outfs."q$qual.indel.lst" or die;
25 my $vcf = Vcf->new(file=>$vcfs);
26 $vcf->parse_header();
27 my (@samples) = $vcf->get_samples();
28 #ddx \@samples;
29 for (@samples) {
30 s/\.bam$//i;
32 my $t = 'Samples:'.join(',',@samples);
33 print O join("\t",'#CHROM','POS','GT','QUAL',$t),"\n";
34 print OI join("\t",'#CHROM','POS','GT','QUAL',$t),"\n";
36 while (my $x=$vcf->next_data_hash()) {
37 next if $$x{QUAL} < 20;
38 my ($Dindel,@GTs)=(0);
39 my %GTs = %{$$x{gtypes}};
40 my @Bases = ($$x{REF},@{$$x{ALT}});
41 $Dindel = length($Bases[1]) - length($Bases[0]);
42 for my $sample (keys %GTs) {
43 my $sampleID = $sample;
44 $sampleID =~ s/\.bam$//i;
45 next if $GTs{$sample}{DP}<=0;
46 next if $GTs{$sample}{GQ}<$qual; # 10:219090, 20:88956, 15:150149.
47 my ($a1,$a2,$a3) = $vcf->split_gt($GTs{$sample}{GT});
48 die "[$a3]" if $a3;
49 my ($t,%t)=(0);
50 for (($Bases[$a1],$Bases[$a2])) {
51 ++$t{$_};
52 $t = length $_ if $t < length $_;
54 if ($t==1) {
55 $t = join('',sort keys %t);
56 $t = $REV_IUB{$t};
57 } else {
58 $t = '.';
60 push @GTs, join('|',"$a1/$a2","$Bases[$a1]/$Bases[$a2]",$GTs{$sample}{DP},$GTs{$sample}{GQ},$t);
62 next unless @GTs;
63 #if (length($Bases[0]) == 1) {
64 my @t = map { length($_) } @Bases;
65 my %t;
66 #ddx \@t;
67 ++$t{$_} for @t;
68 @t = sort { $b <=> $a } keys %t;
69 #ddx \@t;
70 if ( $t[0] == 1 ) {
71 print O join("\t",$$x{CHROM},$$x{POS},join('/',@Bases,$Dindel),$$x{QUAL}, join(", ",@GTs) ),"\n";
72 } else {
73 print OI join("\t",$$x{CHROM},$$x{POS},join('/',@Bases,$Dindel),$$x{QUAL}, join(", ",@GTs), $t[-1] ),"\n";
76 # for my $gt (keys %GTs) {
77 # my ($a1,$a2,$a3) = $vcf->split_gt($gt);
78 # if ($a3 or ($a1 != $a2)) {
79 # $flag = 1;
80 # }
81 # }
82 #ddx \%GTs;
83 #ddx $x;
84 # vcf2cds.pl:189: {
85 # ALT => ["A"],
86 # CHROM => "scaffold75",
87 # FILTER => ["."],
88 # FORMAT => ["GT", "PL", "DP", "SP", "GQ"],
89 # gtypes => {
90 # "BHX011.bam" => { DP => 19, GQ => 61, GT => "0/0", PL => "0,57,255", SP => 0 },
91 # "BHX019.bam" => { DP => 26, GQ => 82, GT => "0/0", PL => "0,78,255", SP => 0 },
92 # "JHH001.bam" => { DP => 28, GQ => 99, GT => "0/1", PL => "244,0,255", SP => 9 },
93 # },
94 # ID => ".",
95 # INFO => {
96 # AC1 => 1,
97 # AF1 => 0.1667,
98 # DP => 74,
99 # DP4 => "34,26,4,9",
100 # FQ => 999,
101 # MQ => 59,
102 # PV4 => "0.13,1,0.12,1",
103 # VDB => 0.0365,
104 # },
105 # POS => 3572768,
106 # QUAL => 999,
107 # REF => "G",
109 #print $vcf->format_line($x);
111 #ddx $vcf;
112 $vcf->close;
114 close O;
115 close OI;
117 sub cmpstr {
118 my ($a, $b) = @_;
119 my $c = $a ^ $b;
120 my @ret;
121 while ($c =~ /[^\0]/g) {
122 my $p = pos($c);
123 push @ret,[$p,substr($a,$p-1,1),substr($b,$p-1,1)];
125 @ret;
128 __END__
130 bcftools view /bak/archive/projects/Tiger/BCF/WGS/parents.bcgv.bcf |bgzip -c > parents.bcgv.vcf.gz &
131 #tabix -p vcf parents.bcgv.vcf.gz
133 perl filtervcf.pl bam2.bcgv.vcf.gz 20 bam2 &