modified: Makefile
[GalaxyCodeBases.git] / perl / etc / radseq / tmpvcf.pl
blob4f0fc483640d8dd4f6aae872af1e194735fb5af2
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
5 =cut
6 use strict;
7 use warnings;
8 use Data::Dump qw(ddx);
9 use Galaxy::IO;
10 #use Galaxy::IO::VCF;
11 use Vcf;
13 die "Usage: $0 <bcf/vcf> <out>\n" if @ARGV<2;
14 my $bcfs=shift;
15 my $outfs=shift;
17 my ($fh,$vcf);
18 if ($bcfs =~ /\.bcf$/) {
19 $fh = openpipe('bcftools view',$bcfs);
20 $vcf = Vcf->new(fh=>$bcfs);
21 } else {
22 $fh = openpipe('bcftools view -S',$bcfs);
23 $vcf = Vcf->new(file=>$bcfs);
25 $vcf->parse_header();
26 my (@samples) = $vcf->get_samples();
27 ddx \@samples;
29 while (my $x=$vcf->next_data_hash()) {
30 # for my $gt (keys %{$$x{gtypes}}) {
31 # my ($al1,$sep,$al2) = $vcf->parse_alleles($x,$gt);
32 # print "\t$gt: $al1$sep$al2\n";
33 # }
34 # print "\n";
35 next unless $$x{REF} =~ /^[ATCG]$/;
36 next if $$x{QUAL} < 20;
37 my ($flag,%GTs)=(0);
38 for my $gt (keys %{$$x{gtypes}}) {
39 if ($$x{gtypes}{$gt}{DP} > 0) {
40 ++$GTs{$$x{gtypes}{$gt}{GT}};
43 next if (keys %GTs) > 1; # one GT;
44 for my $gt (keys %GTs) {
45 $flag = 1 if $gt =~ /0/;
46 my ($a1,$a2,$a3) = $vcf->split_gt($gt);
47 if ($a3 or ($a1 != $a2)) {
48 $flag = 1;
51 #ddx [\%GTs,$x] if $flag;
52 next if $flag;
53 print $vcf->format_line($x);
55 =pod
56 BHX011.bam: A/A
57 BHX019.bam: A/A
58 JHH001.bam: A/A
60 # tmpvcf.pl:35: {
61 # ALT => ["A"],
62 # CHROM => "scaffold1001",
63 # FILTER => ["."],
64 # FORMAT => ["GT", "PL", "DP", "SP", "GQ"],
65 # gtypes => {
66 # "BHX011.bam" => { DP => 8, GQ => 32, GT => "1/1", PL => "131,24,0", SP => 0 },
67 # "BHX019.bam" => { DP => 6, GQ => 26, GT => "1/1", PL => "107,18,0", SP => 0 },
68 # "JHH001.bam" => { DP => 0, GQ => 9, GT => "1/1", PL => "0,0,0", SP => 0 },
69 # },
70 # ID => ".",
71 # INFO => { AC1 => 6, AF1 => 1, DP => 14, DP4 => "0,0,9,5", FQ => -33.3, MQ => 35, VDB => 0.0256 },
72 # POS => 5193,
73 # QUAL => 202,
74 # REF => "C",
75 # }
76 =cut
78 #while (my $x=$vcf->next_data_array()) {
79 # print join('|',@$x),"\n"; #"$$x[0]:$$x[1]\n";
80 # ddx $x;
82 # tmpvcf.pl:62: [
83 # "scaffold1001",
84 # 5193,
85 # ".",
86 # "C",
87 # "A",
88 # 202,
89 # ".",
90 # "DP=14;VDB=0.0256;AF1=1;AC1=6;DP4=0,0,9,5;MQ=35;FQ=-33.3",
91 # "GT:PL:DP:SP:GQ",
92 # "1/1:0,0,0:0:0:9",
93 # "1/1:131,24,0:8:0:32",
94 # "1/1:107,18,0:6:0:26",
95 # ]
97 $vcf->close();
99 __END__
100 ./tmpvcf.pl JB19.bcvgL.vcf.gz c > tmp
101 perl -lane 'BEGIN {my %a;} next if /^#/; ++$a{$F[0]}; END {print "$_\t$a{$_}" for sort {$a{$b} <=> $a{$a}} keys %a;}' tmp|cat -n > tmp.s