new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / getvcfs.pl
blob5ba5e44e6589cb79fd38fe58d69b674ed4e4cca6
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20180417
5 =cut
6 use strict;
7 use warnings;
8 use Galaxy::IO;
9 use Data::Dump qw(ddx);
11 my $minDepth = 100;
13 #my $in = openfile('MA20.snp.501a100.txt.gz');
14 my $in = openpipe("bcftools query -f '%CHROM\t%POS\t%REF,%ALT\t%QUAL[\t%TGT;%AD]\n' -i'POS=501 && AVG(FMT/AD)>=$minDepth'",'MA20.snp.gz');
15 my @Samples = qw(C1 F1 F2 F3 F4 F5 M1 M2 M3 M4 M5);
16 my (%Result,%GroupTGT);
18 while(<$in>) {
19 chomp;
20 my ($chr,$pos,$Alleles,$Qual,@SampleDat) = split /\t/;
21 #print join(", ",$chr,$pos,@SampleDat),"\n";
22 my (@SampleGT,@SampleDep);
23 for (@SampleDat) {
24 my ($TGT,$Deps) = split /;/;
25 my @aSampleGT = split /[\/|]/,$TGT;
26 my @aSampleDep = split /,/,$Deps;
27 my $aSumDep;
28 map { $aSumDep += $_ } @aSampleDep;
29 push @SampleDep,$aSumDep;
30 push @SampleGT,join('',sort @aSampleGT);
32 my (%GTf,%GTm);
33 for (1..5) {
34 ++$GTf{$SampleGT[$_]} if $SampleDep[$_] >= $minDepth;
36 for (6..10) {
37 ++$GTm{$SampleGT[$_]} if $SampleDep[$_] >= $minDepth;
39 #++$GTf{$SampleGT[$_]} for (1..5);
40 #++$GTm{$SampleGT[$_]} for (6..10);
41 ++$Result{'F'}{keys(%GTf)};
42 ++$Result{'M'}{keys(%GTm)};
43 if (keys(%GTf)+keys(%GTm)>2) {
44 print "@SampleGT,@SampleDep\n";
45 ddx %GTf if keys(%GTf)>1;
46 ddx %GTm if keys(%GTm)>1;
48 #print join(", ",$chr,$pos,@SampleGT,@SampleDep),"\n";
49 #ddx \%Result;
51 close $in;
53 ddx \%GroupTGT;
54 ddx \%Result;