new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / getvcf.pl
blob070a965a63efe4ecbadbff23826f2ef1dea0fcb5
1 #!/usr/bin/env perl
2 =pod
3 Author: Hu Xuesong @ BGI <huxuesong@genomics.org.cn>
4 Version: 1.0.0 @ 20180417
5 =cut
6 use strict;
7 use warnings;
8 use Galaxy::IO;
9 use Data::Dump qw(ddx);
11 my $in = openfile('0328.snp.table.16.txt.gz');
12 chomp(my $t = <$in>);
13 my @Header = split(/\t/,$t);
14 my @SampleIDs = splice @Header,4;
15 my $qGroupCnt = @SampleIDs/4;
16 die "[x]Sample count $qGroupCnt is not 4n !\n" if $qGroupCnt != int($qGroupCnt);
17 print join(", ","$qGroupCnt: ",@SampleIDs),"\n";
19 my (%Result,%GroupTGT);
21 while(<$in>) {
22 chomp;
23 my ($chr,$pos,$Alleles,$Qual,@SampleDat) = split /\t/;
24 #print join(", ",$chr,$pos,@SampleDat),"\n";
25 my (@SampleGT,@SampleDep);
26 for (@SampleDat) {
27 my ($TGT,$Deps) = split /;/;
28 my @aSampleGT = split /[\/|]/,$TGT;
29 my @aSampleDep = split /,/,$Deps;
30 my $aSumDep;
31 map { $aSumDep += $_ } @aSampleDep;
32 push @SampleDep,$aSumDep;
33 if (@aSampleGT == 2 and $aSampleGT[0] eq $aSampleGT[1]) {
34 push @SampleGT,$aSampleGT[0];
35 } else {
36 push @SampleGT,join('','x',@aSampleGT);
39 for my $i (1 .. $qGroupCnt) {
40 my $flag = 1;
41 my %gTGTs;
42 for my $j (4*($i-1) .. (4*$i-1)) {
43 if ($SampleDep[$j] < 15) { # min depth is 15
44 $flag = 0;
45 last;
46 } else {
47 ++$gTGTs{$SampleGT[$j]}
50 my $cp = "$chr\t$pos";
51 if ($flag == 0) {
52 $GroupTGT{$cp}{$i} = '.'; # less depth
53 next;
55 my @TGTs = keys(%gTGTs);
56 if (@TGTs == 1 and length($TGTs[0])!=1) {
57 $GroupTGT{$cp}{$i} = '-'; # 4 being same, but Het
58 next;
59 } else {
60 ++$Result{$i}{@TGTs};
61 ++$Result{$i}{-1};
62 if (@TGTs == 1) {
63 $GroupTGT{$cp}{$i} = $TGTs[0];
67 #print join(", ",$chr,$pos,@SampleGT,@SampleDep),"\n";
68 ddx \%Result;
70 close $in;
72 ddx \%GroupTGT;
73 ddx \%Result;