modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / cntsnp3.pl
blobaea0d66a302af4194b2242d22ef48af6f189fc22
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump;
6 #die "Usage: $0 <blood vcf.gz> <sperm vcf.gz>\n" if @ARGV < 2;
7 die "Usage: $0 <spermA vcf> <spermB vcf> <spermC vcf>\n" if @ARGV < 3;
8 my ($inA,$inB,$inC)=@ARGV;
10 my (@FH,%SNP);
11 for ($inA,$inB,$inC) {
12 my $t;
13 open $t,'<',$_ or die "$!";
14 push @FH,$t;
17 sub getGT($) {
18 my $str = $_[0];
19 my ($chr,$pos,undef,$ref,$alt,undef,undef,$INFO,undef,$data) = split /\t/,$str;
20 my @GeneTypes=split /,/,$alt;
21 unshift @GeneTypes,$ref;
22 my $GT = (split /:/,$data)[0];
23 my @GTs=split /[\/|]/,$GT;
24 my @ret = ( $chr, $pos, $GeneTypes[$GTs[0]] . $GeneTypes[$GTs[1]] );
25 return \@ret
28 sub readfile($) { # 1 .. 3
29 my $id = $_[0];
30 my $fh = $FH[ $id - 1 ];
31 while (<$fh>) {
32 next if /^#/;
33 my ($chr,$pos,undef,$ref,$alt,$QUAL,undef,$INFO,undef,$data) = split /\t/;
34 next if $chr =~ /[XYM]/i;
35 next if $INFO =~ /INDEL;/;
36 next if $QUAL < 20;
37 my $GQ = (split /:/,$data)[-1];
38 next if $GQ < 20;
39 next unless $chr =~ /^chr\d+/;
40 my $GT;
41 ($chr,$pos,$GT) = @{getGT($_)};
42 $SNP{$chr}{$pos}{$id} = $GT;
46 readfile($_) for (1 .. 3);
47 close $_ for @FH;
49 #ddx \%SNP;
50 # 141050953 => { 1 => "TT", 2 => "TT" },
52 my %Stat;
53 for my $chr (keys %SNP) {
54 for my $pos (keys %{$SNP{$chr}}) {
55 my %Dat = %{$SNP{$chr}{$pos}};
56 my $flag = 0;
57 for my $k (keys %Dat) {
58 $flag |= 2 ** $k;
60 ++$Stat{$flag};
61 #ddx $SNP{$chr}{$pos}; print "$flag\n\n";
65 print "$_\t",$Stat{$_},"\n" for sort {$a<=>$b} keys %Stat;
67 __END__
68 perl ./cntsnp3.pl sperm23.5cf.filter sperm24.5cf.filter sperm28.5cf.filter
69 2 243901
70 4 268409
71 6 38623
72 8 253689
73 10 33956
74 12 35961
75 14 7207
77 perl ./cntsnp3.pl sperms01.5cf.filter sperms02.5cf.filter sperms03.5cf.filter
78 2 676931
79 4 635059
80 6 60638
81 8 623031
82 10 65199
83 12 58892
84 14 36960