modified: myjupyterlab.sh
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / cntsnp.pl
blob91da7922c987674496ce9123c0b0d44ae87ac0cb
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump;
6 #die "Usage: $0 <blood vcf.gz> <sperm vcf.gz>\n" if @ARGV < 2;
7 die "Usage: $0 <blood vcf> <sperm vcf>\n" if @ARGV < 2;
8 my ($inbd,$insp)=@ARGV;
10 #open IBD, "-|", "zcat $inbd" or die "$!";
11 #open ISP, "-|", "zcat $insp" or die "$!";
12 open IBD,'<',$inbd or die "$!";
13 open ISP,'<',$insp or die "$!";
14 open O,'>',"$insp.stat";
16 my %SNP;
18 sub getGT($) {
19 my $str = $_[0];
20 my ($chr,$pos,undef,$ref,$alt,undef,undef,$INFO,undef,$data) = split /\t/,$str;
21 my @GeneTypes=split /,/,$alt;
22 unshift @GeneTypes,$ref;
23 my $GT = (split /:/,$data)[0];
24 my @GTs=split /[\/|]/,$GT;
25 my @ret = ( $chr, $pos, $GeneTypes[$GTs[0]] . $GeneTypes[$GTs[1]] );
26 return \@ret
29 while (<ISP>) {
30 next if /^#/;
31 my ($chr,$pos,undef,$ref,$alt,$QUAL,undef,$INFO,undef,$data) = split /\t/;
32 next if $INFO =~ /INDEL;/;
33 next if $QUAL < 20;
34 my $GQ = (split /:/,$data)[-1];
35 next if $GQ < 20;
36 next unless $chr =~ /^chr\d+/;
37 my $GT;
38 ($chr,$pos,$GT) = @{getGT($_)};
39 $SNP{$chr}{$pos}{'SP'} = $GT;
41 while (<IBD>) {
42 next if /^#/;
43 my ($chr,$pos,undef,$ref,$alt,$QUAL,undef,$INFO,undef,$data) = split /\t/;
44 next if $INFO =~ /INDEL;/;
45 next if $QUAL < 20;
46 my $GQ = (split /:/,$data)[-1];
47 next if $GQ < 20;
48 next unless $chr =~ /^chr\d+/;
49 my $GT;
50 ($chr,$pos,$GT) = @{getGT($_)};
51 $SNP{$chr}{$pos}{'BD'} = $GT;
53 #ddx \%SNP;
54 my (%Results,%subResults);
55 for my $chr (keys %SNP) {
56 for my $pos (keys %{$SNP{$chr}}) {
57 my ($GT1,$GT2) = ( $SNP{$chr}{$pos}{'BD'}, $SNP{$chr}{$pos}{'SP'} );
58 unless ( defined $GT1 ) {
59 ++$Results{$chr}{'SpermOnly'};
60 ++$Results{'_All_'}{'SpermOnly'};
61 next;
63 unless ( defined $GT2 ) {
64 ++$Results{$chr}{'BloodOnly'};
65 ++$Results{'_All_'}{'BloodOnly'};
66 next;
68 my @gt1 = split //,$GT1;
69 my @gt2 = split //,$GT2;
70 my ($flag1,$flag2);
71 if ($gt1[0] eq $gt1[1]) { $flag1='Hom'; } else { $flag1='Het'; }
72 if ($gt2[0] eq $gt2[1]) { $flag2='Hom'; } else { $flag2='Het'; }
73 #print "$gt1[0] $gt1[1] $gt2[0] $gt2[1] $flag1,$flag2\n";
74 my ($str,$subtype);
75 if ($GT1 eq $GT2) {
76 $str = 'Same';
77 } else {
78 $str = $flag1 . '-' . $flag2;
79 if ( $flag1 eq $flag2 ) {
80 if ($flag1 eq 'Hom') {
81 $subtype = 'Hom' . $gt1[1] . $gt2[1];
82 } else {
83 $subtype = 'Het' . $GT1 . $GT2;
87 ++$Results{$chr}{$str};
88 ++$Results{'_All_'}{$str};
89 if ( defined $subtype ) {
90 ++$subResults{$chr}{$subtype};
91 ++$subResults{'_All_'}{$subtype};
95 ddx \%subResults;
96 ddx \%Results;
98 for my $chr (sort keys %Results) {
99 print O "$chr\t";
100 for my $type (sort keys %{$Results{$chr}}) {
101 print O "$type: $Results{$chr}{$type}\t";
103 if (exists $subResults{$chr}) {
104 print O "|\t";
105 for my $type (sort keys %{$subResults{$chr}}) {
106 print O "$type: $subResults{$chr}{$type}\t";
109 print O "\n";
112 close IBD;
113 close ISP;
114 close O;
116 __END__
117 perl cntsnp.pl blood.gz sperm23.gz
119 perl cntsnp.pl blood.mda.filter.gz sperm23.vcf.filter.gz