new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / justonce / sperm / cntsnpn.pl
blob58390df40493ea65ea84423e8a5cdc09869eade1
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump;
6 #die "Usage: $0 <blood vcf.gz> <sperm vcf.gz>\n" if @ARGV < 2;
7 die "Usage: $0 <vcf1> <vcf2> [vcf3.gz ..]\n" if @ARGV < 2 or @ARGV > 32;
8 my @INS=@ARGV;
9 my $Length = scalar @INS;
10 my (@FH,%SNP);
12 sub openfile($) {
13 my ($filename)=@_;
14 my $infile;
15 if ($filename=~/.xz$/) {
16 open( $infile,"-|","xz -dc $filename") or die "Error opening $filename: $!\n";
17 } elsif ($filename=~/.gz$/) {
18 open( $infile,"-|","gzip -dc $filename") or die "Error opening $filename: $!\n";
19 } else {open( $infile,"<",$filename) or die "Error opening $filename: $!\n";}
20 return $infile;
23 sub readfile($) {
24 my $id = $_[0];
25 my $fh = $FH[ $id ];
26 print STDERR "Reading [$id] ...";
27 my ($cntID,$totalID) = (0,0);
28 while (<$fh>) {
29 next if /^#/;
30 my ($chr,$pos,undef,$ref,$alt,$QUAL,undef,$INFO,undef,$data) = split /\t/;
31 next if $chr =~ /[XYM]/i;
32 next if $INFO =~ /INDEL;/;
33 ++$totalID;
34 if (defined $data and $id > 0) {
35 next if $QUAL < 20;
36 my $GQ = (split /:/,$data)[-1];
37 next if $GQ < 20;
39 $chr =~ s/^chr//i;
40 next unless $chr =~ /^\d+/;
41 #my $GT;
42 #($chr,$pos,$GT) = @{getGT($_)};
43 my $GT = 1;
44 $SNP{$chr}{$pos}{$id} = $GT;
45 ++$cntID;
47 print STDERR "\b\b\b\b, done ! [$totalID -> $cntID]\n";
48 print OUT "[$INS[$id]]:[$totalID -> $cntID]\n";
51 for (@INS) {
52 my $t;
53 $t = openfile($_);
54 push @FH,$t;
56 open OUT,'>','stat.txt';
57 print OUT "[@INS] -> [stat.txt]\n";
59 warn "[@INS] -> [stat.txt]\n";
61 readfile($_) for (0 .. $#FH);
62 close $_ for @FH;
64 # chr1 245017123 1 1 0 0.31 0.69 0 0 rs880
65 #CHROM POS ID REF ALT QUAL FILTER INFO
66 #1 10019 rs376643643 TA T . . RS=376643643;RSPOS=10020;dbSNPBuildID=138;SSR=0;SAO=0;VP=0x050000020005000002000200;WGT=1;VC=DIV;R5;ASP;OTHERKG
68 my %Stat;
69 for my $chr (keys %SNP) {
70 for my $pos (keys %{$SNP{$chr}}) {
71 my %Dat = %{$SNP{$chr}{$pos}};
72 my $flag = 0;
73 for my $k (keys %Dat) {
74 $flag |= 2 ** $k;
76 ++$Stat{$flag};
77 #ddx $SNP{$chr}{$pos}; print "$flag\n\n";
81 for (sort {$a<=>$b} keys %Stat) {
82 my $v = sprintf("%0${Length}b",$_);
83 my $str = join("\t",$v,$Stat{$_},$_);
84 print "$str\n";
85 print OUT "$str\n"
87 close OUT;
89 __END__
90 ./cntsnpn.pl dbSNP132.chr.All sperms0{1,2,3}.5cf.filter sperm2{3,4,8}.5cf.filter
91 ./cntsnpn.pl human_9606_b142_GRCh37p13.All.vcf.gz sperms0{1,2,3}.5cf.filter sperm2{3,4,8}.5cf.filter blood.malbac5.filter blood.mda3.filter
92 二进制数的顺序和第一行顺序相反