new file: cell2loc.py
[GalaxyCodeBases.git] / perl / popuSNP / new / psnphc.pl
blob08481cf9ee8ee6b1ba168c299cbf91358716893b
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 die "perl $0 <psnp file(.add_ref)> <out prefix> [minCov,minHom]\n" unless (@ARGV);
7 my ($minCov,$minHom);
8 if ($ARGV[2]) {
9 ($minCov,$minHom)=split /,/,$ARGV[2];
10 warn "[!]Cov>=$minCov, Hom>=$minHom\n";
11 } else { ($minCov,$minHom)=(2,2); }
12 open PSNP,'<', $ARGV[0] or die "$!\n";
13 chomp(my $title=<PSNP>);
14 my @Samples;
15 open OUT,'>', "$ARGV[1].psnp" or die "$!\n";
16 if ($title =~ /^#ChrID\t/) {
17 print OUT $title,"\n";
18 @Samples = split / /,(split /\t/,$title)[-1];
19 } else {
20 seek PSNP,0,0;
22 print STDERR '[!]Samples: [',join('|',@Samples),"]\n";
24 open NFO,'>', "$ARGV[1].hcnfo" or die "$!\n";
25 print NFO "Hom\tCov\n";
26 my ($In,$Out,%Cov,%Hom)=(0,0);
27 while (<PSNP>) {
28 chomp;
29 ++$In;
30 my ($chr,$pos,$ref,$tail)=split /\t/;
31 my @indSNP=split / /,$tail; # /[ACGTRYMKSWHBVDNX-]/
32 my ($hom,$cov,$all)=(0,0,0);
33 for (@indSNP) {
34 ++$all;
35 ++$hom if /[ATCG]/i;
36 ++$cov unless /-/;
38 $hom /= $cov;
39 $cov /= $all;
40 print NFO "$hom\t$cov\n";
41 if ($hom >= $minHom and $cov >= $minCov) {
42 print OUT join("\t",$chr,$pos,$ref,$tail),"\n";
43 ++$Out;
46 close NFO;
47 close OUT;
48 warn "\n[!] $In -> $Out, ",int(10000*$Out/$In)/100,"%\n";
50 __END__
51 ./psnphc.pl psnp/Chr02.add_ref Chr02 0.95,0.99999
52 cat 9311/chrorder |while read a; do echo ./psnphc.pl ./psnp/$a.add_ref $a 0.95,0.999;done > ff.sh
54 perl -lane 'next if $F[8]<70;next if $F[8]>170;print join("\t",@F[0,1,2,7,8],$F[8]/($F[7]+$F[8]))' psnp/Chr01_final.snp
55 cat 9311/chrorder |while read a;do perl -lane 'next if $F[8]<70;next if $F[8]>170;print join("\t",@F[0,1,2,7,8],$F[8]/($F[7]+$F[8]))' psnp/${a}_final.snp > ff$a.snp;done
57 cat 9311/chrorder |while read a;do perl -e "my %Dat;open IN,'<','ff$a.snp';
58 while (<IN>) {\$x=(split /\t/)[1]; ++\$Dat{\$x};}
59 close IN;
60 open T,'<','psnp/$a.add_ref';
61 while (<T>) {
62 @F=split /\t/;
63 next unless \$Dat{\$F[1]};
64 print \$_;
66 close T;
67 " > aa$a.snp;
68 done