new file: cell2loc.py
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / Tiger / CountPstInumber.pl
blob89540ff40ed80dd50d1657adbefbcdf2471be588
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use threads;
7 sub SiteCover {
8 my $file = shift;
9 my %count;
10 open my $in, "-|", "samtools view -f 64 -F 1796 $file" or die "Error opening $file: $!\n";
11 while (<$in>) {
12 my @align = split /\t/;
13 my @match = $align[5] =~ /(\d+)M/g;
14 my $sum_match = 0;
15 $sum_match += $_ foreach @match;
16 next if $sum_match < 64;
17 @match = sort {$b <=> $a} @match;
18 next if $match[0] < 30;
19 if ($align[1] & 0x10) {
20 next if $align[5] =~ /[NHP=X]/;
21 next unless $align[5] =~ /(\d+)M$/;
22 next unless $1 >= 5;
23 my @del = $align[5] =~ /(\d+)D/g;
24 my $sum_del = 0;
25 $sum_del += $_ foreach @del;
26 my $c = $align[3] + $sum_match + $sum_del - 1;
27 $count{$align[2]}{$c}[0] = 1;
28 } else {
29 next unless $align[5] =~ /^(\d+)M/;
30 next unless $1 >= 5;
31 $count{$align[2]}{$align[3]+3}[1] = 1;
34 close $in;
35 my $t = localtime;
36 print STDERR "$t\tCount file $file complete!\n";
37 return \%count;
41 my %thread;
42 while (<>) {
43 chomp;
44 $thread{$_} = threads->new (\&SiteCover, $_);
46 my $t = localtime;
47 print STDERR "$t\tDestribute threads completed!\n";
50 my %all;
51 foreach (keys %thread) {
52 $all{$_} = $thread{$_}->join;
54 $t = localtime;
55 print STDERR "$t\tRead all file completed!\n";
57 my %stat_site;
58 foreach my $i (keys %all) {
59 $i =~ /^(\d\d)/;
60 my $bit = 18 - $1;
61 foreach my $s (keys %{$all{$i}}) {
62 foreach my $c (keys %{$all{$i}{$s}}) {
63 ++$stat_site{$s}{$c}[0] if $all{$i}{$s}{$c}[0];
64 ++$stat_site{$s}{$c}[1] if $all{$i}{$s}{$c}[1];
69 my $PstI;
70 foreach my $s (keys %stat_site) {
71 foreach my $c (keys %{$stat_site{$s}}) {
72 ++$PstI if ($stat_site{$s}{$c}[0] and $stat_site{$s}{$c}[1]);
76 $t = localtime;
77 print STDERR "$t\tCount PstI complete!\n";
78 print STDERR "Number of PstI is $PstI\n";