modified: Makefile
[GalaxyCodeBases.git] / perl / etc / calblast.pl
blob148cc4c88a724d6a397566673e032f561ba5837b
1 #!/bin/env perl
2 use strict;
3 use warnings;
4 use Data::Dump qw(ddx);
# State shared by the whole script.
#   $flag    - true while the main parsing loop should keep running
#   $minaln  - minimum value both numbers on an "Identities = a/b" line
#              must reach for the HSP to be counted
#   $ia,$ib  - the two numbers captured from the current Identities line
#   $E       - Expect value of the current HSP
#   $line    - the input line currently being examined
#   $qSt..$sEd - first/last query and subject coordinates of the current HSP
#   @Q, @S   - per-position coverage depth, indexed by query / subject coordinate
my $flag   = 1;
my $minaln = 200;
my ($ia, $ib, $E, $line, $qSt, $qEd, $sSt, $sEd);
my (@Q, @S);

# setQS($ia, $ib, $qSt, $qEd, $sSt, $sEd)
#
# Adds one to the coverage depth of every query position in $qSt..$qEd
# (stored in @Q) and of every subject position in $sSt..$sEd (stored in @S).
#
# $ia and $ib are accepted for call compatibility but are not used: an
# earlier variant (kept in the history of this file as commented-out code)
# stored the largest identity count per position instead of a depth.
#
# If any coordinate is undefined the call is skipped with a warning.
# Without this guard the range operator would treat undef as 0 and silently
# increment positions 0..N. A start greater than its end yields an empty
# range, so nothing is counted for that sequence.
#
# No meaningful return value.
sub setQS {
    my ($ia, $ib, $qSt, $qEd, $sSt, $sEd) = @_;

    for my $coord ($qSt, $qEd, $sSt, $sEd) {
        next if defined $coord;
        warn "setQS: undefined coordinate, alignment skipped\n";
        return;
    }

    ++$Q[$_] for $qSt .. $qEd;
    ++$S[$_] for $sSt .. $sEd;
    return;
}
# Input: a BLAST text report. The path may be given as the first command
# line argument; without one the historical default 'tig2cat.blast' is used.
my $blast_file = @ARGV ? $ARGV[0] : 'tig2cat.blast';
open my $blast_fh, '<', $blast_file
    or die "Cannot open $blast_file: $!";

# Every HSP of interest starts with a three-line header like:
#   Score = 2.188e+04 bits (11039), Expect = 0.0
#   Identities = 12026/12360 (97%), Gaps = 72/12360 (0%)
#   Strand = Plus / Plus
# followed by "Query: <start> <seq> <end>" / "Sbjct: <start> <seq> <end>"
# rows until the next Score line.

$line = readline($blast_fh);

HSP:
while ($flag) {
    # End of input: stop the scan.
    unless (defined $line) {
        $flag = 0;
        last HSP;
    }

    # Skip forward until the current line is an HSP header.
    unless ($line =~/^ Score = /) {
        $line = readline($blast_fh);
        next HSP;
    }

    # The character class lists exactly the characters of a decimal or
    # exponent number; the previous class contained the accidental range
    # "+-e", which also accepted most punctuation and all capital letters.
    $line =~ / Expect = ([-+.\deE]+)$/ or die "[$line]";
    $E = $1;
    $E = "1$E" if $E=~/^e/i;    # a bare exponent such as "e-100" means "1e-100"

    # The whole scan ends at the first HSP weaker than 1e-10
    # (NOTE(review): presumably relies on the report being ordered by
    # E-value - confirm against the BLAST output in use).
    last HSP if $E>1e-10;

    $line = readline($blast_fh);
    defined $line
        or die "Unexpected end of $blast_file after a Score line\n";
    $line =~ /^ Identities = (\d+)\/(\d+) /
        or die "Expected an Identities line in $blast_file, got [$line]";
    ($ia, $ib) = ($1, $2);

    # Too short: leave $line as it is; the header scan above moves on.
    next HSP if ($ia<$minaln or $ib<$minaln);

    $line = readline($blast_fh);
    next HSP unless defined $line;    # EOF is handled at the loop head

    # Only Plus/Plus HSPs are counted; anything else is skipped.
    unless ($line =~ /^ Strand = Plus \/ Plus/) {
        $line = readline($blast_fh);
        next HSP;
    }

    # Collect the start/end coordinates of every alignment row of this HSP.
    # The loop ends at EOF ($line undefined) or at the next Score line,
    # which is left in $line for the next iteration of the outer loop.
    my (@QSt, @QEd, @SSt, @SEd);
    ROW:
    while ($line = readline($blast_fh)) {
        last ROW if $line =~/^ Score = /;
        if ($line =~/^Query: (\d+) [^\d]+(\d+)$/) {
            push @QSt, $1;
            push @QEd, $2;
        }
        if ($line =~/^Sbjct: (\d+) [^\d]+(\d+)$/) {
            push @SSt, $1;
            push @SEd, $2;
        }
    }

    # An HSP header without any parsable rows has no coordinates to count.
    unless (@QSt and @SSt) {
        warn "HSP without Query/Sbjct rows skipped\n";
        next HSP;
    }

    # The HSP spans from the start of its first row to the end of its last.
    ($qSt, $qEd) = ($QSt[0], $QEd[-1]);
    ($sSt, $sEd) = ($SSt[0], $SEd[-1]);
    setQS($ia, $ib, $qSt, $qEd, $sSt, $sEd);
}

close $blast_fh;
warn "\nDone.\n";
# _tally_depths(\@depths)
#
# Summarises a per-position coverage array. Returns a flat list:
# the number of positions with depth exactly 1, followed by key/value pairs
# of a histogram mapping depth => number of positions (never-touched
# positions are counted under depth 0).
#
# Counting starts at index 1: no coordinate in a BLAST report is 0, so
# index 0 is never a real base and must not be reported as uncovered.
# Positions beyond the last covered index are not part of the array and
# are therefore not counted at all.
sub _tally_depths {
    my ($depths) = @_;
    my $single = 0;
    my %hist;

    for my $pos (1 .. $#{$depths}) {
        my $depth = $depths->[$pos];
        $depth = 0 unless defined $depth;
        ++$single if $depth == 1;
        ++$hist{$depth};
    }
    return ($single, %hist);
}

# $Ql / $Sl: query / subject positions covered exactly once.
# %Qh / %Sh: depth histograms for query / subject.
my ($Ql, $Sl, %Qh, %Sh);
($Ql, %Qh) = _tally_depths(\@Q);
($Sl, %Sh) = _tally_depths(\@S);

warn "Q:$Ql\nS:$Sl\n";
ddx \%Qh;
ddx \%Sh;
89 __END__
90 repeated==1
91 Q:10557598
92 S:10504512
94 repeated<=2
95 Q:10637014
96 S:10621036
98 no repeat filter (directly ++)
99 Q:12958103
100 S:16862471
102 Len:
103 cat75n1458 16866601
104 tiger75n1458 12958419
106 Query= Tiger75n1458
109 # "0" => 1772885,
110 # "1" => 10557598,
111 # "2" => 79416,
112 # "3" => 42748,
113 # "4" => 24955,
114 # "5" => 11571,
116 # "0" => 5396572,
117 # "1" => 10504512,
118 # "2" => 116524,
119 # "3" => 54313,
120 # "4" => 36982,
121 # "5" => 34610,