modified: makefile
[GalaxyCodeBases.git] / tools / linkage / getNzones.pl
bloba8966fa74440729fab775ddd5d788818c9457a1f
1 #!/bin/env perl
2 use strict;
3 use warnings;
5 unless (@ARGV > 0) {
6 print "perl $0 <fa file> <out N zones>\n";
7 exit 0;
10 my ($in,$out)=@ARGV;
12 my $infile;
13 if ($in =~ /.bz2$/) {
14 open( $infile,"-|","bzip2 -dc $in") or die "Error: $!\n";
15 } elsif ($in =~ /.gz$/) {
16 open( $infile,"-|","gzip -dc $in") or die "Error: $!\n";
17 } else {open( $infile,"<",$in) or die "Error: $!\n";}
20 local $/=">";
21 $_=<$infile>;
22 die "[x]Not a FASTA file !\n" unless /^\s*>/;
25 open O,'>',$out or die "Error: $!\n";
26 while (<$infile>) {
27 chomp;
28 my ($id,$desc)=split / /,$_,2;
29 $desc='' unless $desc;
30 $/=">";
31 my $seq=<$infile>;
32 chomp $seq;
33 $seq =~ s/\s//g;
34 $/="\n";
35 print STDERR ">$id,\t[$desc]:\n";
36 my $len=length($seq);
37 my $pos=0;
38 my $inNzone=0;
39 my ($Nstart,$Ncount,$Nlen)=(0,0,0);
40 while ($pos <= $len) {
41 my $base=substr($seq,$pos,1);
42 my $isN = ($base =~ /N/i);
43 if (! $isN and ! $inNzone) {
44 $inNzone=0;
45 } elsif ($isN and ! $inNzone) {
46 $inNzone=1;
47 $Nstart=$pos;
48 print O "$id\t",$pos+1,"\t";
49 } elsif (! $isN and $inNzone) {
50 $inNzone=0;
51 print O join("\t",$pos,$pos-$Nstart),"\n";
52 $Nlen += $pos-$Nstart;
53 ++$Ncount;
54 $Nstart=0;
55 } else {
58 ++$pos;
60 my $avg='NA';
61 $avg=int($Nlen/$Ncount) if $Ncount;
62 print STDERR " Len:$len\tN-zone: $Nlen <- $Ncount x ",$avg,"\n";
64 close O;
65 close $infile;