new file: cell2loc.py
[GalaxyCodeBases.git] / perl / soap / draw / subBin / callNgap.pl
blob3591ebe9bcf18bac28a791f998b97252066addcc
#!/usr/bin/perl -w
#
# Report runs of 'N' (gap) bases in a FASTA file.
#
# Usage:
#     perl callNgap.pl <fasta file> <N size> <output file>
#
# Arguments:
#     <fasta file>   FASTA file to scan.
#     <N size>       Minimum run length of N/n that counts as a gap.
#                    An empty value or 0 falls back to 1.
#     <output file>  Receives one tab-separated line per gap:
#                    sequence name, 1-based start, 1-based inclusive end,
#                    gap length.
#
# A per-sequence summary (total length, length without gaps, gap count,
# total gap size) is written to STDERR via warn, so that it does not mix
# with the gap table in <output file>.
#
# Every sequence is held in memory in full before it is scanned.
use strict;
use warnings;

unless (@ARGV) {
    print "\n\tto calculate the chromsome length and effective length\n";
    print "\n\tperl $0 <fasta file> < N size, default 1> <output file>\n\n";
    exit 0;
}

my ($fastaFile, $NsizeArg, $outputFile) = @ARGV;

# Fail early with a readable message instead of letting open() choke on
# an empty file name.
die "Missing <output file> argument; run without arguments for usage\n"
    unless defined $outputFile && length $outputFile;

# The size ends up inside a regex quantifier, so accept digits only.
my $Nsize = $NsizeArg || 1;
die "N size must be a non-negative integer, got '$Nsize'\n"
    unless $Nsize =~ /\A[0-9]+\z/;
$Nsize = ($Nsize + 0) || 1;    # normalise "007" -> 7 and "00" -> 1

my $name  = '';
my %fasta = ();

open my $in, '<', $fastaFile
    or die "Cannot open '$fastaFile' for reading: $!\n";
while (my $line = <$in>) {
    # Strip LF and CRLF line endings so a stray \r is never counted as a base.
    $line =~ s/[\r\n]+\z//;
    if ($line =~ /^>(.*)$/) {
        # The sequence name is the first whitespace-delimited word of the
        # header; split ' ' skips leading blanks ("> chr1" gives "chr1").
        my ($id) = split ' ', $1;
        $name = defined $id ? $id : '';
    }
    else {
        # Sequence lines seen before any header are collected under ''.
        $fasta{$name} .= $line;
    }
}
close $in;

open my $out, '>', $outputFile
    or die "Cannot open '$outputFile' for writing: $!\n";

# Compile the gap pattern once: at least $Nsize consecutive N or n.
my $gapRe = qr/N{${Nsize},}/i;

warn "Chromosome\tLen1\tEffectiveLen\tGapCount\tGapSize\n";
foreach my $chr (sort keys %fasta) {
    my $chrLen   = length $fasta{$chr};
    my $gapSize  = 0;
    my $gapCount = 0;

    while ($fasta{$chr} =~ /$gapRe/g) {
        # $-[0] / $+[0] are the 0-based start and end offsets of the match;
        # the end offset is exclusive, so it is also the 1-based last base.
        my $length = $+[0] - $-[0];
        my $start  = $-[0] + 1;
        print {$out} "$chr\t$start\t$+[0]\t$length\n";
        $gapSize += $length;
        $gapCount++;
    }

    my $effectiveLen = $chrLen - $gapSize;
    warn "$chr\t$chrLen\t$effectiveLen\t$gapCount\t$gapSize\n";
}

# Buffered write errors (e.g. a full disk) only surface at close.
close $out or die "Cannot close '$outputFile': $!\n";