a script implemented as a MooseX::Runnable script to calculate the gap sizes in the...
[cxgn-corelibs.git] / lib / CXGN / Tools / GetGaps.pm
blob9434bd142317e3978301e70eee29225c4bc0e7d9
2 package CXGN::Tools::GetGaps;
3 use Moose;
5 with 'MooseX::Runnable';
6 with 'MooseX::Getopt';
8 use Bio::SeqIO;
10 has 'min_gap_size' => (is => 'rw',
11 isa => 'Int',
12 traits=> ['Getopt'],
13 default=>10,
17 has 'fasta_file' => (is => 'rw',
18 isa => 'Str',
19 traits=>['Getopt'],
24 sub run {
25 my $self = shift;
27 my $io = Bio::SeqIO->new(-format=>'largefasta', -file=>$self->fasta_file());
29 my $gap_no = 1;
31 while (my $s = $io->next_seq()) {
32 my $seq = $s->seq();
33 my $id = $s->id();
35 warn "Processing sequence $id (".$s->length()." nucleotides)...\n";
37 my $n_region_start = 0;
38 my $n_region_end = 0;
39 foreach my $i (1..$s->length()) {
40 my $nuc = $s->subseq($i, $i);
42 if ($nuc=~/n/i) {
43 if (!$n_region_start) { $n_region_start=$i; }
46 else {
47 if ($n_region_start) { $n_region_end = $i; }
49 my $gap_size = $n_region_end - $n_region_start + 1;
50 if ($gap_size > $self->min_gap_size()) {
52 print "$id\_"; printf "%06d", "$gap_no"; print "\t$id\t$n_region_start\t$n_region_end\t$gap_size\n";
53 $gap_no++;
57 $n_region_start = 0;
58 $n_region_end = 0;