# Declares the CXGN::Tools::GetGaps package.
# NOTE(review): this listing is a partial extraction -- each statement carries
# its original file line number as a leading token, and many original lines
# (including any `use` lines) are not visible here.
2 package CXGN
::Tools
::GetGaps
;
# Applies the MooseX::Runnable role to this package.
# NOTE(review): presumably Moose's `with`; the line that imports it is not
# visible in this extract -- confirm.
5 with
'MooseX::Runnable';
# Declares the read-write (`is => 'rw'`) attribute `min_gap_size`.
# NOTE(review): the rest of the option list (type, default, closing
# parenthesis) is not visible in this extract.
10 has
'min_gap_size' => (is
=> 'rw',
# Declares the read-write (`is => 'rw'`) attribute `fasta_file`; its value is
# later passed as the -file argument when the sequence stream is opened.
# NOTE(review): the rest of the option list (type, required/default, closing
# parenthesis) is not visible in this extract.
17 has
'fasta_file' => (is
=> 'rw',
# Opens the file named by the fasta_file attribute as a Bio::SeqIO stream,
# requesting the 'largefasta' format.
# NOTE(review): the enclosing sub header and the assignment of $self are not
# visible in this extract.
27 my $io = Bio
::SeqIO
->new(-format
=>'largefasta', -file
=>$self->fasta_file());
# Iterates over every sequence object the stream yields.
31 while (my $s = $io->next_seq()) {
# Progress message via warn, including the sequence length.
# NOTE(review): $id is assigned on a line not visible here.
35 warn "Processing sequence $id (".$s->length()." nucleotides)...\n";
# Start position of the region currently being tracked. 0 means "no region
# open": the position loop below starts at 1, so 0 is never a real position.
37 my $n_region_start = 0;
# Walks the sequence one position at a time, 1-based.
39 foreach my $i (1..$s->length()) {
# Fetches the single residue at position $i.
# NOTE(review): this is one subseq() call per base, which may be slow on
# large sequences -- worth profiling.
40 my $nuc = $s->subseq($i, $i);
# Records $i as the region start only when no region is open yet.
# NOTE(review): the enclosing condition (original lines 41-42) is not visible;
# presumably it tests $nuc for a gap character -- confirm.
43 if (!$n_region_start) { $n_region_start=$i; }
# With a region open, records $i as its end.
# NOTE(review): if this runs on the first base AFTER the gap, $i is one past
# the last gap base, so the end and the size below would be one too large --
# confirm against the missing original lines 44-46.
47 if ($n_region_start) { $n_region_end = $i; }
# Inclusive length of the interval [start, end].
49 my $gap_size = $n_region_end - $n_region_start + 1;
# Only regions strictly longer than min_gap_size pass; a region whose size
# equals min_gap_size is not reported.
50 if ($gap_size > $self->min_gap_size()) {
# Emits one tab-separated record: "<id>_<gap number zero-padded to 6 digits>",
# then id, start, end and size. The "\_" stops Perl from reading "$id_" as a
# variable name.
# NOTE(review): $gap_no and $n_region_end are declared/updated on lines not
# visible here, as are any reset of $n_region_start and all closing braces.
52 print "$id\_"; printf "%06d", "$gap_no"; print "\t$id\t$n_region_start\t$n_region_end\t$gap_size\n";