2 # BioPerl module for Bio::Tools::Genewise
6 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
12 Bio::Tools::Genewise - Results of one Genewise run
16 use Bio::Tools::Genewise;
17 my $gw = Bio::Tools::Genewise(-file=>"genewise.out");
19 while (my $gene = $gw->next_prediction){
20 my @transcripts = $gene->transcripts;
21 foreach my $t(@transcripts){
22 my @exons = $t->exons;
23 foreach my $e(@exons){
24 print $e->start." ".$e->end."\n";
31 This is the parser for the output of Genewise. It takes either a file
32 handle or a file name and returns a
33 Bio::SeqFeature::Gene::GeneStructure object.
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 the bugs and their resolution. Bug reports can be submitted via the
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Fugu Team, Jason Stajich
67 Email: fugui@worf.fugu-sg.org
68 Email: jason-at-bioperl.org
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
78 # Let the code begin...
81 package Bio
::Tools
::Genewise
;
86 use Bio::Tools::AnalysisResult;
87 use Bio::SeqFeature::Generic;
88 use Bio::SeqFeature::Gene::Exon;
89 use Bio::SeqFeature::FeaturePair;
90 use Bio::SeqFeature::Gene::Transcript;
91 use Bio::SeqFeature::Gene::GeneStructure;
93 use base qw(Bio::Root::Root Bio::Root::IO);
99 Usage : $obj->new(-file=>"genewise.out");
100 $obj->new(-fh=>\*GW);
101 Function: Constructor for genewise wrapper. Takes either a file or filehandle
103 Returns : Bio::Tools::Genewise object
105 See L<Bio::Tools::Genewise>
110 my($class,@args) = @_;
111 my $self = $class->SUPER::new
(@args);
112 $self->_initialize_io(@args);
119 Usage : $obj->_get_strand
120 Function: takes start and end values, swap them if start>end and
123 Returns :$start,$end,$strand
128 my ($self,$start,$end) = @_;
129 defined($start) || $self->throw("Need a start");
130 defined($end) || $self->throw("Need an end");
141 return ($start,$end,$strand);
148 Function: get/set for score info
149 Returns : a score value
155 return $self->{'_score'} = shift if @_;
156 return $self->{'_score'};
162 Usage : $obj->_prot_id
163 Function: get/set for protein id
164 Returns :a protein id
170 return $self->{'_prot_id'} = shift if @_;
171 return $self->{'_prot_id'};
177 Usage : $obj->_target_id
178 Function: get/set for genomic sequence id
186 return $self->{'_target_id'} = shift if @_;
187 return $self->{'_target_id'};
191 =head2 next_prediction
193 Title : next_prediction
194 Usage : while($gene = $genewise->next_prediction()) {
197 Function: Returns the gene structure prediction of the Genewise result
198 file. Call this method repeatedly until FALSE is returned.
201 Returns : a Bio::SeqFeature::Gene::GeneStructure object
204 See L<Bio::SeqFeature::Gene::GeneStructure>
209 sub next_prediction
{
212 unless ( $self->parsed ){
216 return shift @
{$self->{'_genes'}};
221 return $self->{'_parsed'} = 1 if @_ && $_[0];
222 return $self->{'_parsed'};
227 my (@alignments,@genes);
230 while ( defined($_ = $self->_readline) ) {
232 while( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
233 $alignments[$1] = $2;
235 if( /Score\s+(\-?\d+(\.\d+)?)/ ) {
236 $self->_score($1);# unless defined $self->_score;
239 if( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
240 $self->_prot_id($1); #unless defined $self->_prot_id;
243 if( /Target Sequence\s+(\S+)/ ) {
244 $self->_target_id($1);# unless defined $self->_target_id;
247 next unless /Gene\s+\d+\n/;
248 my @genes_txt = split(/Gene\s+\d+\n/,$_);
249 shift @genes_txt; #remove first empty entry
251 foreach my $gene_txt (@genes_txt) {
252 my $score = $alignments[$gene_num];
253 # If genewise has assigned a strand to the gene as a whole
254 # overall gene start and end
255 my ($g_start, $g_end, $type) =
256 $gene_txt =~ m/Gene\s+
257 (\d+)[\s-]+ # start (1-based)
262 my $source_tag = $type ? "$Srctag". "_
$type" : $Srctag;
263 my $genes = Bio::SeqFeature::Gene::GeneStructure->new
264 (-source => $source_tag,
265 -score => $self->_score,
267 $genes->add_tag_value('ID', $self->_prot_id.".gene
");
268 my $transcript = Bio::SeqFeature::Gene::Transcript->new
269 (-source => $source_tag,
271 ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
272 $genes->strand($g_strand);
274 # grab exon + supporting feature info
276 unless ( @exons = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g ) {
277 @exons = $gene_txt =~ m/(Exon .+\s+)/g;
281 # loop through each exon-supporting feature pair
282 foreach my $e (@exons){
283 my ($e_start,$e_end,$phase) =
285 (\d+)[\s-]+ # start (1 based)
287 phase\s+(\d+) # phase
290 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
292 $transcript->strand($e_strand) unless $transcript->strand != 0;
293 my $exon = Bio::SeqFeature::Gene::Exon->new
294 (-seq_id =>$self->_target_id,
295 -source => $source_tag,
300 -strand =>$e_strand);
302 $exon->add_tag_value('phase',$phase);
304 if( $self->_prot_id ) {
305 $exon->add_tag_value('Parent',$self->_prot_id);
307 $exon->add_tag_value("Exon
",$nbr++);
308 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
309 my ($geno_start,$geno_end,
310 $prot_start, $prot_end) = ($1,$2,$3,$4);
312 ($prot_start,$prot_end,
313 $prot_strand) = $self->_get_strand($prot_start,$prot_end);
314 my $pf = Bio::SeqFeature::Generic->new
315 ( -start => $prot_start,
317 -seq_id => $self->_prot_id,
319 -strand => $prot_strand,
320 -source => $source_tag,
321 -primary_tag => 'supporting_protein_feature',);
323 ($geno_start,$geno_end,
324 $geno_strand) = $self->_get_strand($geno_start,$geno_end);
325 my $gf = Bio::SeqFeature::Generic->new
326 ( -start => $geno_start,
328 -seq_id => $self->_target_id,
330 -strand => $geno_strand,
331 -source => $source_tag,
332 -primary_tag => 'supporting_genomic_feature',);
333 my $fp = Bio::SeqFeature::FeaturePair->new
336 $exon->add_tag_value( 'supporting_feature',$fp );
337 if( $self->_prot_id ) {
338 $exon->add_tag_value('Target','Protein:'.$self->_prot_id);
339 $exon->add_tag_value('Target',$prot_start);
340 $exon->add_tag_value('Target',$prot_end);
343 $transcript->add_exon($exon);
345 $transcript->seq_id($self->_target_id);
346 $transcript->add_tag_value('ID', $self->_prot_id);
347 $transcript->add_tag_value('Parent', $self->_prot_id.".gene
");
348 $genes->add_transcript($transcript);
349 $genes->seq_id($self->_target_id);
354 $self->{'_genes'} = \@genes;