Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Tools / Genewise.pm
blob0416693f9a630e6a9029403ab307b4d7309105b4
2 # BioPerl module for Bio::Tools::Genewise
4 # Copyright Fugu Team
6 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
10 =head1 NAME
12 Bio::Tools::Genewise - Results of one Genewise run
14 =head1 SYNOPSIS
16 use Bio::Tools::Genewise;
17 my $gw = Bio::Tools::Genewise(-file=>"genewise.out");
19 while (my $gene = $gw->next_prediction){
20 my @transcripts = $gene->transcripts;
21 foreach my $t(@transcripts){
22 my @exons = $t->exons;
23 foreach my $e(@exons){
24 print $e->start." ".$e->end."\n";
29 =head1 DESCRIPTION
31 This is the parser for the output of Genewise. It takes either a file
32 handle or a file name and returns a
33 Bio::SeqFeature::Gene::GeneStructure object.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 the bugs and their resolution. Bug reports can be submitted via the
61 web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Fugu Team, Jason Stajich
67 Email: fugui@worf.fugu-sg.org
68 Email: jason-at-bioperl.org
70 =head1 APPENDIX
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
75 =cut
78 # Let the code begin...
81 package Bio::Tools::Genewise;
82 use vars qw($Srctag);
83 use strict;
84 use Symbol;
86 use Bio::Tools::AnalysisResult;
87 use Bio::SeqFeature::Generic;
88 use Bio::SeqFeature::Gene::Exon;
89 use Bio::SeqFeature::FeaturePair;
90 use Bio::SeqFeature::Gene::Transcript;
91 use Bio::SeqFeature::Gene::GeneStructure;
93 use base qw(Bio::Root::Root Bio::Root::IO);
94 $Srctag = 'genewise';
96 =head2 new
98 Title : new
99 Usage : $obj->new(-file=>"genewise.out");
100 $obj->new(-fh=>\*GW);
101 Function: Constructor for genewise wrapper. Takes either a file or filehandle
102 Example :
103 Returns : Bio::Tools::Genewise object
105 See L<Bio::Tools::Genewise>
107 =cut
109 sub new {
110 my($class,@args) = @_;
111 my $self = $class->SUPER::new(@args);
112 $self->_initialize_io(@args);
113 return $self;
116 =head2 _get_strand
118 Title : _get_strand
119 Usage : $obj->_get_strand
120 Function: takes start and end values, swap them if start>end and
121 returns end
122 Example :
123 Returns :$start,$end,$strand
125 =cut
127 sub _get_strand {
128 my ($self,$start,$end) = @_;
129 defined($start) || $self->throw("Need a start");
130 defined($end) || $self->throw("Need an end");
131 my $strand;
132 if ($start > $end) {
133 my $tmp = $start;
134 $start = $end;
135 $end = $tmp;
136 $strand = -1;
138 else {
139 $strand = 1;
141 return ($start,$end,$strand);
144 =head2 _score
146 Title : _score
147 Usage : $obj->_score
148 Function: get/set for score info
149 Returns : a score value
151 =cut
153 sub _score {
154 my $self = shift;
155 return $self->{'_score'} = shift if @_;
156 return $self->{'_score'};
159 =head2 _prot_id
161 Title : _prot_id
162 Usage : $obj->_prot_id
163 Function: get/set for protein id
164 Returns :a protein id
166 =cut
168 sub _prot_id {
169 my $self = shift;
170 return $self->{'_prot_id'} = shift if @_;
171 return $self->{'_prot_id'};
174 =head2 _target_id
176 Title : _target_id
177 Usage : $obj->_target_id
178 Function: get/set for genomic sequence id
179 Example :
180 Returns :a target id
182 =cut
184 sub _target_id {
185 my $self = shift;
186 return $self->{'_target_id'} = shift if @_;
187 return $self->{'_target_id'};
191 =head2 next_prediction
193 Title : next_prediction
194 Usage : while($gene = $genewise->next_prediction()) {
195 # do something
197 Function: Returns the gene structure prediction of the Genewise result
198 file. Call this method repeatedly until FALSE is returned.
200 Example :
201 Returns : a Bio::SeqFeature::Gene::GeneStructure object
202 Args :
204 See L<Bio::SeqFeature::Gene::GeneStructure>
206 =cut
209 sub next_prediction {
210 my ($self) = @_;
212 unless ( $self->parsed ){
213 $self->_parse_genes;
214 $self->parsed(1);
216 return shift @{$self->{'_genes'}};
219 sub parsed {
220 my $self = shift;
221 return $self->{'_parsed'} = 1 if @_ && $_[0];
222 return $self->{'_parsed'};
225 sub _parse_genes {
226 my ($self) = @_;
227 my (@alignments,@genes);
228 local ($/) = "//";
230 while ( defined($_ = $self->_readline) ) {
231 $self->debug( $_ );
232 while( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
233 $alignments[$1] = $2;
235 if( /Score\s+(\-?\d+(\.\d+)?)/ ) {
236 $self->_score($1);# unless defined $self->_score;
239 if( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
240 $self->_prot_id($1); #unless defined $self->_prot_id;
243 if( /Target Sequence\s+(\S+)/ ) {
244 $self->_target_id($1);# unless defined $self->_target_id;
247 next unless /Gene\s+\d+\n/;
248 my @genes_txt = split(/Gene\s+\d+\n/,$_);
249 shift @genes_txt; #remove first empty entry
250 my $gene_num = 1;
251 foreach my $gene_txt (@genes_txt) {
252 my $score = $alignments[$gene_num];
253 # If genewise has assigned a strand to the gene as a whole
254 # overall gene start and end
255 my ($g_start, $g_end, $type) =
256 $gene_txt =~ m/Gene\s+
257 (\d+)[\s-]+ # start (1-based)
258 (\d+)\s+ # end
259 (?:\[(\w+)\])? #
261 my $g_strand;
262 my $source_tag = $type ? "$Srctag". "_$type" : $Srctag;
263 my $genes = Bio::SeqFeature::Gene::GeneStructure->new
264 (-source => $source_tag,
265 -score => $self->_score,
267 $genes->add_tag_value('ID', $self->_prot_id.".gene");
268 my $transcript = Bio::SeqFeature::Gene::Transcript->new
269 (-source => $source_tag,
270 -score => $score);
271 ($g_start, $g_end, $g_strand) = $self->_get_strand($g_start,$g_end);
272 $genes->strand($g_strand);
274 # grab exon + supporting feature info
275 my @exons;
276 unless ( @exons = $gene_txt =~ m/(Exon .+\s+Supporting .+)/g ) {
277 @exons = $gene_txt =~ m/(Exon .+\s+)/g;
279 my $nbr = 1;
281 # loop through each exon-supporting feature pair
282 foreach my $e (@exons){
283 my ($e_start,$e_end,$phase) =
284 $e =~ m/Exon\s+
285 (\d+)[\s-]+ # start (1 based)
286 (\d+)\s+ # end
287 phase\s+(\d+) # phase
289 my $e_strand;
290 ($e_start,$e_end,$e_strand) = $self->_get_strand($e_start,
291 $e_end);
292 $transcript->strand($e_strand) unless $transcript->strand != 0;
293 my $exon = Bio::SeqFeature::Gene::Exon->new
294 (-seq_id =>$self->_target_id,
295 -source => $source_tag,
296 -start =>$e_start,
297 -end =>$e_end,
298 -score => $score,
299 #-frame => $phase,
300 -strand =>$e_strand);
302 $exon->add_tag_value('phase',$phase);
303 $exon->is_coding(1);
304 if( $self->_prot_id ) {
305 $exon->add_tag_value('Parent',$self->_prot_id);
307 $exon->add_tag_value("Exon",$nbr++);
308 if( $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
309 my ($geno_start,$geno_end,
310 $prot_start, $prot_end) = ($1,$2,$3,$4);
311 my $prot_strand;
312 ($prot_start,$prot_end,
313 $prot_strand) = $self->_get_strand($prot_start,$prot_end);
314 my $pf = Bio::SeqFeature::Generic->new
315 ( -start => $prot_start,
316 -end => $prot_end,
317 -seq_id => $self->_prot_id,
318 -score => $score,
319 -strand => $prot_strand,
320 -source => $source_tag,
321 -primary_tag => 'supporting_protein_feature',);
322 my $geno_strand;
323 ($geno_start,$geno_end,
324 $geno_strand) = $self->_get_strand($geno_start,$geno_end);
325 my $gf = Bio::SeqFeature::Generic->new
326 ( -start => $geno_start,
327 -end => $geno_end,
328 -seq_id => $self->_target_id,
329 -score => $score,
330 -strand => $geno_strand,
331 -source => $source_tag,
332 -primary_tag => 'supporting_genomic_feature',);
333 my $fp = Bio::SeqFeature::FeaturePair->new
334 (-feature1 =>$gf,
335 -feature2 =>$pf);
336 $exon->add_tag_value( 'supporting_feature',$fp );
337 if( $self->_prot_id ) {
338 $exon->add_tag_value('Target','Protein:'.$self->_prot_id);
339 $exon->add_tag_value('Target',$prot_start);
340 $exon->add_tag_value('Target',$prot_end);
343 $transcript->add_exon($exon);
345 $transcript->seq_id($self->_target_id);
346 $transcript->add_tag_value('ID', $self->_prot_id);
347 $transcript->add_tag_value('Parent', $self->_prot_id.".gene");
348 $genes->add_transcript($transcript);
349 $genes->seq_id($self->_target_id);
350 push @genes, $genes;
351 $gene_num++;
354 $self->{'_genes'} = \@genes;