Move HMMER related modules, tests, and programs to new distribution.
[bioperl-live.git] / Bio / Tools / MZEF.pm
blob52585575a57ab2c310b9d859964afa8c8f5c8cb6
2 # BioPerl module for Bio::Tools::MZEF
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tools::MZEF - Results of one MZEF run
18 =head1 SYNOPSIS
20 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
21 # filehandle:
22 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
23 # to indicate that the sequence was reversed prior to feeding it to MZEF
24 # and that you want to have this reflected in the strand() attribute of
25 # the exons, as well as have the coordinates translated to the non-reversed
26 # sequence
27 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
28 -strand => -1 );
30 # parse the results
31 # note: this class is-a Bio::Tools::AnalysisResult which implements
32 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
33 while($gene = $mzef->next_prediction()) {
34 # $gene is an instance of Bio::Tools::Prediction::Gene
36 # $gene->exons() returns an array of
37 # Bio::Tools::Prediction::Exon objects
38 # all exons:
39 @exon_arr = $gene->exons();
41 # internal exons only
42 @intrl_exons = $gene->exons('Internal');
43 # note that presently MZEF predicts only internal exons!
44 }
46 # essential if you gave a filename at initialization (otherwise the file
47 # will stay open)
48 $mzef->close();
50 =head1 DESCRIPTION
52 The MZEF module provides a parser for MZEF gene structure prediction
53 output.
55 This module inherits off L<Bio::Tools::AnalysisResult> and therefore
56 implements L<Bio::SeqAnalysisParserI>.
58 =head1 FEEDBACK
60 =head2 Mailing Lists
62 User feedback is an integral part of the evolution of this and other
63 Bioperl modules. Send your comments and suggestions preferably to one
64 of the Bioperl mailing lists. Your participation is much appreciated.
66 bioperl-l@bioperl.org - General discussion
67 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69 =head2 Support
71 Please direct usage questions or support issues to the mailing list:
73 I<bioperl-l@bioperl.org>
75 rather than to the module maintainer directly. Many experienced and
76 responsive experts will be able to look at the problem and quickly
77 address it. Please include a thorough description of the problem
78 with code and data examples if at all possible.
80 =head2 Reporting Bugs
82 Report bugs to the Bioperl bug tracking system to help us keep track
83 of the bugs and their resolution. Bug reports can be submitted via the
84 web:
86 https://github.com/bioperl/bioperl-live/issues
88 =head1 AUTHOR - Hilmar Lapp
90 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
92 =head1 APPENDIX
94 The rest of the documentation details each of the object
95 methods. Internal methods are usually preceded with a _
97 =cut
100 # Let the code begin...
103 package Bio::Tools::MZEF;
104 use strict;
106 use Bio::Tools::Prediction::Gene;
107 use Bio::Tools::Prediction::Exon;
109 use base qw(Bio::Tools::AnalysisResult);
# Sets up the parser's private state.
#
# Calls the inherited _initialize_state() first, then reads the -strand
# argument (1 when not given) and resets the parse flag, the CDS flag and
# the list of pre-parsed predictions.
#
# Args    : the named arguments as passed on by the caller
# Returns : the value of the last assignment (the empty predictions list)
sub _initialize_state {
    my ($self, @args) = @_;

    # first call the inherited method!
    my $make = $self->SUPER::_initialize_state(@args);

    # handle our own parameters
    # NOTE(review): the recognised parameter list is assumed to consist of
    # STRAND only -- confirm against the upstream module.
    my ($strand) = $self->_rearrange([qw(STRAND)], @args);

    # our private state variables
    $strand = 1 unless defined($strand);
    $self->{'_strand'}       = $strand;
    $self->{'_preds_parsed'} = 0;
    $self->{'_has_cds'}      = 0;
    # array of pre-parsed predictions
    $self->{'_preds'}        = [];
}
=head2 analysis_method

 Usage     : $mzef->analysis_method();
 Purpose   : Inherited method. Overridden to ensure that the name matches
             /mzef/i.
 Returns   : String
 Argument  : optional method name; if given and true it must match /mzef/i
 Throws    : an exception if a method name is given that does not match
             /mzef/i

=cut

sub analysis_method {
    my ($self, $method) = @_;

    # only a name that was actually passed in is validated; a missing or
    # false value is handed to the inherited method unchanged
    if ($method && ($method !~ /mzef/i)) {
        $self->throw("method $method not supported in " . ref($self));
    }
    return $self->SUPER::analysis_method($method);
}
=head2 next_feature

 Title   : next_feature
 Usage   : while($gene = $mzef->next_feature()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           The returned object is actually a SeqFeatureI implementing object.
           This method is required for classes implementing the
           SeqAnalysisParserI interface, and is merely an alias for
           next_prediction() at present.

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    : none required; any arguments given are passed on to
           next_prediction()

=cut

sub next_feature {
    my ($self, @args) = @_;

    # even though next_prediction doesn't expect any args (and this method
    # does neither), we pass on args in order to be prepared if this changes
    # ever
    return $self->next_prediction(@args);
}
=head2 next_prediction

 Title   : next_prediction
 Usage   : while($gene = $mzef->next_prediction()) {
               # do something
           }
 Function: Returns the next gene structure prediction of the MZEF result
           file. Call this method repeatedly until FALSE is returned.

           Note that with the present version of MZEF there will only be one
           object returned, because MZEF does not predict individual genes
           but just potential internal exons.
 Example :
 Returns : A Bio::Tools::Prediction::Gene object.
 Args    :

=cut

sub next_prediction {
    my ($self) = @_;

    # if the prediction section hasn't been parsed yet, we do this now
    $self->_parse_predictions() unless $self->_predictions_parsed();

    # return the next gene structure (transcript)
    return $self->_prediction();
}
=head2 _parse_predictions

 Title   : _parse_predictions()
 Usage   : $obj->_parse_predictions()
 Function: Parses the prediction section. Automatically called by
           next_prediction() if not yet done.

           All exon lines are collected into a single gene object, which
           is queued via _add_prediction() once the input is exhausted.
 Example :
 Returns : the value returned by _predictions_parsed(1)
 Throws  : an exception if an exon line is met on a strand other than 1
           before the sequence length has been read, because the
           coordinates cannot be translated then

=cut

sub _parse_predictions {
    my ($self) = @_;
    my $exon_tag = "InternalExon";
    my $gene;
    # my $seqname; # name given in output is poorly formatted
    my $seqlen;
    my $prednr = 1;

    # read into a lexical so that the caller's $_ is left untouched
    while (defined(my $line = $self->_readline())) {
        if ($line =~ /^\s*(\d+)\s*-\s*(\d+)\s+/) {
            # exon or signal
            # we handle start-end first because may not be space delimited
            # for large numbers
            my ($start, $end) = ($1, $2);
            if (! defined($gene)) {
                $gene = Bio::Tools::Prediction::Gene->new(
                    '-primary' => "GenePrediction$prednr",
                    '-source'  => 'MZEF');
            }
            $line =~ s/^\s*\d+\s*-\s*\d+\s+//;
            # split the rest into fields
            chomp($line);
            # format: Coordinates P Fr1 Fr2 Fr3 Orf 3ss Cds 5ss
            # index:              0 1   2   3   4   5   6   7
            my @flds = split(' ', $line);
            # create the feature object depending on the type of signal --
            # which is always an (internal) exon for MZEF
            my $predobj = Bio::Tools::Prediction::Exon->new();
            # set common fields
            $predobj->source_tag('MZEF');
            $predobj->significance($flds[0]);
            $predobj->score($flds[0]); # what shall we set as overall score?
            $predobj->strand($self->{'_strand'}); # MZEF searches only one
            if ($predobj->strand() == 1) {
                $predobj->start($start);
                $predobj->end($end);
            } else {
                # translating to the non-reversed sequence needs the length
                # from the File_Name/Sequence_length line; without it the
                # arithmetic below would silently yield negative positions
                $self->throw("cannot translate exon $start-$end to the "
                             . "non-reversed sequence: no Sequence_length "
                             . "seen before the exon lines")
                    unless defined($seqlen);
                $predobj->start($seqlen - $end + 1);
                $predobj->end($seqlen - $start + 1);
            }
            # set scores
            $predobj->start_signal_score($flds[5]);
            $predobj->end_signal_score($flds[7]);
            $predobj->coding_signal_score($flds[6]);
            # frame -- we simply extract the one with highest score from the
            # orf field, and store the individual scores for now
            my $frm = index($flds[4], "1");
            $predobj->frame(($frm < 0) ? undef : $frm);
            $predobj->primary_tag($exon_tag);
            $predobj->is_coding(1);
            # add to gene structure (should be done only when start and end
            # are set, in order to allow for proper expansion of the range)
            $gene->add_exon($predobj);
            next;
        }
        if ($line =~ /^\s*Internal .*(MZEF)/) {
            $self->analysis_method($1);
            next;
        }
        if ($line =~ /^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
            # $seqname = $1; # this is too poor currently (file name truncated
                             # to 10 chars) in order to be sensible enough
            $seqlen = $2;
            next;
        }
    }
    # $gene->seq_id($seqname);
    $self->_add_prediction($gene) if defined($gene);
    $self->_predictions_parsed(1);
}
=head2 _prediction

 Title   : _prediction()
 Usage   : $gene = $obj->_prediction()
 Function: internal; removes the first entry from the list of pre-parsed
           predictions and returns it
 Example :
 Returns : the removed entry, or nothing (undef in scalar context, the
           empty list in list context) if no entry is left

=cut

sub _prediction {
    my ($self) = @_;

    # nothing to hand out if the list is missing or already exhausted
    return unless(exists($self->{'_preds'}) && @{$self->{'_preds'}});
    return shift(@{$self->{'_preds'}});
}
=head2 _add_prediction

 Title   : _add_prediction()
 Usage   : $obj->_add_prediction($gene)
 Function: internal; appends $gene to the list of pre-parsed predictions,
           creating the list first if it does not exist yet
 Example :
 Returns : the number of entries in the list after the addition

=cut

sub _add_prediction {
    my ($self, $gene) = @_;

    if (! exists($self->{'_preds'})) {
        $self->{'_preds'} = [];
    }
    push(@{$self->{'_preds'}}, $gene);
}
=head2 _predictions_parsed

 Title   : _predictions_parsed
 Usage   : $obj->_predictions_parsed
 Function: internal; gets the flag telling whether the prediction section
           has been parsed, optionally setting it first. Only a TRUE
           argument is stored; a FALSE or missing one leaves the flag as
           it is.
 Example :
 Returns : TRUE or FALSE

=cut

sub _predictions_parsed {
    my ($self, $val) = @_;

    $self->{'_preds_parsed'} = $val if $val;
    # make sure the flag exists so that a defined value is returned
    if (! exists($self->{'_preds_parsed'})) {
        $self->{'_preds_parsed'} = 0;
    }
    return $self->{'_preds_parsed'};
}