2 # BioPerl module for Bio::Tools::MZEF
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp-at-gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::MZEF - Results of one MZEF run
20 $mzef = Bio::Tools::MZEF->new(-file => 'result.mzef');
22 $mzef = Bio::Tools::MZEF->new( -fh => \*INPUT );
23 # to indicate that the sequence was reversed prior to feeding it to MZEF
24 # and that you want to have this reflected in the strand() attribute of
25 # the exons, as well as have the coordinates translated to the non-reversed
27 $mzef = Bio::Tools::MZEF->new( -file => 'result.mzef',
31 # note: this class is-a Bio::Tools::AnalysisResult which implements
32 # Bio::SeqAnalysisParserI, i.e., $mzef->next_feature() is the same
33 while($gene = $mzef->next_prediction()) {
34 # $gene is an instance of Bio::Tools::Prediction::Gene
36 # $gene->exons() returns an array of
37 # Bio::Tools::Prediction::Exon objects
39 @exon_arr = $gene->exons();
42 @intrl_exons = $gene->exons('Internal');
43 # note that presently MZEF predicts only internal exons!
46 # essential if you gave a filename at initialization (otherwise the file
52 The MZEF module provides a parser for MZEF gene structure prediction
55 This module inherits from L<Bio::Tools::AnalysisResult> and therefore
56 implements L<Bio::SeqAnalysisParserI>.
62 User feedback is an integral part of the evolution of this and other
63 Bioperl modules. Send your comments and suggestions preferably to one
64 of the Bioperl mailing lists. Your participation is much appreciated.
66 bioperl-l@bioperl.org - General discussion
67 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
71 Please direct usage questions or support issues to the mailing list:
73 I<bioperl-l@bioperl.org>
75 rather than to the module maintainer directly. Many experienced and
76 responsive experts will be able to look at the problem and quickly
77 address it. Please include a thorough description of the problem
78 with code and data examples if at all possible.
82 Report bugs to the Bioperl bug tracking system to help us keep track
83 of the bugs and their resolution. Bug reports can be submitted via the
86 https://github.com/bioperl/bioperl-live/issues
88 =head1 AUTHOR - Hilmar Lapp
90 Email hlapp-at-gmx.net (or hilmar.lapp-at-pharma.novartis.com)
94 The rest of the documentation details each of the object
95 methods. Internal methods are usually preceded with a _
100 # Let the code begin...
103 package Bio
::Tools
::MZEF
;
106 use Bio
::Tools
::Prediction
::Gene
;
107 use Bio
::Tools
::Prediction
::Exon
;
109 use base
qw(Bio::Tools::AnalysisResult);
111 sub _initialize_state
{
# Prepares the parser's private state: runs the inherited initializer with
# the caller's arguments, then stores the strand and resets the prediction
# bookkeeping fields on $self.
112 my($self,@args) = @_;
114 # first call the inherited method!
115 my $make = $self->SUPER::_initialize_state
(@args);
117 # handle our own parameters
118 my ($strand, $params) =
119 $self->_rearrange([qw(STRAND
123 # our private state variables
# strand falls back to 1 when the caller did not supply one
124 $strand = 1 unless defined($strand);
125 $self->{'_strand'} = $strand;
# parse-done flag, initially 0 (nothing parsed yet)
126 $self->{'_preds_parsed'} = 0;
127 $self->{'_has_cds'} = 0;
128 # array of pre-parsed predictions
129 $self->{'_preds'} = [];
132 =head2 analysis_method
134 Usage : $mzef->analysis_method();
135 Purpose : Inherited method. Overridden to ensure that the name matches
143 sub analysis_method
{
# Guarded accessor around the inherited analysis_method().
# A true $method that does not match /mzef/i (case-insensitive) is rejected
# through throw(); a false/absent $method skips the check. In every
# non-throwing case the call is delegated to the parent class and its
# result is returned.
145 my ($self, $method) = @_;
146 if($method && ($method !~ /mzef/i)) {
147 $self->throw("method $method not supported in " . ref($self));
149 return $self->SUPER::analysis_method
($method);
155 Usage : while($gene = $mzef->next_feature()) {
158 Function: Returns the next gene structure prediction of the MZEF result
159 file. Call this method repeatedly until FALSE is returned.
161 The returned object is actually a SeqFeatureI implementing object.
162 This method is required for classes implementing the
163 SeqAnalysisParserI interface, and is merely an alias for
164 next_prediction() at present.
166 Note that with the present version of MZEF there will only be one
167 object returned, because MZEF does not predict individual genes
168 but just potential internal exons.
170 Returns : A Bio::Tools::Prediction::Gene object.
176 my ($self,@args) = @_;
177 # Neither next_prediction() nor this method expects arguments at present;
178 # any that are given are forwarded anyway so callers are covered if that changes.
180 return $self->next_prediction(@args);
183 =head2 next_prediction
185 Title : next_prediction
186 Usage : while($gene = $mzef->next_prediction()) {
189 Function: Returns the next gene structure prediction of the MZEF result
190 file. Call this method repeatedly until FALSE is returned.
192 Note that with the present version of MZEF there will only be one
193 object returned, because MZEF does not predict individual genes
194 but just potential internal exons.
196 Returns : A Bio::Tools::Prediction::Gene object.
201 sub next_prediction
{
# Returns the next queued prediction. Parsing is lazy: the result is parsed
# the first time this is called, i.e. only while _predictions_parsed()
# still reports false.
205 # if the prediction section hasn't been parsed yet, we do this now
206 $self->_parse_predictions() unless $self->_predictions_parsed();
208 # return the next gene structure (transcript)
209 return $self->_prediction();
212 =head2 _parse_predictions
214 Title : _parse_predictions()
215 Usage : $obj->_parse_predictions()
216 Function: Parses the prediction section. Automatically called by
217 next_prediction() if not yet done.
223 sub _parse_predictions
{
# Reads the result line by line through _readline() and turns every
# coordinate line into one Bio::Tools::Prediction::Exon, all collected in a
# single Bio::Tools::Prediction::Gene. Two kinds of header lines are also
# recognised (program name, file name / sequence length). At the end the
# gene, if one was created, is queued with _add_prediction() and the
# parsed flag is set.
225 my ($method); # set but not used presently
226 my $exon_tag = "InternalExon";
228 # my $seqname; # name given in output is poorly formatted
232 while(defined($_ = $self->_readline())) {
# a line beginning with "<digits> - <digits>" describes one predicted exon
233 if(/^\s*(\d+)\s*-\s*(\d+)\s+/) {
# the gene container is created on the first exon line only
235 if(! defined($gene)) {
236 $gene = Bio
::Tools
::Prediction
::Gene
->new(
237 '-primary' => "GenePrediction$prednr",
238 '-source' => 'MZEF');
240 # we handle start-end first because may not be space delimited
242 my ($start,$end) = ($1,$2);
# remove the coordinate prefix from $_ so only the score columns remain
243 s/^\s*(\d+)\s*-\s*(\d+)\s+//;
244 # split the rest into fields
246 # format: Coordinates   P   Fr1  Fr2  Fr3  Orf  3ss  Cds  5ss
247 # index:  (removed)     0   1    2    3    4    5    6    7
248 my @flds = split(' ', $_);
249 # create the feature object depending on the type of signal --
250 # which is always an (internal) exon for MZEF
251 my $predobj = Bio
::Tools
::Prediction
::Exon
->new();
253 $predobj->source_tag('MZEF');
# field 0 (the P column) is used for both significance and score
254 $predobj->significance($flds[0]);
255 $predobj->score($flds[0]); # what shall we set as overall score?
256 $predobj->strand($self->{'_strand'}); # MZEF searches only one
257 if($predobj->strand() == 1) {
258 $predobj->start($start);
# mirrored coordinates: the new start is counted back from $seqlen using
# the old end, and the new end using the old start
261 $predobj->start($seqlen-$end+1);
262 $predobj->end($seqlen-$start+1);
# signal scores: field 5 (3ss) -> start signal, field 7 (5ss) -> end signal,
# field 6 (Cds) -> coding signal
265 $predobj->start_signal_score($flds[5]);
266 $predobj->end_signal_score($flds[7]);
267 $predobj->coding_signal_score($flds[6]);
268 # frame -- we simply extract the one with highest score from the
269 # orf field, and store the individual scores for now
# index() yields the 0-based position of the first "1" in the Orf field,
# or -1 when there is none, in which case the frame is left undefined
270 my $frm = index($flds[4], "1");
271 $predobj->frame(($frm < 0) ?
undef : $frm);
272 $predobj->primary_tag($exon_tag);
273 $predobj->is_coding(1);
274 # add to gene structure (should be done only when start and end
275 # are set, in order to allow for proper expansion of the range)
276 $gene->add_exon($predobj);
# header line naming the program: the captured "MZEF" is recorded as the
# analysis method
279 if(/^\s*Internal .*(MZEF)/) {
280 $self->analysis_method($1);
# header line carrying the file name ($1) and the sequence length ($2)
283 if(/^\s*File_Name:\s+(\S+)\s+Sequence_length:\s+(\d+)/) {
284 # $seqname = $1; # this is too poor currently (file name truncated
285 # to 10 chars) in order to be sensible enough
290 # $gene->seq_id($seqname);
# nothing is queued when no exon line was seen ($gene never created)
291 $self->_add_prediction($gene) if defined($gene);
292 $self->_predictions_parsed(1);
297 Title : _prediction()
298 Usage : $gene = $obj->_prediction()
# nothing to hand out when the '_preds' queue is missing or empty
308 return unless(exists($self->{'_preds'}) && @
{$self->{'_preds'}});
# otherwise remove the oldest queued prediction from the front and return it
309 return shift(@
{$self->{'_preds'}});
312 =head2 _add_prediction
314 Title : _add_prediction()
315 Usage : $obj->_add_prediction($gene)
322 sub _add_prediction
{
# Appends $gene to the end of the '_preds' queue kept on $self, creating
# the queue as an empty array first if the key does not exist yet.
323 my ($self, $gene) = @_;
325 if(! exists($self->{'_preds'})) {
326 $self->{'_preds'} = [];
328 push(@
{$self->{'_preds'}}, $gene);
331 =head2 _predictions_parsed
333 Title : _predictions_parsed
334 Usage : $obj->_predictions_parsed
337 Returns : TRUE or FALSE
341 sub _predictions_parsed
{
# Combined getter/setter for the '_preds_parsed' flag on $self.
# A true $val is stored. A false or missing $val leaves the stored value
# untouched, so this accessor cannot switch the flag back off.
# Returns the current flag, creating it as 0 if the key does not exist.
342 my ($self, $val) = @_;
344 $self->{'_preds_parsed'} = $val if $val;
345 if(! exists($self->{'_preds_parsed'})) {
346 $self->{'_preds_parsed'} = 0;
348 return $self->{'_preds_parsed'};