t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / SearchIO / infernal.pm
blobc521743511c3e502a5df09b94e599979b81fa90f
2 # BioPerl module for Bio::SearchIO::infernal
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Chris Fields <cjfields-at-uiuc-dot-edu>
8 # Copyright Chris Fields
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::infernal - SearchIO-based Infernal parser
18 =head1 SYNOPSIS
20 my $parser = Bio::SearchIO->new(-format => 'infernal',
21 -file => 'purine.inf');
22 while( my $result = $parser->next_result ) {
23 # general result info, such as model used, Infernal version
24 while( my $hit = $result->next_hit ) {
25 while( my $hsp = $hit->next_hsp ) {
26 # ...
31 =head1 DESCRIPTION
33 This is a SearchIO-based parser for Infernal output from the cmsearch program.
34 It currently parses cmsearch output for Infernal versions 0.7-1.1; older
35 versions may work but will not be supported.
37 The latest version of Infernal is 1.1. The output has changed substantially
38 relative to version 1.0. Versions 1.x are stable releases (and output has
39 stabilized) therefore it is highly recommended that users upgrade to using
40 the latest Infernal release. Support for the older pre-v.1 developer releases
41 will be dropped for future core 1.6 releases.
43 =head1 FEEDBACK
45 =head2 Mailing Lists
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to
49 the Bioperl mailing list. Your participation is much appreciated.
51 bioperl-l@bioperl.org - General discussion
52 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 =head2 Support
56 Please direct usage questions or support issues to the mailing list:
58 I<bioperl-l@bioperl.org>
60 rather than to the module maintainer directly. Many experienced and
61 reponsive experts will be able look at the problem and quickly
62 address it. Please include a thorough description of the problem
63 with code and data examples if at all possible.
65 =head2 Reporting Bugs
67 Report bugs to the Bioperl bug tracking system to help us keep track
68 of the bugs and their resolution. Bug reports can be submitted via the
69 web:
71 https://github.com/bioperl/bioperl-live/issues
73 =head1 AUTHOR - Chris Fields
75 Email cjfields-at-uiuc-dot-edu
77 =head1 CONTRIBUTORS
79 Jeffrey Barrick, Michigan State University
80 Paul Cantalupo, University of Pittsburgh
82 =head1 APPENDIX
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
87 =cut
89 # Let the code begin...
91 package Bio::SearchIO::infernal;
92 use strict;
94 use Data::Dumper;
95 use base qw(Bio::SearchIO);
97 our %MODEMAP = (
98 'Result' => 'result',
99 'Hit' => 'hit',
100 'Hsp' => 'hsp'
103 our %MAPPING = (
104 'Hsp_bit-score' => 'HSP-bits',
105 'Hsp_score' => 'HSP-score',
106 'Hsp_evalue' => 'HSP-evalue', # evalues only in v0.81, optional
107 'Hsp_pvalue' => 'HSP-pvalue', # pvalues only in v0.81, optional
108 'Hsp_query-from' => 'HSP-query_start',
109 'Hsp_query-to' => 'HSP-query_end',
110 'Hsp_query-strand'=> 'HSP-query_strand',
111 'Hsp_hit-from' => 'HSP-hit_start',
112 'Hsp_hit-to' => 'HSP-hit_end',
113 'Hsp_hit-strand' => 'HSP-hit_strand',
114 'Hsp_gaps' => 'HSP-hsp_gaps',
115 'Hsp_hitgaps' => 'HSP-hit_gaps',
116 'Hsp_querygaps' => 'HSP-query_gaps',
117 'Hsp_qseq' => 'HSP-query_seq',
118 'Hsp_ncline' => 'HSP-nc_seq',
119 'Hsp_hseq' => 'HSP-hit_seq',
120 'Hsp_midline' => 'HSP-homology_seq',
121 'Hsp_pline' => 'HSP-pp_seq',
122 'Hsp_structure' => 'HSP-meta',
123 'Hsp_align-len' => 'HSP-hsp_length',
124 'Hsp_stranded' => 'HSP-stranded',
126 'Hit_id' => 'HIT-name',
127 'Hit_len' => 'HIT-length',
128 'Hit_gi' => 'HIT-ncbi_gi',
129 'Hit_accession' => 'HIT-accession',
130 'Hit_desc' => 'HIT-description',
131 'Hit_def' => 'HIT-description',
132 'Hit_signif' => 'HIT-significance', # evalues in v1.1 and v0.81, optional
133 'Hit_p' => 'HIT-p', # pvalues only in 1.0, optional
134 'Hit_score' => 'HIT-score', # best HSP bit score (in v1.1, the only HSP bit score)
135 'Hit_bits' => 'HIT-bits', # best HSP bit score (ditto)
137 'Infernal_program' => 'RESULT-algorithm_name', # get/set
138 'Infernal_version' => 'RESULT-algorithm_version', # get/set
139 'Infernal_query-def'=> 'RESULT-query_name', # get/set
140 'Infernal_query-len'=> 'RESULT-query_length',
141 'Infernal_query-acc'=> 'RESULT-query_accession', # get/set
142 'Infernal_querydesc'=> 'RESULT-query_description', # get/set
143 'Infernal_cm' => 'RESULT-cm_name',
144 'Infernal_db' => 'RESULT-database_name', # get/set
145 'Infernal_db-len' => 'RESULT-database_entries', # in v1.1 only
146 'Infernal_db-let' => 'RESULT-database_letters', # in v1.1 only
149 my $MINSCORE = 0;
150 my $DEFAULT_ALGORITHM = 'cmsearch';
151 my $DEFAULT_VERSION = '1.1';
153 my @VALID_SYMBOLS = qw(5-prime 3-prime single-strand unknown gap);
154 my %STRUCTURE_SYMBOLS = (
155 '5-prime' => '<',
156 '3-prime' => '>',
157 'single-strand' => ':',
158 'unknown' => '?',
159 'gap' => '.'
162 =head2 new
164 Title : new
165 Usage : my $obj = Bio::SearchIO::infernal->new();
166 Function: Builds a new Bio::SearchIO::infernal object
167 Returns : Bio::SearchIO::infernal
168 Args : -fh/-file => cmsearch (infernal) filename
169 -format => 'infernal'
170 -model => query model (Rfam ID) (default undef)
171 -database => database name (default undef)
172 -query_acc => query accession, eg. Rfam accession RF####
173 -query_desc => query description, eg. Rfam description
174 -hsp_minscore => minimum HSP score cutoff
175 -convert_meta => boolean, set to convert meta string to simple WUSS format
176 -symbols => hash ref of structure symbols to use
177 (default symbols in %STRUCTURE_SYMBOLS hash)
179 =cut
181 sub _initialize {
182 my ( $self, @args ) = @_;
183 $self->SUPER::_initialize(@args);
184 my ($model, $database, $convert, $symbols, $cutoff,
185 $desc, $accession, $algorithm, $version) =
186 $self->_rearrange([qw(MODEL
187 DATABASE
188 CONVERT_META
189 SYMBOLS
190 HSP_MINSCORE
191 QUERY_DESC
192 QUERY_ACC
193 ALGORITHM
194 VERSION)],@args);
195 my $handler = $self->_eventHandler;
196 $handler->register_factory(
197 'result',
198 Bio::Factory::ObjectFactory->new(
199 -type => 'Bio::Search::Result::INFERNALResult',
200 -interface => 'Bio::Search::Result::ResultI',
201 -verbose => $self->verbose
205 $handler->register_factory(
206 'hit',
207 Bio::Factory::ObjectFactory->new(
208 -type => 'Bio::Search::Hit::ModelHit',
209 -interface => 'Bio::Search::Hit::HitI',
210 -verbose => $self->verbose
214 $handler->register_factory(
215 'hsp',
216 Bio::Factory::ObjectFactory->new(
217 -type => 'Bio::Search::HSP::ModelHSP',
218 -interface => 'Bio::Search::HSP::HSPI',
219 -verbose => $self->verbose
223 defined $model && $self->model($model);
224 defined $database && $self->database($database);
225 defined $accession && $self->query_accession($accession);
226 defined $convert && $self->convert_meta($convert);
227 defined $desc && $self->query_description($desc);
229 $version ||= $DEFAULT_VERSION;
230 $self->version($version);
231 $symbols ||= \%STRUCTURE_SYMBOLS;
232 $self->structure_symbols($symbols);
233 $cutoff ||= $MINSCORE;
234 $self->hsp_minscore($cutoff);
235 $algorithm ||= $DEFAULT_ALGORITHM;
236 $self->algorithm($algorithm);
239 =head2 next_result
241 Title : next_result
242 Usage : my $hit = $searchio->next_result;
243 Function: Returns the next Result from a search
244 Returns : Bio::Search::Result::ResultI object
245 Args : none
247 =cut
249 sub next_result {
250 my ($self) = @_;
251 unless (exists $self->{'_handlerset'}) {
252 my $line;
253 while ($line = $self->_readline) {
254 # advance to first line
255 next if $line =~ m{^\s*$};
256 # newer output starts with model name
257 if ($line =~ m{^\#\s+cmsearch\s}) {
258 my $secondline = $self->_readline;
259 if ($secondline =~ m{INFERNAL 1\.1}) {
260 $self->{'_handlerset'} = '1.1';
262 else {
263 $self->{'_handlerset'} = 'latest'; # v1.0
265 $self->_pushback($secondline);
267 elsif ($line =~ m{^CM\s\d+:}) {
268 $self->{'_handlerset'} = 'pre-1.0';
270 else {
271 $self->{'_handlerset'} ='old';
273 last;
275 $self->_pushback($line);
276 #if ($self->{'_handlerset'} ne '1.0') {
277 # $self->deprecated(
278 # -message => "Parsing of Infernal pre-1.0 release is deprecated;\n".
279 # "upgrading to Infernal 1.0 or above is highly recommended",
280 # -version => 1.007);
283 return ($self->{'_handlerset'} eq '1.1') ? $self->_parse_v1_1 :
284 ($self->{'_handlerset'} eq 'latest') ? $self->_parse_latest :
285 ($self->{'_handlerset'} eq 'pre-1.0') ? $self->_parse_pre :
286 $self->_parse_old;
290 sub _parse_v1_1 {
291 my ($self) = @_;
292 my $seentop = 0;
293 local $/ = "\n";
294 my ($accession, $description) = ($self->query_accession, $self->query_description);
295 my ($buffer, $last, %modelcounter, @hit_list, %hitindex,
296 @hsp_list, %hspindex);
297 $self->start_document();
298 $buffer = $self->_readline;
299 if ( !defined $buffer || $buffer =~ m/^\[ok/ ) { # end of report
300 return undef;
302 else {
303 $self->_pushback($buffer);
306 PARSER: # Parse each line of report
307 while ( defined( $buffer = $self->_readline ) ) {
308 my $hit_counter = 0;
309 my $lineorig = $buffer;
310 chomp $buffer;
312 # INFERNAL program name
313 if ( $buffer =~ m/^\#\s(\S+)\s\:\:\s/ ) {
314 $seentop = 1;
315 my $prog = $1;
316 $self->start_element( { 'Name' => 'Result' } );
317 $self->element_hash( { 'Infernal_program' => uc($prog) } );
320 # INFERNAL version and release date
321 elsif ( $buffer =~ m/^\#\sINFERNAL\s+(\S+)\s+\((.+)\)/ ) {
322 my $version = $1;
323 my $versiondate = $2;
324 $self->{'_cmidline'} = $buffer;
325 $self->element_hash( { 'Infernal_version' => $version } );
328 # Query info
329 elsif ( $buffer =~ /^\#\squery (?:\w+ )?file\:\s+(\S+)/ ) {
330 $self->{'_cmfileline'} = $lineorig;
331 $self->element_hash( { 'Infernal_cm' => $1 } );
334 # Database info
335 elsif ( $buffer =~ m/^\#\starget\s\S+\sdatabase\:\s+(\S+)/ ) {
336 $self->{'_cmseqline'} = $lineorig;
337 $self->element_hash( { 'Infernal_db' => $1 } );
340 # Query data
341 elsif ( $buffer =~ m/^Query:\s+(\S+)\s+\[CLEN=(\d+)\]$/ ) {
342 $self->element_hash( { 'Infernal_query-def' => $1,
343 'Infernal_query-len' => $2,
344 'Infernal_query-acc' => $accession,
345 'Infernal_querydesc' => $description,
346 } );
349 # Get query accession
350 elsif ( $buffer =~ s/^Accession:\s+// && ! $accession) {
351 $buffer =~ s/\s+$//;
352 $self->element_hash( { 'Infernal_query-acc' => $buffer } );
355 # Get query description
356 elsif ( $buffer =~ s/^Description:\s+// && ! $description) {
357 $buffer =~ s/\s+$//;
358 $self->element_hash( { 'Infernal_querydesc' => $buffer } );
361 # Process hit table - including those below inclusion threshold
362 elsif ( $buffer =~ m/^Hit scores:/) {
363 @hit_list = (); # here is case there are multi-query reports
364 while ( defined( $buffer = $self->_readline ) ) {
365 if ( $buffer =~ m/^Hit alignments:/ ) {
366 $self->_pushback($buffer);
367 last;
369 elsif ( $buffer =~ m/^\s+rank\s+E-value/
370 || $buffer =~ m/\-\-\-/
371 || $buffer =~ m/^$/
372 || $buffer =~ m/No hits detected/ ) {
373 next;
376 # Process hit
377 $hit_counter++;
378 my ($rank, $threshold, $eval, $score,
379 $bias, $hitid, $start, $end, $strand,
380 $mdl, $truc, $gc, @desc) = split( " ", $buffer );
381 my $desc = join " ", @desc;
382 $desc = '' if ( !defined($desc) );
384 push @hit_list, [ $hitid, $desc, $eval, $score ];
385 $hitindex{ $hitid.$hit_counter } = $#hit_list;
389 # Process hit alignments
390 elsif ( $buffer =~ /^Hit alignments:/ ) {
391 my $hitid;
392 my $align_counter = 0;
393 while ( defined( $buffer = $self->_readline ) ) {
394 if ( $buffer =~ /^Internal CM pipeline statistics summary/ ) {
395 $self->_pushback($buffer);
396 last;
398 if ( $buffer =~ m/^\>\>\s(\S*)\s+(.*)/ ) { # defline of hit
399 $hitid = $1;
400 my $desc = $2;
401 $align_counter++;
402 my $hitid_alignctr = $hitid.$align_counter;
403 $modelcounter{$hitid_alignctr} = 0;
405 # The Hit Description from the Hit table can be truncated if
406 # it is too long, so use the '>>' line description instead
407 $hit_list[ $hitindex{$hitid_alignctr} ][1] = $desc;
409 # Process hit information table
410 while ( defined( $buffer = $self->_readline ) ) {
411 if ( $buffer =~ m/^Internal CM pipeline statistics/
412 || $buffer =~ m/NC$/
413 || $buffer =~ m/^\>\>/ ) {
414 $self->_pushback($buffer);
415 last;
417 elsif ( $buffer =~ m/^\s+rank\s+E-value/
418 || $buffer =~ m/^\s----/
419 || $buffer =~ m/^$/
420 || $buffer =~ m/No hits detected/ ) {
421 next;
424 # Get hsp data from table, push into @hsp;
425 my ( $rank, $threshold, $eval,
426 $score, $bias, $model,
427 $cm_start, $cm_stop, $cm_cov,
428 $seq_start, $seq_stop, $seq_strand, $seq_cov,
429 $acc, $trunc, $gc,
430 ) = split( " ", $buffer );
432 # Try to get the Hit Length from the alignment information.
433 # For cmsearch, if sequence coverage ends in ']' it means that the
434 # alignment runs full-length flush to the end of the target.
435 my $hitlength = ( $seq_cov =~ m/\]$/ ) ? $seq_stop : 0;
437 my $tmphit = $hit_list[ $hitindex{$hitid_alignctr} ];
438 if ( !defined $tmphit ) {
439 $self->warn("Incomplete information: can't find HSP $hitid in list of hits\n");
440 next;
443 push @hsp_list, [ $hitid,
444 $cm_start, $cm_stop,
445 $seq_start, $seq_stop,
446 $score, $eval,
447 $hitlength];
448 $modelcounter{$hitid_alignctr}++;
449 my $hsp_key = $hitid_alignctr . "_" . $modelcounter{$hitid_alignctr};
450 $hspindex{$hsp_key} = $#hsp_list;
453 elsif ( $buffer =~ m/NC$/ ) { # start of HSP
454 # need CS line to get number of spaces before structure data
455 my $csline = $self->_readline;
456 $csline =~ m/^(\s+)\S+ CS$/;
457 my $offset = length($1);
458 $self->_pushback($csline);
459 $self->_pushback($buffer); # set up for loop
461 my ($ct, $strln) = 0;
462 my $hspdata;
463 HSP:
464 my %hspline = ('0' => 'nc', '1' => 'meta',
465 '2' => 'query', '3' => 'midline',
466 '4' => 'hit', '5' => 'pp');
467 HSP:
468 while (defined ($buffer = $self->_readline)) {
469 chomp $buffer;
470 if ( $buffer =~ /^>>\s/
471 || $buffer =~ /^Internal CM pipeline statistics/) {
472 $self->_pushback($buffer);
473 last HSP;
475 elsif ( $ct % 6 == 0 && $buffer =~ /^$/ ) {
476 next;
478 my $iterator = $ct % 6;
479 # NC line ends with ' NC' so remove these from the strlen count
480 $strln = ( length($buffer) - 3 ) if $iterator == 0;
481 my $data = substr($buffer, $offset, $strln-$offset);
482 $hspdata->{ $hspline{$iterator} } .= $data;
484 $ct++;
485 } # 'HSP' while loop
487 my $strlen = 0;
488 # catch any insertions and add them into the actual length
489 while ($hspdata->{'query'} =~ m{\*\[\s*(\d+)\s*\]\*}g) {
490 $strlen += $1;
492 # add on the actual residues
493 $strlen += $hspdata->{'query'} =~ tr{A-Za-z}{A-Za-z};
494 my $metastr = ($self->convert_meta) ? ($self->simple_meta($hspdata->{'meta'})) :
495 $hspdata->{'meta'};
497 my $hitid_alignctr = $hitid . $align_counter;
498 my $hsp_key = $hitid_alignctr . "_" . $modelcounter{$hitid_alignctr};
499 my $hsp = $hsp_list[ $hspindex{$hsp_key} ];
500 push (@$hsp, $hspdata->{'nc'}, $metastr,
501 $hspdata->{'query'}, $hspdata->{'midline'},
502 $hspdata->{'hit'}, $hspdata->{'pp'});
505 } # end of 'Hit alignments:' section of report
507 # Process internal pipeline stats (end of report)
508 elsif ( $buffer =~ m/Internal CM pipeline statistics summary:/ ) {
509 while ( defined( $buffer = $self->_readline ) ) {
510 last if ( $buffer =~ m!^//! );
512 if ( $buffer =~ /^Target sequences:\s+(\d+)\s+\((\d+) residues/ ) {
513 $self->element_hash( { 'Infernal_db-len' => $1,
514 'Infernal_db-let' => $2, } );
517 last; # of the outer while defined $self->readline
520 # Leftovers
521 else {
522 #print STDERR "Missed line: $buffer\n";
523 $self->debug($buffer);
525 $last = $buffer;
526 } # PARSER end
528 # Final processing of hits and hsps
529 my $hit_counter = 0;
530 foreach my $hit ( @hit_list ) {
531 $hit_counter++;
532 my ($hit_name, $hit_desc, $hit_signif, $hit_score) = @$hit;
533 my $num_hsp = $modelcounter{$hit_name . $hit_counter} || 0;
535 $self->start_element( { 'Name' => 'Hit' } );
536 $self->element_hash( {'Hit_id' => $hit_name,
537 'Hit_desc' => $hit_desc,
538 'Hit_signif'=> $hit_signif,
539 'Hit_score' => $hit_score,
540 'Hit_bits' => $hit_score, } );
541 for my $i ( 1 .. $num_hsp ) {
542 my $hsp_key = $hit_name . $hit_counter . "_" . $i;
543 my $hsp = $hsp_list[ $hspindex{$hsp_key} ];
544 if ( defined $hsp ) {
545 my $hspid = shift @$hsp;
547 my ($cm_start, $cm_stop, $seq_start, $seq_stop,
548 $score, $eval, $hitlength, $ncline,
549 $csline, $qseq, $midline, $hseq, $pline) = @$hsp;
550 if ( $hitlength != 0 ) {
551 $self->element(
552 { 'Name' => 'Hit_len', 'Data' => $hitlength }
556 $self->start_element( { 'Name' => 'Hsp' } );
557 $self->element_hash( { 'Hsp_stranded' => 'HIT',
558 'Hsp_query-from' => $cm_start,
559 'Hsp_query-to' => $cm_stop,
560 'Hsp_hit-from' => $seq_start,
561 'Hsp_hit-to' => $seq_stop,
562 'Hsp_score' => $score,
563 'Hsp_bit-score' => $score,
564 'Hsp_evalue' => $eval,
565 'Hsp_ncline' => $ncline,
566 'Hsp_structure' => $csline,
567 'Hsp_qseq' => $qseq,
568 'Hsp_midline' => $midline,
569 'Hsp_hseq' => $hseq,
570 'Hsp_pline' => $pline,
571 } );
573 $self->end_element( { 'Name' => 'Hsp' } );
576 $self->end_element( { 'Name' => 'Hit' } );
579 $self->end_element( { 'Name' => 'Result' } );
580 my $result = $self->end_document();
581 return $result;
585 =head2 start_element
587 Title : start_element
588 Usage : $eventgenerator->start_element
589 Function: Handles a start element event
590 Returns : none
591 Args : hashref with at least 2 keys 'Data' and 'Name'
594 =cut
596 sub start_element {
597 my ( $self, $data ) = @_;
599 # we currently don't care about attributes
600 my $nm = $data->{'Name'};
601 my $type = $MODEMAP{$nm};
602 if ($type) {
603 if ( $self->_eventHandler->will_handle($type) ) {
604 my $func = sprintf( "start_%s", lc $type );
605 $self->_eventHandler->$func( $data->{'Attributes'} );
607 unshift @{ $self->{'_elements'} }, $type;
609 if ( defined $type
610 && $type eq 'result' )
612 $self->{'_values'} = {};
613 $self->{'_result'} = undef;
617 =head2 end_element
619 Title : start_element
620 Usage : $eventgenerator->end_element
621 Function: Handles an end element event
622 Returns : none
623 Args : hashref with at least 2 keys, 'Data' and 'Name'
625 =cut
627 sub end_element {
628 my ( $self, $data ) = @_;
629 my $nm = $data->{'Name'};
630 my $type = $MODEMAP{$nm};
631 my $rc;
633 if ($type) {
634 if ( $self->_eventHandler->will_handle($type) ) {
635 my $func = sprintf( "end_%s", lc $type );
636 $rc = $self->_eventHandler->$func( $self->{'_reporttype'},
637 $self->{'_values'} );
639 my $lastelem = shift @{ $self->{'_elements'} };
641 # Infernal 1.1 allows one to know hit->length in some instances
642 # so remove it so it doesn't carry over to next hit. Tried flushing
643 # all 'type' values from {_values} buffer but it breaks legacy tests
644 if ($type eq 'hit' ) {
645 delete $self->{_values}{'HIT-length'};
646 delete $self->{_values}{'HSP-hit_length'};
649 elsif ( $MAPPING{$nm} ) {
650 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
651 my $key = ( keys %{ $MAPPING{$nm} } )[0];
652 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
653 $self->{'_last_data'};
655 else {
656 $self->{'_values'}->{ $MAPPING{$nm} } = $self->{'_last_data'};
659 else {
660 $self->debug("unknown nm $nm, ignoring\n");
662 $self->{'_last_data'} = ''; # remove read data if we are at
663 # end of an element
664 $self->{'_result'} = $rc if ( defined $type && $type eq 'result' );
665 return $rc;
668 =head2 element
670 Title : element
671 Usage : $eventhandler->element({'Name' => $name, 'Data' => $str});
672 Function: Convenience method that calls start_element, characters, end_element
673 Returns : none
674 Args : Hash ref with the keys 'Name' and 'Data'
676 =cut
678 sub element {
679 my ( $self, $data ) = @_;
680 # simple data calls (%MAPPING) do not need start_element
681 $self->characters($data);
682 $self->end_element($data);
685 =head2 element_hash
687 Title : element
688 Usage : $eventhandler->element_hash({'Hsp_hit-from' => $start,
689 'Hsp_hit-to' => $end,
690 'Hsp_score' => $lastscore});
691 Function: Convenience method that takes multiple simple data elements and
692 maps to appropriate parameters
693 Returns : none
694 Args : Hash ref with the mapped key (in %MAPPING) and value
696 =cut
698 sub element_hash {
699 my ($self, $data) = @_;
700 $self->throw("Must provide data hash ref") if !$data || !ref($data);
701 for my $nm (sort keys %{$data}) {
702 next if $data->{$nm} && $data->{$nm} =~ m{^\s*$}o;
703 if ( $MAPPING{$nm} ) {
704 if ( ref( $MAPPING{$nm} ) =~ /hash/i ) {
705 my $key = ( keys %{ $MAPPING{$nm} } )[0];
706 $self->{'_values'}->{$key}->{ $MAPPING{$nm}->{$key} } =
707 $data->{$nm};
709 else {
710 $self->{'_values'}->{ $MAPPING{$nm} } = $data->{$nm};
716 =head2 characters
718 Title : characters
719 Usage : $eventgenerator->characters($str)
720 Function: Send a character events
721 Returns : none
722 Args : string
725 =cut
727 sub characters {
728 my ( $self, $data ) = @_;
729 return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/o );
730 $self->{'_last_data'} = $data->{'Data'};
733 =head2 within_element
735 Title : within_element
736 Usage : if( $eventgenerator->within_element($element) ) {}
737 Function: Test if we are within a particular element
738 This is different than 'in' because within can be tested
739 for a whole block.
740 Returns : boolean
741 Args : string element name
743 =cut
745 sub within_element {
746 my ( $self, $name ) = @_;
747 return 0
748 if ( !defined $name
749 || !defined $self->{'_elements'}
750 || scalar @{ $self->{'_elements'} } == 0 );
751 foreach ( @{ $self->{'_elements'} } ) {
752 return 1 if ( $_ eq $name );
754 return 0;
757 =head2 in_element
759 Title : in_element
760 Usage : if( $eventgenerator->in_element($element) ) {}
761 Function: Test if we are in a particular element
762 This is different than 'within' because 'in' only
763 tests its immediate parent.
764 Returns : boolean
765 Args : string element name
767 =cut
769 sub in_element {
770 my ( $self, $name ) = @_;
771 return 0 if !defined $self->{'_elements'}->[0];
772 return ( $self->{'_elements'}->[0] eq $name );
775 =head2 start_document
777 Title : start_document
778 Usage : $eventgenerator->start_document
779 Function: Handle a start document event
780 Returns : none
781 Args : none
783 =cut
785 sub start_document {
786 my ($self) = @_;
787 $self->{'_lasttype'} = '';
788 $self->{'_values'} = {};
789 $self->{'_result'} = undef;
790 $self->{'_elements'} = [];
793 =head2 end_document
795 Title : end_document
796 Usage : $eventgenerator->end_document
797 Function: Handles an end document event
798 Returns : Bio::Search::Result::ResultI object
799 Args : none
801 =cut
803 sub end_document {
804 my ($self) = @_;
805 return $self->{'_result'};
808 =head2 result_count
810 Title : result_count
811 Usage : my $count = $searchio->result_count
812 Function: Returns the number of results we have processed
813 Returns : integer
814 Args : none
816 =cut
818 sub result_count {
819 my $self = shift;
820 return $self->{'_result_count'};
823 =head2 model
825 Title : model
826 Usage : my $model = $parser->model();
827 Function: Get/Set model; Infernal currently does not output
828 the model name (Rfam ID)
829 Returns : String (name of model)
830 Args : [optional] String (name of model)
832 =cut
834 sub model {
835 my $self = shift;
836 return $self->{'_model'} = shift if @_;
837 return $self->{'_model'};
840 =head2 database
842 Title : database
843 Usage : my $database = $parser->database();
844 Function: Get/Set database; pre-v.1 versions of Infernal do not output
845 the database name
846 Returns : String (database name)
847 Args : [optional] String (database name)
849 =cut
851 sub database {
852 my $self = shift;
853 return $self->{'_database'} = shift if @_;
854 return $self->{'_database'};
857 =head2 algorithm
859 Title : algorithm
860 Usage : my $algorithm = $parser->algorithm();
861 Function: Get/Set algorithm; pre-v.1 versions of Infernal do not output
862 the algorithm name
863 Returns : String (algorithm name)
864 Args : [optional] String (algorithm name)
866 =cut
868 sub algorithm {
869 my $self = shift;
870 return $self->{'_algorithm'} = shift if @_;
871 return $self->{'_algorithm'};
874 =head2 query_accession
876 Title : query_accession
877 Usage : my $acc = $parser->query_accession();
878 Function: Get/Set query (model) accession; pre-v1.1 Infernal does not output
879 the accession number (Rfam accession #)
880 Returns : String (accession)
881 Args : [optional] String (accession)
883 =cut
885 sub query_accession {
886 my $self = shift;
887 return $self->{'_query_accession'} = shift if @_;
888 return $self->{'_query_accession'};
891 =head2 query_description
893 Title : query_description
894 Usage : my $acc = $parser->query_description();
895 Function: Get/Set query (model) description; pre-v1.1 Infernal does not output
896 the Rfam description
897 Returns : String (description)
898 Args : [optional] String (description)
900 =cut
902 sub query_description {
903 my $self = shift;
904 return $self->{'_query_description'} = shift if @_;
905 return $self->{'_query_description'};
908 =head2 hsp_minscore
910 Title : hsp_minscore
911 Usage : my $cutoff = $parser->hsp_minscore();
912 Function: Get/Set min bit score cutoff (for generating Hits/HSPs)
913 Returns : score (number)
914 Args : [optional] score (number)
916 =cut
918 sub hsp_minscore {
919 my $self = shift;
920 return $self->{'_hsp_minscore'} = shift if @_;
921 return $self->{'_hsp_minscore'};
924 =head2 convert_meta
926 Title : convert_meta
927 Usage : $parser->convert_meta(1);
928 Function: Get/Set boolean flag for converting Infernal WUSS format
929 to a simple bracketed format (simple WUSS by default)
930 Returns : boolean flag (TRUE or FALSE)
931 Args : [optional] boolean (eval's to TRUE or FALSE)
933 =cut
935 sub convert_meta {
936 my $self = shift;
937 return $self->{'_convert_meta'} = shift if @_;
938 return $self->{'_convert_meta'};
941 =head2 version
943 Title : version
944 Usage : $parser->version();
945 Function: Set the Infernal cmsearch version
946 Returns : version
947 Args : [optional] version
949 =cut
951 sub version {
952 my $self = shift;
953 return $self->{'_version'} = shift if @_;
954 return $self->{'_version'};
957 =head2 structure_symbols
959 Title : structure_symbols
960 Usage : my $hashref = $parser->structure_symbols();
961 Function: Get/Set RNA structure symbols
962 Returns : Hash ref of delimiters (5' stem, 3' stem, single-strand, etc)
963 : default = < (5-prime)
964 > (3-prime)
965 : (single-strand)
966 ? (unknown)
967 . (gap)
968 Args : Hash ref of substitute delimiters, using above keys.
970 =cut
972 sub structure_symbols {
973 my ($self, $delim) = @_;
974 if ($delim) {
975 if (ref($delim) =~ m{HASH}) {
976 my %data = %{ $delim };
977 for my $d (@VALID_SYMBOLS) {
978 if ( exists $data{$d} ) {
979 $self->{'_delimiter'}->{$d} = $data{$d};
982 } else {
983 $self->throw("Args to helix_delimiters() should be in a hash reference");
986 return $self->{'_delimiter'};
989 =head2 simple_meta
991 Title : simple_meta
992 Usage : my $string = $parser->simple_meta($str);
993 Function: converts more complex WUSS meta format into simple bracket format
994 using symbols defined in structure_symbols()
995 Returns : converted string
996 Args : [required] string to convert
997 Note : This is a very simple conversion method to get simple bracketed
998 format from Infernal data. If the convert_meta() flag is set,
999 this is the method used to convert the strings.
1001 =cut
1003 sub simple_meta {
1004 my ($self, $str) = @_;
1005 $self->throw("No string arg sent!") if !$str;
1006 my $structs = $self->structure_symbols();
1007 my ($ls, $rs, $ss, $unk, $gap) = ($structs->{'5-prime'}, $structs->{'3-prime'},
1008 $structs->{'single-strand'}, $structs->{'unknown'},
1009 $structs->{'gap'});
1010 $str =~ s{[\(\<\[\{]}{$ls}g;
1011 $str =~ s{[\)\>\]\}]}{$rs}g;
1012 $str =~ s{[:,_-]}{$ss}g;
1013 $str =~ s{\.}{$gap}g;
1014 # unknown not handled yet
1015 return $str;
1018 ## private methods
1020 # this is a hack which guesses the format and sets the handler for parsing in
1021 # an instance; it'll be taken out when infernal 1.0 is released
1023 sub _parse_latest {
1024 my ($self) = @_;
1025 my $seentop = 0;
1026 local $/ = "\n";
1027 my ($accession, $description) = ($self->query_accession, $self->query_description);
1028 my ($maxscore, $mineval, $minpval);
1029 $self->start_document();
1030 my ($lasthit, $lastscore, $lasteval, $lastpval, $laststart, $lastend);
1031 PARSER:
1032 while (my $line = $self->_readline) {
1033 next if $line =~ m{^\s+$};
1034 # stats aren't parsed yet...
1035 if ($line =~ m{^\#\s+cmsearch}xms) {
1036 $seentop = 1;
1037 $self->start_element({'Name' => 'Result'});
1038 $self->element_hash({
1039 'Infernal_program' => 'CMSEARCH'
1042 elsif ($line =~ m{^\#\sINFERNAL\s+(\d+\.\d+)}xms) {
1043 $self->element_hash({
1044 'Infernal_version' => $1,
1047 elsif ($line =~ m{^\#\scommand:.*?\s(\S+)$}xms) {
1048 $self->element_hash({
1049 'Infernal_db' => $1,
1052 elsif ($line =~ m{^\#\s+dbsize\(Mb\):\s+(\d+\.\d+)}xms) {
1053 # store absolute DB length
1054 $self->element_hash({
1055 'Infernal_db-let' => $1 * 1e6
1058 elsif ($line =~ m{^CM(?:\s(\d+))?:\s*(\S+)}xms) {
1059 # not sure, but it's possible single reports may contain multiple
1060 # models; if so, they should be rolled over into a new ResultI
1061 #print STDERR "ACC: $accession\nDESC: $description\n";
1062 $self->element_hash({
1063 'Infernal_query-def' => $2, # present in output now
1064 'Infernal_query-acc' => $accession,
1065 'Infernal_querydesc' => $description
1068 elsif ($line =~ m{^>\s*(\S+)} ){
1069 #$self->debug("Start Hit: Found hit:$1\n");
1070 if ($self->in_element('hit')) {
1071 $self->element_hash({'Hit_score' => $maxscore,
1072 'Hit_bits' => $maxscore});
1073 ($maxscore, $minpval, $mineval) = undef;
1074 $self->end_element({'Name' => 'Hit'});
1076 $lasthit = $1;
1078 elsif ($line =~ m{
1079 ^\sQuery\s=\s\d+\s-\s\d+,\s # Query start/end
1080 Target\s=\s(\d+)\s-\s(\d+) # Target start/end
1081 }xmso) {
1082 # Query (model) start/end always the same, determined from
1083 # the HSP length
1084 ($laststart, $lastend) = ($1, $2);
1085 #$self->debug("Found hit coords:$laststart - $lastend\n");
1086 } elsif ($line =~ m{
1087 ^\sScore\s=\s([\d\.]+),\s # Score = Bitscore (for now)
1088 (?:E\s=\s([\d\.e-]+),\s # E-val optional
1089 P\s=\s([\d\.e-]+),\s)? # P-val optional
1090 GC\s= # GC not captured
1091 }xmso
1093 ($lastscore, $lasteval, $lastpval) = ($1, $2, $3);
1094 #$self->debug(sprintf("Found hit data:Score:%s,Eval:%s,Pval:%s\n",$lastscore, $lasteval||'', $lastpval||''));
1095 $maxscore ||= $lastscore;
1096 if ($lasteval && $lastpval) {
1097 $mineval ||= $lasteval;
1098 $minpval ||= $lastpval;
1099 $mineval = ($mineval > $lasteval) ? $lasteval :
1100 $mineval;
1101 $minpval = ($minpval > $lastpval) ? $lastpval :
1102 $minpval;
1104 $maxscore = ($maxscore < $lastscore) ? $lastscore :
1105 $maxscore;
1106 if (!$self->within_element('hit')) {
1107 my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit);
1108 $self->start_element({'Name' => 'Hit'});
1109 $self->element_hash({
1110 'Hit_id' => $lasthit,
1111 'Hit_accession' => $ver ? "$acc.$ver" :
1112 $acc ? $acc : $lasthit,
1113 'Hit_gi' => $gi
1116 if (!$self->in_element('hsp')) {
1117 $self->start_element({'Name' => 'Hsp'});
1120 # hsp is similar to older output
1121 } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP
1122 $self->_pushback($line); # set up for loop
1123 #$self->debug("Start HSP\n");
1124 # what is length of the gap to the structure data?
1125 my $offset = length($1);
1126 my ($ct, $strln) = 0;
1127 my $hsp;
1128 HSP:
1129 my %hsp_key = ('0' => 'meta',
1130 '1' => 'query',
1131 '2' => 'midline',
1132 '3' => 'hit');
1133 HSP:
1134 while (defined ($line = $self->_readline)) {
1135 chomp $line;
1136 next if (!$line); # toss empty lines
1137 # next if $line =~ m{^\s*$}; # toss empty lines
1138 # it is possible to have homology lines consisting
1139 # entirely of spaces if the subject has a large
1140 # insertion where nothing matches the model
1142 # exit loop if at end of file or upon next hit/HSP
1143 if ($line =~ m{^\s{0,2}\S+}) {
1144 $self->_pushback($line);
1145 last HSP;
1147 # iterate to keep track of each line (4 lines per hsp block)
1148 my $iterator = $ct % 4;
1149 # strlen set only with structure lines (proper length)
1150 $strln = length($line) if $iterator == 0;
1151 # only grab the data needed (hit start and stop in hit line above)
1153 my $data = substr($line, $offset, $strln-$offset);
1154 $hsp->{ $hsp_key{$iterator} } .= $data;
1156 $ct++;
1159 # query start, end are from the actual query length (entire hit is
1160 # mapped to CM data, so all CM data is represented)
1161 # works for now...
1162 if ($self->in_element('hsp')) {
1163 # In some cases with HSPs unaligned residues are present in
1164 # the hit or query (Ex: '*[ 8]*' is 8 unaligned residues).
1165 # This info needs to be passed on unmodifed to the HSP class
1166 # and handled there as it is subjectively changed based on
1167 # use.
1168 my $strlen = 0;
1170 # catch any insertions and add them into the actual length
1171 while ($hsp->{'query'} =~ m{\*\[\s*(\d+)\s*\]\*}g) {
1172 $strlen += $1;
1174 # add on the actual residues
1175 $strlen += $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z};
1177 my $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) :
1178 $hsp->{'meta'};
1179 $self->element_hash(
1180 {'Hsp_stranded' => 'HIT',
1181 'Hsp_qseq' => $hsp->{'query'},
1182 'Hsp_hseq' => $hsp->{'hit'},
1183 'Hsp_midline' => $hsp->{'midline'},
1184 'Hsp_structure' => $metastr,
1185 'Hsp_query-from' => 1,
1186 'Infernal_query-len' => $strlen,
1187 'Hsp_query-to' => $strlen,
1188 'Hsp_hit-from' => $laststart,
1189 'Hsp_hit-to' => $lastend,
1190 'Hsp_score' => $lastscore,
1191 'Hsp_bit-score' => $lastscore,
1193 $self->element_hash(
1194 {'Hsp_evalue' => $lasteval,
1195 'Hsp_pvalue' => $lastpval,
1196 }) if ($lasteval && $lastpval);
1197 $self->end_element({'Name' => 'Hsp'});
1199 # result now ends with // and 'Fin'
1200 } elsif ($line =~ m{^//}xms ) {
1201 if ($self->within_element('result') && $seentop) {
1202 if ($self->in_element('hit')) {
1203 $self->element_hash({'Hit_score' => $maxscore,
1204 'Hit_bits' => $maxscore});
1205 # don't know where to put minpval yet
1206 $self->element_hash({'Hit_signif' => $mineval}) if $mineval;
1207 $self->element_hash({'Hit_p' => $minpval}) if $minpval;
1208 $self->end_element({'Name' => 'Hit'});
1210 last PARSER;
1214 $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } );
1215 $self->end_element( { 'Name' => 'Result' } ) if $seentop;
1216 return $self->end_document();
1219 # cmsearch 0.81 (pre-1.0)
1220 sub _parse_pre {
1221 my ($self) = @_;
1222 my $seentop = 0;
1223 local $/ = "\n";
1224 my ($accession, $db, $algorithm, $description, $version) =
1225 ($self->query_accession, $self->database, $self->algorithm,
1226 $self->query_description, '0.81');
1227 my ($maxscore, $mineval, $minpval);
1228 $self->start_document();
1229 my ($lasthit, $lastscore, $lasteval, $lastpval, $laststart, $lastend);
1230 PARSER:
1231 while (my $line = $self->_readline) {
1232 next if $line =~ m{^\s+$};
1233 # stats aren't parsed yet...
1234 if ($line =~ m{CM\s\d+:\s*(\S+)}xms) {
1235 #$self->debug("Start Result: Found model:$1\n");
1236 if (!$self->within_element('result')) {
1237 $seentop = 1;
1238 $self->start_element({'Name' => 'Result'});
1239 $self->element_hash({
1240 'Infernal_program' => $algorithm,
1241 'Infernal_query-def' => $1, # present in output now
1242 'Infernal_query-acc' => $accession,
1243 'Infernal_querydesc' => $description,
1244 'Infernal_db' => $db
1247 } elsif ($line =~ m{^>\s*(\S+)} ){
1248 #$self->debug("Start Hit: Found hit:$1\n");
1249 if ($self->in_element('hit')) {
1250 $self->element_hash({'Hit_score' => $maxscore,
1251 'Hit_bits' => $maxscore});
1252 ($maxscore, $minpval, $mineval) = undef;
1253 $self->end_element({'Name' => 'Hit'});
1255 $lasthit = $1;
1257 elsif ($line =~ m{
1258 ^\sQuery\s=\s\d+\s-\s\d+,\s # Query start/end
1259 Target\s=\s(\d+)\s-\s(\d+) # Target start/end
1260 }xmso) {
1261 # Query (model) start/end always the same, determined from
1262 # the HSP length
1263 ($laststart, $lastend) = ($1, $2);
1264 #$self->debug("Found hit coords:$laststart - $lastend\n");
1265 } elsif ($line =~ m{
1266 ^\sScore\s=\s([\d\.]+),\s # Score = Bitscore (for now)
1267 (?:E\s=\s([\d\.e-]+),\s # E-val optional
1268 P\s=\s([\d\.e-]+),\s)? # P-val optional
1269 GC\s= # GC not captured
1270 }xmso
1272 ($lastscore, $lasteval, $lastpval) = ($1, $2, $3);
1273 #$self->debug(sprintf("Found hit data:Score:%s,Eval:%s,Pval:%s\n",$lastscore, $lasteval||'', $lastpval||''));
1274 $maxscore ||= $lastscore;
1275 if ($lasteval && $lastpval) {
1276 $mineval ||= $lasteval;
1277 $minpval ||= $lastpval;
1278 $mineval = ($mineval > $lasteval) ? $lasteval :
1279 $mineval;
1280 $minpval = ($minpval > $lastpval) ? $lastpval :
1281 $minpval;
1283 $maxscore = ($maxscore < $lastscore) ? $lastscore :
1284 $maxscore;
1285 if (!$self->within_element('hit')) {
1286 my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit);
1287 $self->start_element({'Name' => 'Hit'});
1288 $self->element_hash({
1289 'Hit_id' => $lasthit,
1290 'Hit_accession' => $ver ? "$acc.$ver" :
1291 $acc ? $acc : $lasthit,
1292 'Hit_gi' => $gi
1295 if (!$self->in_element('hsp')) {
1296 $self->start_element({'Name' => 'Hsp'});
1299 # hsp is similar to older output
1300 } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP
1301 $self->_pushback($line); # set up for loop
1302 #$self->debug("Start HSP\n");
1303 # what is length of the gap to the structure data?
1304 my $offset = length($1);
1305 my ($ct, $strln) = 0;
1306 my $hsp;
1307 HSP:
1308 my %hsp_key = ('0' => 'meta',
1309 '1' => 'query',
1310 '2' => 'midline',
1311 '3' => 'hit');
1312 HSP:
1313 while (defined ($line = $self->_readline)) {
1314 chomp $line;
1315 next if (!$line); # toss empty lines
1316 # next if $line =~ m{^\s*$}; # toss empty lines
1317 # it is possible to have homology lines consisting
1318 # entirely of spaces if the subject has a large
1319 # insertion where nothing matches the model
1321 # exit loop if at end of file or upon next hit/HSP
1322 if ($line =~ m{^\s{0,2}\S+}) {
1323 $self->_pushback($line);
1324 last HSP;
1326 # iterate to keep track of each line (4 lines per hsp block)
1327 my $iterator = $ct%4;
1328 # strlen set only with structure lines (proper length)
1329 $strln = length($line) if $iterator == 0;
1330 # only grab the data needed (hit start and stop in hit line above)
1332 my $data = substr($line, $offset, $strln-$offset);
1333 $hsp->{ $hsp_key{$iterator} } .= $data;
1335 $ct++;
1338 # query start, end are from the actual query length (entire hit is
1339 # mapped to CM data, so all CM data is represented)
1340 # works for now...
1341 if ($self->in_element('hsp')) {
1342 my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z};
1344 my $metastr;
1345 $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) :
1346 ($hsp->{'meta'});
1347 $self->element_hash(
1348 {'Hsp_stranded' => 'HIT',
1349 'Hsp_qseq' => $hsp->{'query'},
1350 'Hsp_hseq' => $hsp->{'hit'},
1351 'Hsp_midline' => $hsp->{'midline'},
1352 'Hsp_structure' => $metastr,
1353 'Hsp_query-from' => 1,
1354 'Infernal_query-len' => $strlen,
1355 'Hsp_query-to' => $strlen,
1356 'Hsp_hit-from' => $laststart,
1357 'Hsp_hit-to' => $lastend,
1358 'Hsp_score' => $lastscore,
1359 'Hsp_bit-score' => $lastscore,
1361 $self->element_hash(
1362 {'Hsp_evalue' => $lasteval,
1363 'Hsp_pvalue' => $lastpval,
1364 }) if ($lasteval && $lastpval);
1365 $self->end_element({'Name' => 'Hsp'});
1367 # result now ends with // and 'Fin'
1368 } elsif ($line =~ m{^//}xms ) {
1369 if ($self->within_element('result') && $seentop) {
1370 $self->element(
1371 {'Name' => 'Infernal_version',
1372 'Data' => $version}
1374 if ($self->in_element('hit')) {
1375 $self->element_hash({'Hit_score' => $maxscore,
1376 'Hit_bits' => $maxscore});
1377 # don't know where to put minpval yet
1378 $self->element_hash({'Hit_signif' => $mineval}) if $mineval;
1379 $self->end_element({'Name' => 'Hit'});
1381 last PARSER;
1385 $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } );
1386 $self->end_element( { 'Name' => 'Result' } ) if $seentop;
1387 return $self->end_document();
1390 # cmsearch 0.72 and below; will likely be dropped when Infernal 1.0 is released
1391 sub _parse_old {
1392 my ($self) = @_;
1393 my $seentop = 0;
1394 local $/ = "\n";
1395 my ($accession, $db, $algorithm, $model, $description, $version) =
1396 ($self->query_accession, $self->database, $self->algorithm,
1397 $self->model, $self->query_description, $self->version);
1398 my $maxscore;
1399 my $cutoff = $self->hsp_minscore;
1400 $self->start_document();
1401 local ($_);
1402 my $line;
1403 my ($lasthit, $lastscore, $laststart, $lastend);
1404 my $hitline;
1405 PARSER:
1406 while ( defined( $line = $self->_readline ) ) {
1407 next if $line =~ m{^\s+$};
1408 # bypass this for now...
1409 next if $line =~ m{^HMM\shit};
1410 # pre-0.81
1411 if ($line =~ m{^sequence:\s+(\S+)} ){
1412 if (!$self->within_element('result')) {
1413 $seentop = 1;
1414 $self->start_element({'Name' => 'Result'});
1415 $self->element_hash({
1416 'Infernal_program' => $algorithm,
1417 'Infernal_query-def' => $model,
1418 'Infernal_query-acc' => $accession,
1419 'Infernal_querydesc' => $description,
1420 'Infernal_db' => $db
1423 if ($self->in_element('hit')) {
1424 $self->element_hash({'Hit_score' => $maxscore,
1425 'Hit_bits' => $maxscore});
1426 $maxscore = undef;
1427 $self->end_element({'Name' => 'Hit'});
1429 $lasthit = $1;
1430 } elsif ($line =~ m{^hit\s+\d+\s+:\s+(\d+)\s+(\d+)\s+(\d+\.\d+)\s+bits}xms) {
1431 ($laststart, $lastend, $lastscore) = ($1, $2, $3);
1432 $maxscore = $lastscore unless $maxscore;
1433 if ($lastscore > $cutoff) {
1434 if (!$self->within_element('hit')) {
1435 my ($gi, $acc, $ver) = $self->_get_seq_identifiers($lasthit);
1436 $self->start_element({'Name' => 'Hit'});
1437 $self->element_hash({
1438 'Hit_id' => $lasthit,
1439 'Hit_accession' => $ver ? "$acc.$ver" :
1440 $acc ? $acc : $lasthit,
1441 'Hit_gi' => $gi
1444 # necessary as infernal 0.71 has repeated hit line
1445 if (!$self->in_element('hsp')) {
1446 $self->start_element({'Name' => 'Hsp'});
1448 $maxscore = ($maxscore < $lastscore) ? $lastscore :
1449 $maxscore;
1451 } elsif ($line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) { # start of HSP
1452 $self->_pushback($line); # set up for loop
1453 # what is length of the gap to the structure data?
1454 my $offset = length($1);
1455 my ($ct, $strln) = 0;
1456 my $hsp;
1457 HSP:
1458 my %hsp_key = ('0' => 'meta',
1459 '1' => 'query',
1460 '2' => 'midline',
1461 '3' => 'hit');
1462 HSP:
1463 while ($line = $self->_readline) {
1464 next if $line =~ m{^\s*$}; # toss empty lines
1465 chomp $line;
1466 # exit loop if at end of file or upon next hit/HSP
1467 if (!defined($line) || $line =~ m{^\S+}) {
1468 $self->_pushback($line);
1469 last HSP;
1471 # iterate to keep track of each line (4 lines per hsp block)
1472 my $iterator = $ct%4;
1473 # strlen set only with structure lines (proper length)
1474 $strln = length($line) if $iterator == 0;
1475 # only grab the data needed (hit start and stop in hit line above)
1477 my $data = substr($line, $offset, $strln-$offset);
1478 $hsp->{ $hsp_key{$iterator} } .= $data;
1479 $ct++;
1481 # query start, end are from the actual query length (entire hit is
1482 # mapped to CM data, so all CM data is represented)
1483 # works for now...
1484 if ($self->in_element('hsp')) {
1485 my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z};
1487 my $metastr;
1488 # Ugh...these should be passed in a hash
1489 $metastr = ($self->convert_meta) ? ($self->simple_meta($hsp->{'meta'})) :
1490 ($hsp->{'meta'});
1491 $self->element_hash(
1492 {'Hsp_stranded' => 'HIT',
1493 'Hsp_qseq' => $hsp->{'query'},
1494 'Hsp_hseq' => $hsp->{'hit'},
1495 'Hsp_midline' => $hsp->{'midline'},
1496 'Hsp_structure' => $metastr,
1497 'Hsp_query-from' => 1,
1498 'Infernal_query-len' => $strlen,
1499 'Hsp_query-to' => $strlen,
1500 'Hsp_hit-from' => $laststart,
1501 'Hsp_hit-to' => $lastend,
1502 'Hsp_score' => $lastscore,
1503 'Hsp_bit-score' => $lastscore
1505 $self->end_element({'Name' => 'Hsp'});
1507 } elsif ($line =~ m{^memory}xms || $line =~ m{^CYK\smemory}xms ) {
1508 if ($self->within_element('result') && $seentop) {
1509 $self->element(
1510 {'Name' => 'Infernal_version',
1511 'Data' => $version}
1513 if ($self->in_element('hit')) {
1514 $self->element_hash({'Hit_score' => $maxscore,
1515 'Hit_bits' => $maxscore});
1516 $self->end_element({'Name' => 'Hit'});
1518 last PARSER;
1522 $self->within_element('hit') && $self->end_element( { 'Name' => 'Hit' } );
1523 $self->end_element( { 'Name' => 'Result' } ) if $seentop;
1524 return $self->end_document();