Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Search / Result / HmmpfamResult.pm
blobf0f087999d3f210170fcadbb95145662ed8879d1
2 # BioPerl module for Bio::Search::Result::HmmpfamResult
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam
17 results
19 =head1 SYNOPSIS
21 # generally we use Bio::SearchIO to build these objects
22 use Bio::SearchIO;
23 my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
24 -file => 'result.hmmer');
26 while (my $result = $in->next_result) {
27 print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
30 =head1 DESCRIPTION
32 This object implements a parser for hmmpfam result output, a program in the HMMER
33 package.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via the
61 web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Sendu Bala
67 Email bix@sendu.me.uk
69 =head1 APPENDIX
71 The rest of the documentation details each of the object methods.
72 Internal methods are usually preceded with a _
74 =cut
76 # Let the code begin...
78 package Bio::Search::Result::HmmpfamResult;
80 use strict;
82 use Bio::Search::Hit::HmmpfamHit;
84 use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
86 =head2 new
88 Title : new
89 Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
90 Function: Builds a new Bio::SearchIO::Result::hmmpfam object
91 Returns : Bio::SearchIO::Result::hmmpfam
92 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
93 -parent => Bio::PullParserI object (required if no -chunk)
94 -parameters => hash ref of search parameters (key => value), optional
95 -statistics => hash ref of search statistics (key => value), optional
97 where the array ref provided to -chunk contains an IO object
98 for a filehandle to something representing the raw data of the
99 result, and $start and $end define the tell() position within the
100 filehandle that the result data starts and ends (optional; defaults
101 to start and end of the entire thing described by the filehandle)
103 =cut
105 sub new {
106 my ($class, @args) = @_;
107 my $self = $class->SUPER::new(@args);
109 $self->_setup(@args);
111 foreach my $field (qw( header hit_table hsp_table alignments next_model models query_length )) {
112 $self->_fields->{$field} = undef;
115 $self->_dependencies( { ( query_name => 'header',
116 query_accession => 'header',
117 query_description => 'header',
118 hit_table => 'header',
119 num_hits => 'hit_table',
120 no_hits_found => 'hit_table',
121 hsp_table => 'hit_table',
122 next_alignment => 'hsp_table' ) } );
124 return $self;
128 # PullParserI discovery methods so we can answer all ResultI questions
131 sub _discover_header {
132 my $self = shift;
133 $self->_chunk_seek(0);
134 my $header = $self->_get_chunk_by_end("all domains):\n");
135 $self->{_after_header} = $self->_chunk_tell;
137 $header || $self->throw("Could not find hmmer header, is file really hmmer format?");
139 ($self->_fields->{query_name}) = $header =~ /^Query(?:\s+sequence)?:\s+(\S+)/m;
140 ($self->_fields->{query_accession}) = $header =~ /^Accession:\s+(\S+)/m;
141 ($self->_fields->{query_description}) = $header =~ /^Description:\s+(\S.+)/m;
142 $self->_fields->{query_accession} ||= '';
143 $self->_fields->{query_description} ||= '';
145 $self->_fields->{header} = 1; # stop this method being called again
148 sub _discover_hit_table {
149 my $self = shift;
151 $self->_chunk_seek($self->{_after_header});
152 my $table = $self->_get_chunk_by_end("for domains:\n");
153 $self->{_after_hit_table} = $self->_chunk_tell;
155 my $evalue_cutoff = $self->get_field('evalue_cutoff');
156 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
157 my $score_cutoff = $self->get_field('score_cutoff');
158 undef $score_cutoff if $score_cutoff eq '[unset]';
159 my $hsps_cutoff = $self->get_field('hsps_cutoff');
160 undef $hsps_cutoff if $hsps_cutoff eq '[unset]';
162 my @table;
163 my $no_hit = 1;
164 while ($table =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) {
165 $no_hit = 0;
166 my $evalue = abs($4); # consistency for tests under Windows
167 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
168 next if ($score_cutoff && $3 < $score_cutoff);
169 next if ($hsps_cutoff && $5 < $hsps_cutoff);
170 push(@table, [$1, $2, $3, $evalue, $5]);
172 $self->_fields->{hit_table} = \@table;
173 $self->{_next_hit_index} = @table > 0 ? 0 : -1;
175 $self->_fields->{no_hits_found} = $no_hit;
176 $self->_fields->{num_hits} = @table;
179 sub _discover_hsp_table {
180 my $self = shift;
182 $self->_chunk_seek($self->{_after_hit_table});
183 my $table = $self->_get_chunk_by_end("top-scoring domains:\n");
184 $table ||= $self->_get_chunk_by_end("//\n"); # A0 reports
185 $self->{_after_hsp_table} = $self->_chunk_tell;
187 my %table;
188 # can't save this regex work for the hsp object because the hit object needs
189 # its length, so may as well just do all the work here
190 while ($table =~ /^(\S+)\s+(\d+)\/\d+\s+(\d+)\s+(\d+)\s+\S\S\s+(\d+)\s+(\d+)\s+\S(\S)\s+(\S+)\s+(\S+)/gm) {
191 # rank query_start query_end hit_start hit_end score evalue
192 my $evalue = abs($9); # consistency for tests under Windows
193 push(@{$table{$1}->{hsp_data}}, [$2, $3, $4, $5, $6, $8, $evalue]);
194 if ($7 eq ']') {
195 $table{$1}->{hit_length} = $6;
198 $self->_fields->{hsp_table} = \%table;
201 sub _discover_alignments {
202 my $self = shift;
203 $self->_fields->{alignments} = { };
206 sub _next_alignment {
207 my $self = shift;;
208 return if $self->{_no_more_alignments};
210 my $aligns = $self->_fields->{alignments};
212 unless (defined $self->{_after_previous_alignment}) {
213 $self->_chunk_seek($self->{_after_hsp_table});
214 my $chunk = $self->_get_chunk_by_end(": domain");
215 unless ($chunk) {
216 $self->{_no_more_alignments} = 1;
217 return;
220 $self->{_after_previous_alignment} = $self->_chunk_tell;
221 $self->{_next_alignment_start_text} = $chunk;
222 return $self->_next_alignment;
225 $self->_chunk_seek($self->{_after_previous_alignment});
226 my $chunk = $self->_get_chunk_by_end(": domain");
227 unless ($chunk) {
228 $self->_chunk_seek($self->{_after_previous_alignment});
229 $chunk = $self->_get_chunk_by_end("//");
231 unless ($chunk) {
232 $self->{_no_more_alignments} = 1;
233 return;
237 $self->{_after_previous_alignment} = $self->_chunk_tell;
239 if (defined $self->{_next_alignment_start_text}) {
240 $chunk = $self->{_next_alignment_start_text}.$chunk;
243 $chunk =~ s/(\S+: domain)$//;
244 $self->{_next_alignment_start_text} = $1;
246 my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/;
247 $aligns->{$name.'~~~~'.$domain} = $chunk;
248 return 1;
251 sub _discover_next_hit {
252 my $self = shift;
253 my @hit_table = @{$self->get_field('hit_table')};
254 return if $self->{_next_hit_index} == -1;
256 #[name description score significance num_hsps rank]
257 my @hit_data = (@{$hit_table[$self->{_next_hit_index}++]}, $self->{_next_hit_index});
259 $self->_fields->{next_hit} = Bio::Search::Hit::HmmpfamHit->new(-parent => $self,
260 -hit_data => \@hit_data);
262 if ($self->{_next_hit_index} > $#hit_table) {
263 $self->{_next_hit_index} = -1;
267 =head2 next_hit
269 Title : next_hit
270 Usage : while( $hit = $result->next_hit()) { ... }
271 Function: Returns the next available Hit object, representing potential
272 matches between the query and various entities from the database.
273 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
274 Args : none
276 =cut
278 sub next_hit {
279 my $self = shift;
280 my $hit = $self->get_field('next_hit');
281 undef $self->_fields->{next_hit};
282 return $hit;
285 =head2 next_model
287 Title : next_model
288 Usage : my $domain = $result->next_model
289 Function: Returns the next domain - this is an alias for next_hit()
290 Returns : L<Bio::Search::Hit::HitI> object
291 Args : none
293 =cut
295 *next_model = \&next_hit;
297 =head2 hits
299 Title : hits
300 Usage : my @hits = $result->hits
301 Function: Returns the HitI objects contained within this Result
302 Returns : Array of Bio::Search::Hit::HitI objects
303 Args : none
305 See Also: L<Bio::Search::Hit::HitI>
307 =cut
309 sub hits {
310 my $self = shift;
311 my $old = $self->{_next_hit_index} || 0;
312 $self->rewind;
313 my @hits;
314 while (defined(my $hit = $self->next_hit)) {
315 push(@hits, $hit);
317 $self->{_next_hit_index} = @hits > 0 ? $old : -1;
318 return @hits;
321 =head2 models
323 Title : models
324 Usage : my @domains = $result->models;
325 Function: Returns the list of HMM models seen - this is an alias for hits()
326 Returns : Array of L<Bio::Search::Hit::HitI> objects
327 Args : none
329 =cut
331 *models = \&hits;
333 =head2 sort_hits
335 Title : sort_hits
336 Usage : $result->sort_hits('<score')
337 Function : Sorts the hits so that they come out in the desired order when
338 hits() or next_hit() is called.
339 Returns : n/a
340 Args : A coderef for the sort function. See the documentation on the Perl
341 sort() function for guidelines on writing sort functions.
342 You will be sorting array references, not HitI objects. The
343 references contain name as element 0, description as element 1,
344 score as element 2, significance as element 3 and number of hsps
345 as element 4.
346 By default the sort order is ascending significance value (ie.
347 most significant hits first).
348 Note : To access the special variables $a and $b used by the Perl sort()
349 function the user function must access
350 Bio::Search::Result::HmmpfamResult namespace.
351 For example, use :
352 $result->sort_hits(
353 sub{$Bio::Search::Result::HmmpfamResult::a->[2]
354 <=>
355 $Bio::Search::Result::HmmpfamResult::b->[2]});
356 NOT $result->sort_hits($a->[2] <=> $b->[2]);
358 =cut
360 sub sort_hits {
361 my ($self, $code_ref) = @_;
362 $code_ref ||= sub { $a->[3] <=> $b->[3] };
364 # avoid creating hit objects just to sort, hence force user to sort on
365 # the array references in hit table
366 my $table_ref = $self->get_field('hit_table');
367 @{$table_ref} > 1 || return;
369 my @sorted = sort $code_ref @{$table_ref};
370 @sorted == @{$table_ref} || $self->throw("Your sort routine failed to give back all hits!");
371 $self->_fields->{hit_table} = \@sorted;
374 =head2 rewind
376 Title : rewind
377 Usage : $result->rewind;
378 Function: Allow one to reset the Hit iterator to the beginning, so that
379 next_hit() will subsequently return the first hit and so on.
380 Returns : n/a
381 Args : none
383 =cut
385 sub rewind {
386 my $self = shift;
387 unless ($self->_fields->{hit_table}) {
388 $self->get_field('hit_table');
390 $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1;