2 # BioPerl module for Bio::Search::Result::BlastPullResult
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Result::BlastPullResult - A parser and result object for BLASTN
21 # generally we use Bio::SearchIO to build these objects
23 my $in = Bio::SearchIO->new(-format => 'blast_pull',
24 -file => 'result.blast');
26 while (my $result = $in->next_result) {
27 print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
32 This object implements a parser for NCBI BLASTN result output.
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 reponsive experts will be able look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHOR - Sendu Bala
70 Additional contributors names and emails here
74 The rest of the documentation details each of the object methods.
75 Internal methods are usually preceded with a _
79 # Let the code begin...
81 package Bio
::Search
::Result
::BlastPullResult
;
85 use Bio
::Search
::Hit
::BlastPullHit
;
87 use base
qw(Bio::Root::Root Bio::Search::Result::PullResultI);
92 Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
93 Function: Builds a new Bio::SearchIO::Result::hmmpfam object
94 Returns : Bio::SearchIO::Result::hmmpfam
95 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
96 -parent => Bio::PullParserI object (required if no -chunk)
97 -parameters => hash ref of search parameters (key => value), optional
98 -statistics => hash ref of search statistics (key => value), optional
100 where the array ref provided to -chunk contains an IO object
101 for a filehandle to something representing the raw data of the
102 result, and $start and $end define the tell() position within the
103 filehandle that the result data starts and ends (optional; defaults
104 to start and end of the entire thing described by the filehandle)
109 my ($class, @args) = @_;
110 my $self = $class->SUPER::new
(@args);
112 $self->_setup(@args);
114 foreach my $field (qw( header hit_table hsp_table alignments next_model
115 models query_length stats_and_params)) {
116 $self->_fields->{$field} = undef;
119 $self->_dependencies( { ( query_name
=> 'header',
120 query_accession
=> 'header',
121 query_description
=> 'header',
122 query_length
=> 'header',
123 hit_table
=> 'header',
124 num_hits
=> 'hit_table',
125 no_hits_found
=> 'hit_table' ) } );
131 # PullParserI discovery methods so we can answer all ResultI questions
134 sub _discover_header
{
136 $self->_chunk_seek(0);
137 my $header = $self->_get_chunk_by_end("Value\n");
139 $header = $self->_get_chunk_by_end("***** No hits found ******\n");
140 $self->{_no_hits_found
} = 1;
142 $self->throw("Invalid header returned") unless $header;
143 $self->{_after_header
} = $self->_chunk_tell;
145 ($self->_fields->{query_name
}) = $header =~ /^\s*(\S+)/;
146 $self->_fields->{query_accession
} = '';
147 $self->_fields->{query_description
} = '';
149 if ($header =~ /^Length=(\d+)/m) {
150 $self->_fields->{query_length
} = $1;
152 elsif ($header =~ /\((\d+) letters\)/) {
154 $self->_fields->{query_length
} = $1;
156 if ($header =~ /^\s*\(\d+ letters/) {
157 # there wasn't a query sequence name
158 $self->_fields->{query_name
} = '';
162 $self->_fields->{header
} = 1;
165 sub _discover_hit_table
{
167 $self->_chunk_seek($self->{_after_header
});
169 my $table = $self->_get_chunk_by_end("\n>");
171 if (!$table && !$self->{_no_hits_found
}) {
172 # no alignments, presumably; hit table comprises the remainder of this
174 while (my $line = $self->_get_chunk_by_nol(1)) {
181 $self->{_after_hit_table
} = $self->_chunk_tell;
183 my $evalue_cutoff = $self->get_field('evalue_cutoff');
184 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
185 my $score_cutoff = $self->get_field('score_cutoff');
186 undef $score_cutoff if $score_cutoff eq '[unset]';
190 while ($table =~ /^(\S+)\s+(\S.*?)?\s+(\S+)\s+([\de]\S*)\s*\n/gm) {
192 my ($name, $desc, $score, $evalue) = ($1, $2, $3, $4);
194 if ($evalue =~ /^e/) {
195 $evalue = '1'.$evalue;
197 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
198 next if ($score_cutoff && $score < $score_cutoff);
199 push(@table, [$name, $desc, $score, $evalue]);
201 $self->_fields->{hit_table
} = \
@table;
202 $self->{_next_hit_index
} = @table > 0 ?
0 : -1;
204 $self->_fields->{no_hits_found
} = $no_hit;
205 $self->_fields->{num_hits
} = @table;
208 sub _discover_next_hit
{
210 my $hit_table = $self->get_field('hit_table');
211 return if $self->{_next_hit_index
} == -1;
212 my $hit_row = ${$hit_table}[$self->{_next_hit_index
}];
214 $self->_chunk_seek($self->{_end_of_previous_hit
} || $self->{_after_hit_table
});
216 my ($start, $end) = $self->_find_chunk_by_end("\n>");
218 $start = $self->{_end_of_previous_hit
} || $self->{_after_hit_table
};
219 $end = $self->_chunk_true_end;
222 $end += $self->_chunk_true_start;
224 $start += $self->_chunk_true_start;
226 $self->{_end_of_previous_hit
} = $end - $self->_chunk_true_start;
228 #*** needs to inherit piped_behaviour, and we need to deal with _sequential
230 $self->_fields->{next_hit
} = Bio
::Search
::Hit
::BlastPullHit
->new(-parent
=> $self,
231 -chunk
=> [$self->chunk, $start, $end],
232 -hit_data
=> $hit_row);
234 $self->{_next_hit_index
}++;
236 if ($self->{_next_hit_index
} > $#{$hit_table}) {
237 $self->{_next_hit_index
} = -1;
241 sub _discover_stats_and_params
{
243 $self->_chunk_seek(0);
244 my ($start, $end) = $self->_find_chunk_by_end("\n Database: ");
245 $self->_chunk_seek($end);
248 while ($self->_get_chunk_by_nol(1)) {
249 if (/Number of letters in database:\s+(\S+)/) {
252 $self->add_statistic('dbletters', $stat);
254 elsif (/Number of sequences in database:\s+(\S+)/) {
257 $self->add_statistic('dbentries', $stat);
260 my $line = $self->_get_chunk_by_nol(1);
261 $line =~ /\s+(\S+)\s+(\S+)\s+(\S+)/;
262 $self->add_statistic($gapped ?
'lambda_gapped' : 'lambda', $1);
263 $self->add_statistic($gapped ?
'kappa_gapped' : 'kappa', $2);
264 $self->add_statistic($gapped ?
'entropy_gapped' : 'entropy', $3);
267 elsif (/^Matrix: (.+)\n/) {
268 $self->add_parameter('matrix', $1);
270 elsif (/^Gap Penalties: Existence: (\d+), Extension: (\d+)/) {
271 $self->add_parameter('gapopen', $1);
272 $self->add_parameter('gapext', $2);
274 elsif (/^Number of Hits to DB: (\d+)/) {
275 $self->add_statistic('Hits_to_DB', $1);
277 elsif (/^Number of extensions: (\d+)/) {
278 $self->add_statistic('num_extensions', $1);
280 elsif (/^Number of successful extensions: (\d+)/) {
281 $self->add_statistic('num_successful_extensions', $1);
283 elsif (/^Number of sequences better than (\S+):/) {
284 $self->add_parameter('expect', $1);
286 elsif (/^[Ll]ength of query: (\d+)/) {
287 $self->add_statistic('querylength', $1);
289 elsif (/^[Ee]ffective HSP length: (\d+)/) {
290 $self->add_statistic('effective_hsplength', $1);
292 elsif (/^[Ee]ffective length of database: (\d+)/) {
293 $self->add_statistic('effectivedblength', $1);
295 elsif (/^[Ee]ffective search space: (\d+)/) {
296 $self->add_statistic('effectivespace', $1);
298 elsif (/^[Ee]ffective search space used: (\d+)/) {
299 $self->add_statistic('effectivespaceused', $1);
301 elsif (/^([TAXS]\d?): (\d+)(?: \((\S+))?/) {
302 $self->add_statistic($1, $2);
303 $self->add_statistic($1.'_bits', $3) if $3;
307 $self->_fields->{stats_and_params
} = 1;
313 Usage : while( $hit = $result->next_hit()) { ... }
314 Function: Returns the next available Hit object, representing potential
315 matches between the query and various entities from the database.
316 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
323 my $hit = $self->get_field('next_hit');
324 undef $self->_fields->{next_hit
};
331 Usage : my @hits = $result->hits
332 Function: Returns the HitI objects contained within this Result
333 Returns : Array of Bio::Search::Hit::HitI objects
336 See Also: L<Bio::Search::Hit::HitI>
342 my $old = $self->{_next_hit_index
} || 0;
345 while (defined(my $hit = $self->next_hit)) {
348 $self->{_next_hit_index
} = @hits > 0 ?
$old : -1;
355 Usage : $result->sort_hits('<score')
356 Function : Sorts the hits so that they come out in the desired order when
357 hits() or next_hit() is called.
359 Args : A coderef for the sort function. See the documentation on the Perl
360 sort() function for guidelines on writing sort functions.
361 By default the sort order is ascending significance value (ie.
362 most significant hits first).
368 my ($self, $code_ref) = @_;
369 $self->throw("Not implemented yet");
375 Usage : $result->rewind;
376 Function: Allow one to reset the Hit iterator to the beginning, so that
377 next_hit() will subsequently return the first hit and so on.
385 unless ($self->_fields->{hit_table
}) {
386 $self->get_field('hit_table');
388 $self->{_next_hit_index
} = @
{$self->_fields->{hit_table
}} > 0 ?
0 : -1;
393 Title : get_statistic
394 Usage : my $gap_ext = $result->get_statistic('kappa')
395 Function: Returns the value for a specific statistic available
398 Args : name of statistic (string)
404 $self->get_field('stats_and_params');
405 return $self->SUPER::get_statistic
(@_);
410 Title : get_parameter
411 Usage : my $gap_ext = $result->get_parameter('gapext')
412 Function: Returns the value for a specific parameter used
413 when running this result
415 Args : name of parameter (string)
421 $self->get_field('stats_and_params');
422 return $self->SUPER::get_parameter
(@_);