2 # BioPerl module for Bio::Search::Hit::BlastPullHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Hit::BlastPullHit - A parser and hit object for BLASTN hits
20 # generally we use Bio::SearchIO to build these objects
22 my $in = Bio::SearchIO->new(-format => 'blast_pull',
23 -file => 'result.blast');
25 while (my $result = $in->next_result) {
26 while (my $hit = $result->next_hit) {
27 print $hit->name, "\n";
28 print $hit->score, "\n";
29 print $hit->significance, "\n";
31 while (my $hsp = $hit->next_hsp) {
32 # process HSPI objects
39 This object implements a parser for BLASTN hit output.
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 responsive experts will be able to look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via the
69 https://github.com/bioperl/bioperl-live/issues
71 =head1 AUTHOR - Sendu Bala
77 Additional contributors names and emails here
81 The rest of the documentation details each of the object methods.
82 Internal methods are usually preceded with a _
86 # Let the code begin...
package Bio::Search::Hit::BlastPullHit;

use strict;

# Class used to build the HSP objects handed out via next_hsp().
use Bio::Search::HSP::BlastPullHSP;

# Parent classes; new() chains to SUPER::new through these.
use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI);
=head2 new

 Title   : new
 Usage   : my $obj = Bio::Search::Hit::BlastPullHit->new();
 Function: Builds a new Bio::Search::Hit::BlastPullHit object.
 Returns : Bio::Search::Hit::BlastPullHit
 Args    : -chunk    => [Bio::Root::IO, $start, $end] (required if no -parent)
           -parent   => Bio::PullParserI object (required if no -chunk)
           -hit_data => array ref with [name description score significance]

           where the array ref provided to -chunk contains an IO object
           for a filehandle to something representing the raw data of the
           hit, and $start and $end define the tell() position within the
           filehandle that the hit data starts and ends (optional; defaults
           to start and end of the entire thing described by the filehandle)

=cut

sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    $self->_setup(@args);

    # Pre-declare the 'header' and 'start_end' pseudo-fields as undefined;
    # _discover_header() sets 'header' to 1 once it has run.
    my $fields = $self->_fields;
    foreach my $field (qw( header start_end )) {
        $fields->{$field} = undef;
    }

    # Seed the fields that were supplied up front (presumably the -hit_data
    # array ref as stored by _setup() -- confirm against PullHitI).
    # NOTE: shift() consumes the elements of the supplied array.
    my $hit_data = $self->_raw_hit_data;
    if ($hit_data && ref($hit_data) eq 'ARRAY') {
        foreach my $field (qw(name description score significance)) {
            $fields->{$field} = shift(@{$hit_data});
        }
    }

    # Field name => pseudo-field whose _discover_* method fills it in.
    # 'length' is listed because _discover_header() populates it together
    # with name and description.
    $self->_dependencies( { ( name        => 'header',
                              length      => 'header',
                              description => 'header',
                              accession   => 'header',
                              next_hsp    => 'header',
                              query_start => 'start_end',
                              query_end   => 'start_end',
                              hit_start   => 'start_end',
                              hit_end     => 'start_end' ) } );

    return $self;
}
146 # PullParserI discovery methods so we can answer all HitI questions
# Discovery method for the 'header' pseudo-field: reads the text that precedes
# the first "\n Score = " line of this hit's chunk and fills in the name,
# description, length and accession fields from it.
sub _discover_header {
    my $self = shift;

    $self->_chunk_seek(0);
    my $header = $self->_get_chunk_by_end("\n Score = ");

    unless ($header) {
        # no alignment or other data; all information was in the hit table
        # (i.e. the fields already set at construction time), so only the
        # accession still has to be derived
        $self->_calculate_accession_from_name;

        $self->_fields->{header} = 1;
        return;
    }

    # Remember where the header stopped; _discover_next_hsp() starts reading
    # HSPs from this position.
    $self->{_after_header} = $self->_chunk_tell;

    ($self->_fields->{name}, $self->_fields->{description}, $self->_fields->{length}) = $header =~ /^(\S+)\s+(\S.+?)?\s+Length\s*=\s*(\d+)/sm;
    if ($self->_fields->{description}) {
        # the description capture may span several lines; join them
        $self->_fields->{description} =~ s/\n//g;
    }
    else {
        $self->_fields->{description} = '';
    }

    $self->_calculate_accession_from_name;

    $self->_fields->{header} = 1;
}
# Derives the 'accession' field from the hit name.
# Tries, in order:
#   1. the word following the third '|' of the name (a|b|c|ACC...)
#   2. the word between the first '|' and a following '.' (a|ACC.x)
#   3. otherwise the name itself is used as the accession
sub _calculate_accession_from_name {
    my $self = shift;

    my $name = $self->get_field('name');
    if ($name =~ /.+?\|.+?\|.+?\|(\w+)/) {
        $self->_fields->{accession} = $1;
    }
    elsif ($self->_fields->{name} =~ /.+?\|(\w+)?\./) {
        # the capture group is optional, so the accession may end up
        # undefined when nothing sits between '|' and '.'
        $self->_fields->{accession} = $1;
    }
    else {
        $self->_fields->{accession} = $name;
    }
}
# Discovery method for the 'start_end' pseudo-field: scans every HSP of this
# hit and records the smallest start and largest end, separately for query
# and hit, in the query_start/query_end/hit_start/hit_end fields.
# With no HSPs all four fields are left undefined.
sub _discover_start_end {
    my $self = shift;

    my ($q_start, $q_end, $h_start, $h_end);
    foreach my $hsp ($self->hsps) {
        # start()/end() are called in list context and are expected to
        # return the (query, hit) coordinate pair
        my ($this_q_start, $this_h_start) = $hsp->start;
        my ($this_q_end, $this_h_end) = $hsp->end;

        if (! defined $q_start || $this_q_start < $q_start) {
            $q_start = $this_q_start;
        }
        if (! defined $h_start || $this_h_start < $h_start) {
            $h_start = $this_h_start;
        }

        if (! defined $q_end || $this_q_end > $q_end) {
            $q_end = $this_q_end;
        }
        if (! defined $h_end || $this_h_end > $h_end) {
            $h_end = $this_h_end;
        }
    }

    $self->_fields->{query_start} = $q_start;
    $self->_fields->{query_end} = $q_end;
    $self->_fields->{hit_start} = $h_start;
    $self->_fields->{hit_end} = $h_end;
}
# Discovery method for the 'next_hsp' field: locates the next HSP section
# (delimited by "\n Score = ") after the previously returned one and stores a
# new Bio::Search::HSP::BlastPullHSP for it in the 'next_hsp' field.
# Returns without setting the field when there is nothing (more) to read.
sub _discover_next_hsp {
    my $self = shift;

    # Continue after the previous HSP, or right after the header for the
    # first one. If neither position is known there is no HSP data to seek to.
    my $pos = $self->{_end_of_previous_hsp} || $self->{_after_header};
    return unless $pos;
    $self->_chunk_seek($pos);

    my ($start, $end) = $self->_find_chunk_by_end("\n Score = ");
    if ((defined $end && ($end + $self->_chunk_true_start) > $self->_chunk_true_end) || ! $end) {
        # no further delimiter inside this hit: the HSP runs to the end of
        # the hit's chunk
        $start = $self->{_end_of_previous_hsp} || $self->{_after_header};
        $end = $self->_chunk_true_end;
    }
    else {
        $end += $self->_chunk_true_start;
    }
    # $start and $end are now offset by _chunk_true_start, the form handed
    # to the HSP's -chunk below
    $start += $self->_chunk_true_start;

    return if $start >= $self->_chunk_true_end;

    # stored without the _chunk_true_start offset, matching what
    # _chunk_seek() is given above
    $self->{_end_of_previous_hsp} = $end - $self->_chunk_true_start;

    #*** needs to inherit piped_behaviour, and we need to deal with _sequential
    $self->_fields->{next_hsp} = Bio::Search::HSP::BlastPullHSP->new(-parent => $self,
                                                                     -chunk => [$self->chunk, $start, $end]);
}
# Discovery method for the 'num_hsps' field: stores how many HSPs hsps()
# returns for this hit.
sub _discover_num_hsps {
    my $self = shift;

    # Collect the list first so the count does not depend on how hsps()
    # behaves when called in scalar context.
    my @hsps = $self->hsps;
    $self->_fields->{num_hsps} = scalar @hsps;
}
=head2 next_hsp

 Title    : next_hsp
 Usage    : while( $hsp = $obj->next_hsp()) { ... }
 Function : Returns the next available High Scoring Pair
 Returns  : L<Bio::Search::HSP::HSPI> object or undef if finished
 Args     : none

=cut

sub next_hsp {
    my $self = shift;

    my $hsp = $self->get_field('next_hsp');
    # Empty the slot again; presumably this makes the following get_field()
    # call run _discover_next_hsp() afresh -- confirm against PullParserI.
    undef $self->_fields->{next_hsp};

    return $hsp;
}
=head2 hsps

 Title    : hsps
 Usage    : $hit_object->hsps();
 Purpose  : Get a list containing all HSP objects.
 Example  : @hsps = $hit_object->hsps();
 Returns  : list of L<Bio::Search::HSP::BlastPullHSP> objects.
 Argument : none

=cut

sub hsps {
    my $self = shift;

    # Remember where the next_hsp() iterator currently stands so that
    # collecting the full list does not disturb an iteration in progress.
    my $old = $self->{_end_of_previous_hsp};

    $self->rewind;
    my @hsps;
    while (defined(my $hsp = $self->next_hsp)) {
        push(@hsps, $hsp);
    }

    $self->{_end_of_previous_hsp} = $old;

    return @hsps;
}
=head2 hsp

 Title    : hsp
 Usage    : $hit_object->hsp( [string] );
 Purpose  : Get a single HSPI object for the present HitI object.
 Example  : $hspObj  = $hit_object->hsp;  # same as 'best'
          : $hspObj  = $hit_object->hsp('best');
          : $hspObj  = $hit_object->hsp('worst');
 Returns  : Object reference for a L<Bio::Search::HSP::HSPI> object.
 Argument : String (or no argument).
          :   No argument (default) = highest scoring HSP (same as 'best').
          :   'best'  = highest scoring HSP.
          :   'worst' = lowest scoring HSP.
 Throws   : Exception if an unrecognized argument is used.
 Note     : Not implemented in this class: every call, whatever the
            argument, goes straight to throw_not_implemented().

See Also  : L<hsps()|hsps>, L<num_hsps()|num_hsps>

=cut

sub hsp {
    my ($self, $type) = @_;

    # $type is accepted for interface compatibility only; see Note above.
    $self->throw_not_implemented;
}
=head2 rewind

 Title   : rewind
 Usage   : $hit->rewind;
 Function: Allow one to reset the HSP iterator to the beginning, so that
           next_hsp() will subsequently return the first hsp and so on.
 Args    : none

=cut

sub rewind {
    my $self = shift;

    # Forget where the last HSP ended, so that reading resumes from the
    # position recorded after the header.
    delete $self->{_end_of_previous_hsp};
}
# have p() a synonym of significance()
# Any arguments are ignored; only the invocant is used.
sub p {
    return shift->significance;
}