Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Search / Hit / BlastPullHit.pm
blob0320fe5e2bc4366745005cc9ad5c9f95028074d9
2 # BioPerl module for Bio::Search::Hit::BlastPullHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::Hit::BlastPullHit - A parser and hit object for BLASTN hits
18 =head1 SYNOPSIS
20 # generally we use Bio::SearchIO to build these objects
21 use Bio::SearchIO;
22 my $in = Bio::SearchIO->new(-format => 'blast_pull',
23 -file => 'result.blast');
25 while (my $result = $in->next_result) {
26 while (my $hit = $result->next_hit) {
27 print $hit->name, "\n";
28 print $hit->score, "\n";
29 print $hit->significance, "\n";
31 while (my $hsp = $hit->next_hsp) {
32 # process HSPI objects
37 =head1 DESCRIPTION
39 This object implements a parser for BLASTN hit output.
41 =head1 FEEDBACK
43 =head2 Mailing Lists
45 User feedback is an integral part of the evolution of this and other
46 Bioperl modules. Send your comments and suggestions preferably to
47 the Bioperl mailing list. Your participation is much appreciated.
49 bioperl-l@bioperl.org - General discussion
50 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52 =head2 Support
54 Please direct usage questions or support issues to the mailing list:
56 I<bioperl-l@bioperl.org>
58 rather than to the module maintainer directly. Many experienced and
59 reponsive experts will be able look at the problem and quickly
60 address it. Please include a thorough description of the problem
61 with code and data examples if at all possible.
63 =head2 Reporting Bugs
65 Report bugs to the Bioperl bug tracking system to help us keep track
66 of the bugs and their resolution. Bug reports can be submitted via the
67 web:
69 https://github.com/bioperl/bioperl-live/issues
71 =head1 AUTHOR - Sendu Bala
73 Email bix@sendu.me.uk
75 =head1 CONTRIBUTORS
77 Additional contributors names and emails here
79 =head1 APPENDIX
81 The rest of the documentation details each of the object methods.
82 Internal methods are usually preceded with a _
84 =cut
86 # Let the code begin...
88 package Bio::Search::Hit::BlastPullHit;
90 use strict;
92 use Bio::Search::HSP::BlastPullHSP;
94 use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI);
96 =head2 new
98 Title : new
99 Usage : my $obj = Bio::Search::Hit::BlastNHit->new();
100 Function: Builds a new Bio::Search::Hit::BlastNHit object.
101 Returns : Bio::Search::Hit::BlastNHit
102 Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
103 -parent => Bio::PullParserI object (required if no -chunk)
104 -hit_data => array ref with [name description score significance]
106 where the array ref provided to -chunk contains an IO object
107 for a filehandle to something representing the raw data of the
108 hit, and $start and $end define the tell() position within the
109 filehandle that the hit data starts and ends (optional; defaults
110 to start and end of the entire thing described by the filehandle)
112 =cut
114 sub new {
115 my ($class, @args) = @_;
116 my $self = $class->SUPER::new(@args);
118 $self->_setup(@args);
120 my $fields = $self->_fields;
121 foreach my $field (qw( header start_end )) {
122 $fields->{$field} = undef;
125 my $hit_data = $self->_raw_hit_data;
126 if ($hit_data && ref($hit_data) eq 'ARRAY') {
127 foreach my $field (qw(name description score significance)) {
128 $fields->{$field} = shift(@{$hit_data});
132 $self->_dependencies( { ( name => 'header',
133 length => 'header',
134 description => 'header',
135 accession => 'header',
136 next_hsp => 'header',
137 query_start => 'start_end',
138 query_end => 'start_end',
139 hit_start => 'start_end',
140 hit_end => 'start_end' ) } );
142 return $self;
146 # PullParserI discovery methods so we can answer all HitI questions
149 sub _discover_header {
150 my $self = shift;
151 $self->_chunk_seek(0);
152 my $header = $self->_get_chunk_by_end("\n Score = ");
154 unless ($header) {
155 # no alignment or other data; all information was in the hit table of
156 # the result
157 $self->_calculate_accession_from_name;
159 $self->_fields->{header} = 1;
160 return;
163 $self->{_after_header} = $self->_chunk_tell;
165 ($self->_fields->{name}, $self->_fields->{description}, $self->_fields->{length}) = $header =~ /^(\S+)\s+(\S.+?)?\s+Length\s*=\s*(\d+)/sm;
166 if ($self->_fields->{description}) {
167 $self->_fields->{description} =~ s/\n//g;
169 else {
170 $self->_fields->{description} = '';
173 $self->_calculate_accession_from_name;
175 $self->_fields->{header} = 1;
178 sub _calculate_accession_from_name {
179 my $self = shift;
180 my $name = $self->get_field('name');
181 if ($name =~ /.+?\|.+?\|.+?\|(\w+)/) {
182 $self->_fields->{accession} = $1;
184 elsif ($self->_fields->{name} =~ /.+?\|(\w+)?\./) {
185 # old form?
186 $self->_fields->{accession} = $1;
188 else {
189 $self->_fields->{accession} = $name;
193 sub _discover_start_end {
194 my $self = shift;
196 my ($q_start, $q_end, $h_start, $h_end);
197 foreach my $hsp ($self->hsps) {
198 my ($this_q_start, $this_h_start) = $hsp->start;
199 my ($this_q_end, $this_h_end) = $hsp->end;
201 if (! defined $q_start || $this_q_start < $q_start) {
202 $q_start = $this_q_start;
204 if (! defined $h_start || $this_h_start < $h_start) {
205 $h_start = $this_h_start;
208 if (! defined $q_end || $this_q_end > $q_end) {
209 $q_end = $this_q_end;
211 if (! defined $h_end || $this_h_end > $h_end) {
212 $h_end = $this_h_end;
216 $self->_fields->{query_start} = $q_start;
217 $self->_fields->{query_end} = $q_end;
218 $self->_fields->{hit_start} = $h_start;
219 $self->_fields->{hit_end} = $h_end;
222 sub _discover_next_hsp {
223 my $self = shift;
224 my $pos = $self->{_end_of_previous_hsp} || $self->{_after_header};
225 return unless $pos;
226 $self->_chunk_seek($pos);
228 my ($start, $end) = $self->_find_chunk_by_end("\n Score = ");
229 if ((defined $end && ($end + $self->_chunk_true_start) > $self->_chunk_true_end) || ! $end) {
230 $start = $self->{_end_of_previous_hsp} || $self->{_after_header};
231 $end = $self->_chunk_true_end;
233 else {
234 $end += $self->_chunk_true_start;
236 $start += $self->_chunk_true_start;
238 return if $start >= $self->_chunk_true_end;
240 $self->{_end_of_previous_hsp} = $end - $self->_chunk_true_start;
242 #*** needs to inherit piped_behaviour, and we need to deal with _sequential
243 # ourselves
244 $self->_fields->{next_hsp} = Bio::Search::HSP::BlastPullHSP->new(-parent => $self,
245 -chunk => [$self->chunk, $start, $end]);
248 sub _discover_num_hsps {
249 my $self = shift;
250 $self->_fields->{num_hsps} = $self->hsps;
253 =head2 next_hsp
255 Title : next_hsp
256 Usage : while( $hsp = $obj->next_hsp()) { ... }
257 Function : Returns the next available High Scoring Pair
258 Example :
259 Returns : L<Bio::Search::HSP::HSPI> object or null if finished
260 Args : none
262 =cut
264 sub next_hsp {
265 my $self = shift;
266 my $hsp = $self->get_field('next_hsp');
267 undef $self->_fields->{next_hsp};
268 return $hsp;
271 =head2 hsps
273 Usage : $hit_object->hsps();
274 Purpose : Get a list containing all HSP objects.
275 Example : @hsps = $hit_object->hsps();
276 Returns : list of L<Bio::Search::HSP::BlastHSP> objects.
277 Argument : none
279 =cut
281 sub hsps {
282 my $self = shift;
283 my $old = $self->{_end_of_previous_hsp};
284 $self->rewind;
285 my @hsps;
286 while (defined(my $hsp = $self->next_hsp)) {
287 push(@hsps, $hsp);
289 $self->{_end_of_previous_hsp} = $old;
290 return @hsps;
293 =head2 hsp
295 Usage : $hit_object->hsp( [string] );
296 Purpose : Get a single HSPI object for the present HitI object.
297 Example : $hspObj = $hit_object->hsp; # same as 'best'
298 : $hspObj = $hit_object->hsp('best');
299 : $hspObj = $hit_object->hsp('worst');
300 Returns : Object reference for a L<Bio::Search::HSP::HSPI> object.
301 Argument : String (or no argument).
302 : No argument (default) = highest scoring HSP (same as 'best').
303 : 'best' = highest scoring HSP.
304 : 'worst' = lowest scoring HSP.
305 Throws : Exception if an unrecognized argument is used.
307 See Also : L<hsps()|hsps>, L<num_hsps>()
309 =cut
311 sub hsp {
312 my ($self, $type) = @_;
313 $type ||= 'best';
314 $self->throw_not_implemented;
317 =head2 rewind
319 Title : rewind
320 Usage : $result->rewind;
321 Function: Allow one to reset the HSP iterator to the beginning, so that
322 next_hsp() will subsequently return the first hsp and so on.
323 Returns : n/a
324 Args : none
326 =cut
328 sub rewind {
329 my $self = shift;
330 delete $self->{_end_of_previous_hsp};
333 # have p() a synonym of significance()
334 sub p {
335 return shift->significance;