Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Search / Hit / GenericHit.pm
blobcc6e42feecfccab75b0a2a1d14a42913f5045468
2 # BioPerl module for Bio::Search::Hit::GenericHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
18 =head1 SYNOPSIS
20 use Bio::Search::Hit::GenericHit;
21 my $hit = Bio::Search::Hit::GenericHit->new(-algorithm => 'blastp');
23 # typically one gets HitI objects from a SearchIO stream via a ResultI
24 use Bio::SearchIO;
25 my $parser = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
27 my $result = $parser->next_result;
28 my $hit = $result->next_hit;
30 # TODO: Describe how to configure a SearchIO stream so that it generates
31 # GenericHit objects.
33 =head1 DESCRIPTION
35 This object handles the hit data from a Database Sequence Search such
36 as FASTA or BLAST.
38 Unless you're writing a parser, you won't ever need to create a
39 GenericHit or any other HitI-implementing object. If you use
40 the SearchIO system, HitI objects are created automatically from
41 a SearchIO stream which returns Bio::Search::Hit::HitI objects.
43 For documentation on what you can do with GenericHit (and other HitI
44 objects), please see the API documentation in
45 L<Bio::Search::Hit::HitI|Bio::Search::Hit::HitI>.
47 =head1 FEEDBACK
49 =head2 Mailing Lists
51 User feedback is an integral part of the evolution of this and other
52 Bioperl modules. Send your comments and suggestions preferably to
53 the Bioperl mailing list. Your participation is much appreciated.
55 bioperl-l@bioperl.org - General discussion
56 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58 =head2 Support
60 Please direct usage questions or support issues to the mailing list:
62 I<bioperl-l@bioperl.org>
64 rather than to the module maintainer directly. Many experienced and
65 reponsive experts will be able look at the problem and quickly
66 address it. Please include a thorough description of the problem
67 with code and data examples if at all possible.
69 =head2 Reporting Bugs
71 Report bugs to the Bioperl bug tracking system to help us keep track
72 of the bugs and their resolution. Bug reports can be submitted via the
73 web:
75 https://github.com/bioperl/bioperl-live/issues
77 =head1 AUTHOR - Jason Stajich and Steve Chervitz
79 Email jason-at-bioperl-dot-org
80 Email sac-at-bioperl-dot-org
82 =head1 CONTRIBUTORS
84 Sendu Bala, bix@sendu.me.uk
86 =head1 APPENDIX
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
91 =cut
94 # Let the code begin...
97 package Bio::Search::Hit::GenericHit;
98 use strict;
100 use Bio::Search::SearchUtils;
102 use base qw(Bio::Root::Root Bio::Search::Hit::HitI);
104 =head2 new
106 Title : new
107 Usage : my $obj = Bio::Search::Hit::GenericHit->new();
108 Function: Builds a new Bio::Search::Hit::GenericHit object
109 Returns : Bio::Search::Hit::GenericHit
110 Args : -name => Name of Hit (required)
111 -description => Description (optional)
112 -accession => Accession number (optional)
113 -ncbi_gi => NCBI GI UID (optional)
114 -length => Length of the Hit (optional)
115 -score => Raw Score for the Hit (optional)
116 -bits => Bit Score for the Hit (optional)
117 -significance => Significance value for the Hit (optional)
118 -algorithm => Algorithm used (BLASTP, FASTX, etc...)
119 -hsps => Array ref of HSPs for this Hit.
120 -found_again => boolean, true if hit appears in a
121 "previously found" section of a PSI-Blast report.
122 -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
123 objects.
125 =cut
127 sub new {
128 my($class,@args) = @_;
130 my $self = $class->SUPER::new(@args);
131 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
132 $score,$algo,$signif,$bits, $p,
133 $rank, $hsp_factory, $gi, $iter, $found) = $self->_rearrange([qw(HSPS
134 NAME
135 QUERY_LEN
136 DESCRIPTION
137 ACCESSION
138 LOCUS
139 LENGTH SCORE ALGORITHM
140 SIGNIFICANCE BITS P
141 RANK
142 HSP_FACTORY
143 NCBI_GI
144 ITERATION
145 FOUND_AGAIN)], @args);
147 defined $query_len && $self->query_length($query_len);
149 if( ! defined $name ) {
150 $self->throw("Must have defined a valid name for Hit");
151 } else {
152 $self->name($name);
155 defined $acc && $self->accession($acc);
156 defined $locus && $self->locus($locus);
157 defined $desc && $self->description($desc);
158 defined $length && $self->length($length);
159 defined $algo && $self->algorithm($algo);
160 defined $signif && $self->significance($signif);
161 defined $score && $self->raw_score($score);
162 defined $bits && $self->bits($bits);
163 defined $rank && $self->rank($rank);
164 defined $hsp_factory && $self->hsp_factory($hsp_factory);
165 defined $gi && $self->ncbi_gi($gi);
166 defined $iter && $self->iteration($iter);
167 defined $found && $self->found_again($found);
168 # p() has a weird interface, so this is a hack workaround
169 if (defined $p) {
170 $self->{_p} = $p;
173 $self->{'_iterator'} = 0;
174 if( defined $hsps ) {
175 if( ref($hsps) !~ /array/i ) {
176 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
177 } else {
178 my $hspcount=0;
179 while( @{$hsps} ) {
180 $hspcount++;
181 $self->add_hsp(shift @{$hsps} );
183 $self->{'_hsps'} = undef if $hspcount == 0;
186 else {
187 $self->{'_hsps'} = undef;
190 return $self;
193 =head2 add_hsp
195 Title : add_hsp
196 Usage : $hit->add_hsp($hsp)
197 Function: Add a HSP to the collection of HSPs for a Hit
198 Returns : number of HSPs in the Hit
199 Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
200 for creating a HSPI object (&hsp_factory must be set to get it back)
202 =cut
204 sub add_hsp {
205 my ($self,$hsp) = @_;
206 if (!defined $hsp || (ref($hsp) ne 'HASH' && !$hsp->isa('Bio::Search::HSP::HSPI'))) {
207 $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
208 return;
211 push @{$self->{'_hsps'}}, $hsp;
212 if (ref($hsp) eq 'HASH') {
213 $self->{_hashes}->{$#{$self->{'_hsps'}}} = 1;
215 return scalar @{$self->{'_hsps'}};
218 =head2 hsp_factory
220 Title : hsp_factory
221 Usage : $hit->hsp_factory($hsp_factory)
222 Function: Get/set the factory used to build HSPI objects if necessary.
223 Returns : Bio::Factory::ObjectFactoryI
224 Args : Bio::Factory::ObjectFactoryI
226 =cut
228 sub hsp_factory {
229 my $self = shift;
230 if (@_) { $self->{_hsp_factory} = shift }
231 return $self->{_hsp_factory} || return;
234 =head2 Bio::Search::Hit::HitI methods
236 Implementation of Bio::Search::Hit::HitI methods
238 =head2 name
240 Title : name
241 Usage : $hit_name = $hit->name();
242 Function: returns the name of the Hit sequence
243 Returns : a scalar string
244 Args : [optional] scalar string to set the name
246 =cut
248 sub name {
249 my ($self,$value) = @_;
250 my $previous = $self->{'_name'};
251 if( defined $value || ! defined $previous ) {
252 $value = $previous = '' unless defined $value;
253 $self->{'_name'} = $value;
255 return $previous;
258 =head2 accession
260 Title : accession
261 Usage : $acc = $hit->accession();
262 Function: Retrieve the accession (if available) for the hit
263 Returns : a scalar string (empty string if not set)
264 Args : none
266 =cut
268 sub accession {
269 my ($self,$value) = @_;
270 my $previous = $self->{'_accession'};
271 if( defined $value || ! defined $previous ) {
272 $value = $previous = '' unless defined $value;
273 $self->{'_accession'} = $value;
275 return $previous;
278 =head2 description
280 Title : description
281 Usage : $desc = $hit->description();
282 Function: Retrieve the description for the hit
283 Returns : a scalar string
284 Args : [optional] scalar string to set the description
286 =cut
288 sub description {
289 my ($self,$value) = @_;
290 my $previous = $self->{'_description'};
291 if( defined $value || ! defined $previous ) {
292 $value = $previous = '' unless defined $value;
293 $self->{'_description'} = $value;
295 return $previous;
298 =head2 length
300 Title : length
301 Usage : my $len = $hit->length
302 Function: Returns the length of the hit
303 Returns : integer
304 Args : [optional] integer to set the length
306 =cut
308 sub length {
309 my ($self,$value) = @_;
310 my $previous = $self->{'_length'};
311 if( defined $value || ! defined $previous ) {
312 $value = $previous = 0 unless defined $value;
313 $self->{'_length'} = $value;
315 return $previous;
319 =head2 algorithm
321 Title : algorithm
322 Usage : $alg = $hit->algorithm();
323 Function: Gets the algorithm specification that was used to obtain the hit
324 For BLAST, the algorithm denotes what type of sequence was aligned
325 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
326 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
327 dna-translated dna).
328 Returns : a scalar string
329 Args : [optional] scalar string to set the algorithm
331 =cut
333 sub algorithm {
334 my ($self,$value) = @_;
335 my $previous = $self->{'_algorithm'};
336 if( defined $value || ! defined $previous ) {
337 $value = $previous = '' unless defined $value;
338 $self->{'_algorithm'} = $value;
340 return $previous;
343 =head2 raw_score
345 Title : raw_score
346 Usage : $score = $hit->raw_score();
347 Function: Gets the "raw score" generated by the algorithm. What
348 this score is exactly will vary from algorithm to algorithm,
349 returning undef if unavailable.
350 Returns : a scalar value
351 Args : [optional] scalar value to set the raw score
353 =cut
355 sub raw_score {
356 my ($self,$value) = @_;
357 my $previous = $self->{'_score'};
358 if( defined $value ) {
359 $self->{'_score'} = $value;
360 } elsif ( ! defined $previous ) {
361 # Set the bits of the Hit to that of the top HSP.
362 unless( defined $self->{'_hsps'}->[0] ) {
363 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
364 "If using NCBI BLAST, check bits() instead");
365 return;
367 # use 'score' if available
368 if ( defined( ($self->hsps)[0]->score ) ) {
369 $previous = $self->{'_score'} = ($self->hsps)[0]->score;
371 # otherwise use 'bits'
372 elsif ( defined( ($self->hsps)[0]->bits ) ) {
373 $previous = $self->{'_score'} = ($self->hsps)[0]->bits;
376 return $previous;
379 =head2 score
381 Equivalent to L<raw_score()|raw_score>
383 =cut
385 sub score { shift->raw_score(@_); }
387 =head2 significance
389 Title : significance
390 Usage : $significance = $hit->significance();
391 Function: Used to obtain the E or P value of a hit, i.e. the probability that
392 this particular hit was obtained purely by random chance. If
393 information is not available (nor calculatable from other
394 information sources), return undef.
395 Returns : a scalar value or undef if unavailable
396 Args : [optional] scalar value to set the significance
398 =cut
400 sub significance {
401 my ($self,$value) = @_;
402 my $previous = $self->{'_significance'};
403 if( defined $value ) {
404 $self->{'_significance'} = $value;
405 } elsif ( ! defined $previous ) {
406 unless( defined $self->{'_hsps'}->[0] ) {
407 $self->warn("No HSPs for this Hit (".$self->name.")");
408 return;
410 # Set the significance of the Hit to that of the top HSP.
411 $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
414 return $previous;
417 =head2 bits
419 Usage : $hit_object->bits();
420 Purpose : Gets the bit score of the best HSP for the current hit.
421 Example : $bits = $hit_object->bits();
422 Returns : Integer or undef if bit score is not set
423 Argument : n/a
424 Comments : For BLAST1, the non-bit score is listed in the summary line.
426 See Also : L<score()|score>
428 =cut
430 sub bits {
431 my ($self,$value) = @_;
432 my $previous = $self->{'_bits'};
433 if( defined $value ) {
434 $self->{'_bits'} = $value;
435 } elsif ( ! defined $previous ) {
436 # Set the bits of the Hit to that of the top HSP.
437 unless( defined $self->{'_hsps'}->[0] ) {
438 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
439 "If using WU-BLAST, check raw_score() instead");
440 return;
442 $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
444 return $previous;
447 =head2 next_hsp
449 Title : next_hsp
450 Usage : while( $hsp = $obj->next_hsp()) { ... }
451 Function : Returns the next available High Scoring Pair
452 Example :
453 Returns : Bio::Search::HSP::HSPI object or null if finished
454 Args : none
456 =cut
458 sub next_hsp {
459 my $self = shift;
460 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
461 return unless
462 defined($self->{'_hsps'})
463 && $self->{'_iterator'} <= scalar @{$self->{'_hsps'}};
465 my $iterator = $self->{'_iterator'}++;
466 my $hsp = $self->{'_hsps'}->[$iterator] || return;
467 if (ref($hsp) eq 'HASH') {
468 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
469 $hsp = $factory->create_object(%{$hsp});
470 $self->{'_hsps'}->[$iterator] = $hsp;
471 delete $self->{_hashes}->{$iterator};
473 return $hsp;
477 =head2 hsps
479 Usage : $hit_object->hsps();
480 Purpose : Get a list containing all HSP objects.
481 : Get the numbers of HSPs for the current hit.
482 Example : @hsps = $hit_object->hsps();
483 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
484 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
485 : Scalar context: integer (number of HSPs).
486 : (Equivalent to num_hsps()).
487 Argument : n/a. Relies on wantarray
488 Throws : Exception if the HSPs have not been collected.
490 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
492 =cut
494 sub hsps {
495 my $self = shift;
496 foreach my $i (keys %{$self->{_hashes} || {}}) {
497 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
498 $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
499 delete $self->{_hashes}->{$i};
502 return wantarray() ? @{$self->{'_hsps'} || []} : scalar(@{$self->{'_hsps'} || []});
505 =head2 num_hsps
507 Usage : $hit_object->num_hsps();
508 Purpose : Get the number of HSPs for the present hit.
509 Example : $nhsps = $hit_object->num_hsps();
510 Returns : Integer or '-' if HSPs have not been callected
511 Argument : n/a
513 See Also : L<hsps()|hsps>
515 =cut
517 sub num_hsps {
518 my $self = shift;
520 unless ($self->{'_hsps'}) {
521 return '-';
524 return scalar(@{$self->{'_hsps'}});
527 =head2 rewind
529 Title : rewind
530 Usage : $hit->rewind;
531 Function: Allow one to reset the HSP iterator to the beginning
532 Since this is an in-memory implementation
533 Returns : none
534 Args : none
536 =cut
538 sub rewind{
539 my ($self) = @_;
540 $self->{'_iterator'} = 0;
543 =head2 ambiguous_aln
545 Usage : $ambig_code = $hit_object->ambiguous_aln();
546 Purpose : Sets/Gets ambiguity code data member.
547 Example : (see usage)
548 Returns : String = 'q', 's', 'qs', '-'
549 : 'q' = query sequence contains overlapping sub-sequences
550 : while sbjct does not.
551 : 's' = sbjct sequence contains overlapping sub-sequences
552 : while query does not.
553 : 'qs' = query and sbjct sequence contains overlapping sub-sequences
554 : relative to each other.
555 : '-' = query and sbjct sequence do not contains multiple domains
556 : relative to each other OR both contain the same distribution
557 : of similar domains.
558 Argument : n/a
559 Throws : n/a
560 Comment : Note: "sbjct" is synonymous with "hit"
562 =cut
564 sub ambiguous_aln {
565 my $self = shift;
566 if(@_) { $self->{'_ambiguous_aln'} = shift; }
567 $self->{'_ambiguous_aln'} || '-';
570 =head2 overlap
572 See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
574 =cut
576 sub overlap {
577 my $self = shift;
578 if(@_) { $self->{'_overlap'} = shift; }
579 defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
583 =head2 n
585 Usage : $hit_object->n();
586 Purpose : Gets the N number for the current hit.
587 : This is the number of HSPs in the set which was ascribed
588 : the lowest P-value (listed on the description line).
589 : This number is not the same as the total number of HSPs.
590 : To get the total number of HSPs, use num_hsps().
591 Example : $n = $hit_object->n();
592 Returns : Integer
593 Argument : n/a
594 Throws : Exception if HSPs have not been set (BLAST2 reports).
595 Comments : Note that the N parameter is not reported in gapped BLAST2.
596 : Calling n() on such reports will result in a call to num_hsps().
597 : The num_hsps() method will count the actual number of
598 : HSPs in the alignment listing, which may exceed N in
599 : some cases.
601 See Also : L<num_hsps()|num_hsps>
603 =cut
605 sub n {
606 my $self = shift;
608 # The check for $self->{'_n'} is a remnant from the 'query' mode days
609 # in which the sbjct object would collect data from the description
610 # line only.
612 my ($n);
613 if(not defined($self->{'_n'})) {
614 if( $self->hsp ) {
615 $n = $self->hsp->n;
617 } else {
618 $n = $self->{'_n'};
620 $n ||= $self->num_hsps;
622 return $n;
625 =head2 p
627 Usage : $hit_object->p( [format] );
628 Purpose : Get the P-value for the best HSP of the given BLAST hit.
629 : (Note that P-values are not provided with NCBI Blast2 reports).
630 Example : $p = $sbjct->p;
631 : $p = $sbjct->p('exp'); # get exponent only.
632 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
633 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
634 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
635 : 2-element list (float, int) if format == 'parts' and P-value
636 : is in scientific notation (See Comments).
637 Argument : format: string of 'raw' | 'exp' | 'parts'
638 : 'raw' returns value given in report. Default. (1.2e-34)
639 : 'exp' returns exponent value only (34)
640 : 'parts' returns the decimal and exponent as a
641 : 2-element list (1.2, -34) (See Comments).
642 Throws : Warns if no P-value is defined. Uses expect instead.
643 Comments : Using the 'parts' argument is not recommended since it will not
644 : work as expected if the P-value is not in scientific notation.
645 : That is, floats are not converted into sci notation before
646 : splitting into parts.
648 See Also : L<expect()|expect>, L<significance()|significance>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
650 =cut
652 sub p {
653 # Some duplication of logic for p(), expect() and signif() for the sake of performance.
654 my ($self, $fmt) = @_;
656 my $val = $self->{'_p'};
658 # $val can be zero.
659 if(!defined $val) {
660 # P-value not defined, must be a NCBI Blast2 report.
661 # Use expect instead.
662 $self->warn( "P-value not defined. Using significance() instead.");
663 $val = $self->significance();
666 return $val if not $fmt or $fmt =~ /^raw/i;
667 ## Special formats: exponent-only or as list.
668 return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
669 return (split (/eE/, $val)) if $fmt =~ /^parts/i;
671 ## Default: return the raw P-value.
672 return $val;
675 =head2 hsp
677 Usage : $hit_object->hsp( [string] );
678 Purpose : Get a single HSPI object for the present HitI object.
679 Example : $hspObj = $hit_object->hsp; # same as 'best'
680 : $hspObj = $hit_object->hsp('best');
681 : $hspObj = $hit_object->hsp('worst');
682 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
683 Argument : String (or no argument).
684 : No argument (default) = highest scoring HSP (same as 'best').
685 : 'best' or 'first' = highest scoring HSP.
686 : 'worst' or 'last' = lowest scoring HSP.
687 Throws : Exception if the HSPs have not been collected.
688 : Exception if an unrecognized argument is used.
690 See Also : L<hsps()|hsps>, L<num_hsps>()
692 =cut
694 sub hsp {
695 my( $self, $option ) = @_;
696 $option ||= 'best';
698 if (not ref $self->{'_hsps'}) {
699 $self->throw("Can't get HSPs: data not collected.");
702 my @hsps = $self->hsps;
704 return $hsps[0] if $option =~ /best|first|1/i;
705 return $hsps[$#hsps] if $option =~ /worst|last/i;
707 $self->throw("Can't get HSP for: $option\n" .
708 "Valid arguments: 'best', 'worst'");
711 =head2 logical_length
713 Usage : $hit_object->logical_length( [seq_type] );
714 : (mostly intended for internal use).
715 Purpose : Get the logical length of the hit sequence.
716 : This is necessary since the number of identical/conserved residues
717 : can be in terms of peptide sequence space, yet the query and/or hit
718 : sequence are in nucleotide space.
719 Example : $len = $hit_object->logical_length();
720 Returns : Integer
721 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
722 ('sbjct' is synonymous with 'hit')
723 Throws : n/a
724 Comments :
725 : In the case of BLAST flavors:
726 : For TBLASTN reports, the length of the aligned portion of the
727 : nucleotide hit sequence is divided by 3; for BLASTX reports,
728 : the length of the aligned portion of the nucleotide query
729 : sequence is divided by 3. For TBLASTX reports, the length of
730 : both hit and query sequence are converted.
732 : This is important for functions like frac_aligned_query()
733 : which need to operate in amino acid coordinate space when dealing
734 : with [T]BLAST[NX] type reports.
736 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
738 =cut
740 sub logical_length {
741 my $self = shift;
742 my $seqType = shift || 'query';
743 $seqType = 'sbjct' if $seqType eq 'hit';
745 my ($length, $logical);
746 my $algo = $self->algorithm;
748 # For the sbjct, return logical sbjct length
749 if( $seqType eq 'sbjct' ) {
750 $length = $self->length;
751 } else {
752 # Otherwise, return logical query length
753 $length = $self->query_length();
756 $logical = Bio::Search::SearchUtils::logical_length($algo, $seqType, $length);
758 return int($logical);
761 =head2 length_aln
763 Usage : $hit_object->length_aln( [seq_type] );
764 Purpose : Get the total length of the aligned region for query or sbjct seq.
765 : This number will include all HSPs
766 Example : $len = $hit_object->length_aln(); # default = query
767 : $lenAln = $hit_object->length_aln('query');
768 Returns : Integer
769 Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
770 ('sbjct' is synonymous with 'hit')
771 Throws : Exception if the argument is not recognized.
772 Comments : This method will report the logical length of the alignment,
773 : meaning that for TBLAST[NX] reports, the length is reported
774 : using amino acid coordinate space (i.e., nucleotides / 3).
776 : This method requires that all HSPs be tiled. If they have not
777 : already been tiled, they will be tiled first automatically..
778 : If you don't want the tiled data, iterate through each HSP
779 : calling length() on each (use hsps() to get all HSPs).
781 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
783 =cut
785 sub length_aln {
786 my( $self, $seqType, $num ) = @_;
788 $seqType ||= 'query';
789 $seqType = 'sbjct' if $seqType eq 'hit';
791 # Setter:
792 if( defined $num) {
793 return $self->{'_length_aln_'.$seqType} = $num;
796 unless ($self->{'_hsps'}) {
797 #return wantarray ? ('-','-') : '-';
798 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
799 return '-';
802 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
804 my $data = $self->{'_length_aln_'.$seqType};
806 ## If we don't have data, figure out what went wrong.
807 if(!$data) {
808 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
809 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
811 return $data;
814 =head2 gaps
816 Usage : $hit_object->gaps( [seq_type] );
817 Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
818 : Data is summed across all HSPs.
819 Example : $qgaps = $hit_object->gaps('query');
820 : $hgaps = $hit_object->gaps('hit');
821 : $tgaps = $hit_object->gaps(); # default = total (query + hit)
822 Returns : scalar context: integer
823 : array context without args: two-element list of integers
824 : (queryGaps, hitGaps)
825 : Array context can be forced by providing an argument of 'list' or 'array'.
827 : CAUTION: Calling this method within printf or sprintf is arrray context.
828 : So this function may not give you what you expect. For example:
829 : printf "Total gaps: %d", $hit->gaps();
830 : Actually returns a two-element array, so what gets printed
831 : is the number of gaps in the query, not the total
833 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
834 ('sbjct' is synonymous with 'hit')
835 Throws : n/a
836 Comments : If you need data for each HSP, use hsps() and then interate
837 : through each HSP object.
838 : This method requires that all HSPs be tiled. If they have not
839 : already been tiled, they will be tiled first automatically..
840 : Not relying on wantarray since that will fail in situations
841 : such as printf "%d", $hit->gaps() in which you might expect to
842 : be printing the total gaps, but evaluates to array context.
844 See Also : L<length_aln()|length_aln>
846 =cut
848 sub gaps {
849 my( $self, $seqType, $num ) = @_;
851 $seqType ||= (wantarray ? 'list' : 'total');
852 $seqType = 'sbjct' if $seqType eq 'hit';
854 unless ($self->{'_hsps'}) {
855 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
856 return wantarray ? ('-','-') : '-';
857 #return '-';
860 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
862 $seqType = lc($seqType);
864 if( defined $num ) {
865 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
867 return $self->{'_gaps_'.$seqType} = $num;
869 elsif($seqType =~ /list|array/i) {
870 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
872 elsif($seqType eq 'total') {
873 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
874 } else {
875 return $self->{'_gaps_'.$seqType} || 0;
880 =head2 matches
882 See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
884 =cut
886 sub matches {
887 my( $self, $arg1, $arg2) = @_;
888 my(@data,$data);
890 unless ($self->{'_hsps'}) {
891 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
892 return wantarray ? ('-','-') : '-';
895 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
897 unless( $arg1 ) {
898 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
900 return @data;
901 } else {
903 if( defined $arg2 ) {
904 $self->{'_totalIdentical'} = $arg1;
905 $self->{'_totalConserved'} = $arg2;
906 return ( $arg1, $arg2 );
908 elsif($arg1 =~ /^id/i) {
909 $data = $self->{'_totalIdentical'};
910 } else {
911 $data = $self->{'_totalConserved'};
913 #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
914 return $data;
917 ## If we make it to here, it is likely the case that
918 ## the parser constructed a minimal hit object from the summary line only.
919 ## It either delibrately skipped parsing the alignment section,
920 ## or was not able to because it was absent (due to blast executable parameter
921 ## setting such as -b 0 (B=0 for WU-BLAST) )
922 #$self->throw("Can't get identical or conserved data: no data.");
926 =head2 start
928 Usage : $sbjct->start( [seq_type] );
929 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
930 : in the BlastHit object. If there is more than one HSP, the lowest start
931 : value of all HSPs is returned.
932 Example : $qbeg = $sbjct->start('query');
933 : $sbeg = $sbjct->start('hit');
934 : ($qbeg, $sbeg) = $sbjct->start();
935 Returns : scalar context: integer
936 : array context without args: list of two integers (queryStart, sbjctStart)
937 : Array context can be "induced" by providing an argument of 'list' or 'array'.
938 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
939 ('sbjct' is synonymous with 'hit')
940 Throws : n/a
942 See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
943 L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
945 =cut
947 sub start {
948 my ($self, $seqType, $num) = @_;
950 unless ($self->{'_hsps'}) {
951 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
952 return wantarray ? ('-','-') : '-';
955 $seqType ||= (wantarray ? 'list' : 'query');
956 $seqType = 'sbjct' if $seqType eq 'hit';
958 if( defined $num ) {
959 $seqType = "_\L$seqType\E";
960 return $self->{$seqType.'Start'} = $num;
963 # If there is only one HSP, defer this call to the solitary HSP.
964 if($self->num_hsps == 1) {
965 return $self->hsp->start($seqType);
967 else {
968 # Tiling normally generates $self->{'_queryStart'} and
969 # $self->{'_sbjctStart'}, but is very slow. If we haven't tiled,
970 # find the answer quickly without tiling.
971 unless (defined $self->{'_queryStart'}) {
972 my $earliest_query_start;
973 my $earliest_sbjct_start;
974 foreach my $hsp ($self->hsps) {
975 my $this_query_start = $hsp->start('query');
976 if (! defined $earliest_query_start || $this_query_start < $earliest_query_start) {
977 $earliest_query_start = $this_query_start;
980 my $this_sbjct_start = $hsp->start('sbjct');
981 if (! defined $earliest_sbjct_start || $this_sbjct_start < $earliest_sbjct_start) {
982 $earliest_sbjct_start = $this_sbjct_start;
985 $self->{'_queryStart'} = $earliest_query_start;
986 $self->{'_sbjctStart'} = $earliest_sbjct_start;
990 if ($seqType =~ /list|array/i) {
991 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
992 } else {
993 ## Sensitive to member name changes.
994 $seqType = "_\L$seqType\E";
995 return $self->{$seqType.'Start'};
1001 =head2 end
1003 Usage : $sbjct->end( [seq_type] );
1004 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1005 : in the BlastHit object. If there is more than one HSP,
1006 the largest end
1007 : value of all HSPs is returned.
1008 Example : $qend = $sbjct->end('query');
1009 : $send = $sbjct->end('hit');
1010 : ($qend, $send) = $sbjct->end();
1011 Returns : scalar context: integer
1012 : array context without args: list of two integers
1013 : (queryEnd, sbjctEnd)
1014 : Array context can be "induced" by providing an argument
1015 : of 'list' or 'array'.
1016 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1017 : (case insensitive). If not supplied, 'query' is used.
1018 Throws : n/a
1020 See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
1022 =cut
1024 sub end {
1025 my ($self, $seqType, $num) = @_;
1027 unless ($self->{'_hsps'}) {
1028 return wantarray ? ('-','-') : '-';
1031 $seqType ||= (wantarray ? 'list' : 'query');
1032 $seqType = 'sbjct' if $seqType eq 'hit';
1034 if( defined $num ) {
1035 $seqType = "_\L$seqType\E";
1036 return $self->{$seqType.'Stop'} = $num;
1039 # If there is only one HSP, defer this call to the solitary HSP.
1040 if($self->num_hsps == 1) {
1041 return $self->hsp->end($seqType);
1043 else {
1044 # Tiling normally generates $self->{'_queryStop'} and
1045 # $self->{'_sbjctStop'}, but is very slow. If we haven't tiled,
1046 # find the answer quickly without tiling.
1047 unless (defined $self->{'_queryStop'}) {
1048 my $latest_query_end;
1049 my $latest_sbjct_end;
1050 foreach my $hsp ($self->hsps) {
1051 my $this_query_end = $hsp->end('query');
1052 if (! defined $latest_query_end || $this_query_end > $latest_query_end) {
1053 $latest_query_end = $this_query_end;
1056 my $this_sbjct_end = $hsp->end('sbjct');
1057 if (! defined $latest_sbjct_end || $this_sbjct_end > $latest_sbjct_end) {
1058 $latest_sbjct_end = $this_sbjct_end;
1061 $self->{'_queryStop'} = $latest_query_end;
1062 $self->{'_sbjctStop'} = $latest_sbjct_end;
1066 if($seqType =~ /list|array/i) {
1067 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
1068 } else {
1069 ## Sensitive to member name changes.
1070 $seqType = "_\L$seqType\E";
1071 return $self->{$seqType.'Stop'};
1076 =head2 range
1078 Usage : $sbjct->range( [seq_type] );
1079 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1080 : in the HSP alignment.
1081 Example : ($qbeg, $qend) = $sbjct->range('query');
1082 : ($sbeg, $send) = $sbjct->range('hit');
1083 Returns : Two-element array of integers
1084 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
1085 ('sbjct' is synonymous with 'hit')
1086 Throws : n/a
1088 See Also : L<start()|start>, L<end()|end>
1090 =cut
1092 sub range {
1093 my ($self, $seqType) = @_;
1094 $seqType ||= 'query';
1095 $seqType = 'sbjct' if $seqType eq 'hit';
1096 return ($self->start($seqType), $self->end($seqType));
1100 =head2 frac_identical
1102 Usage : $hit_object->frac_identical( [seq_type] );
1103 Purpose : Get the overall fraction of identical positions across all HSPs.
1104 : The number refers to only the aligned regions and does not
1105 : account for unaligned regions in between the HSPs, if any.
1106 Example : $frac_iden = $hit_object->frac_identical('query');
1107 Returns : Float (2-decimal precision, e.g., 0.75).
1108 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1109 : default = 'query' (but see comments below).
1110 : ('sbjct' is synonymous with 'hit')
1111 Throws : n/a
1112 Comments :
1113 : To compute the fraction identical, the logical length of the
1114 : aligned portion of the sequence is used, meaning that
1115 : in the case of BLAST flavors, for TBLASTN reports, the length of
1116 : the aligned portion of the
1117 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1118 : the length of the aligned portion of the nucleotide query
1119 : sequence is divided by 3. For TBLASTX reports, the length of
1120 : both hit and query sequence are converted.
1121 : This is necessary since the number of identical residues is
1122 : in terms of peptide sequence space.
1124 : Different versions of Blast report different values for the total
1125 : length of the alignment. This is the number reported in the
1126 : denominators in the stats section:
1127 : "Identical = 34/120 Positives = 67/120".
1128 : NCBI BLAST uses the total length of the alignment (with gaps)
1129 : WU-BLAST uses the length of the query sequence (without gaps).
1131 : Therefore, when called with an argument of 'total',
1132 : this method will report different values depending on the
1133 : version of BLAST used. Total does NOT take into account HSP
1134 : tiling, so it should not be used.
1136 : To get the fraction identical among only the aligned residues,
1137 : ignoring the gaps, call this method without an argument or
1138 : with an argument of 'query' or 'hit'.
1140 : If you need data for each HSP, use hsps() and then iterate
1141 : through the HSP objects.
1142 : This method requires that all HSPs be tiled. If they have not
1143 : already been tiled, they will be tiled first automatically.
1145 See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1147 =cut
1149 sub frac_identical {
1150 my ($self, $seqType) = @_;
1151 $seqType ||= 'query';
1152 $seqType = 'sbjct' if $seqType eq 'hit';
1154 ## Sensitive to member name format.
1155 $seqType = lc($seqType);
1157 unless ($self->{'_hsps'}) {
1158 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1159 #return wantarray ? ('-','-') : '-';
1160 return '-';
1163 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1165 my $ident = $self->matches('id');
1166 my $total = $self->length_aln($seqType);
1167 my $ratio = $ident / $total;
1168 my $ratio_rounded = sprintf( "%.3f", $ratio);
1170 # Round down iff normal rounding yields 1 (just like blast)
1171 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1172 return $ratio_rounded;
1176 =head2 frac_conserved
1178 Usage : $hit_object->frac_conserved( [seq_type] );
1179 Purpose : Get the overall fraction of conserved positions across all HSPs.
1180 : The number refers to only the aligned regions and does not
1181 : account for unaligned regions in between the HSPs, if any.
1182 Example : $frac_cons = $hit_object->frac_conserved('hit');
1183 Returns : Float (2-decimal precision, e.g., 0.75).
1184 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1185 : default = 'query' (but see comments below).
1186 : ('sbjct' is synonymous with 'hit')
1187 Throws : n/a
1188 Comments :
1189 : To compute the fraction conserved, the logical length of the
1190 : aligned portion of the sequence is used, meaning that
1191 : in the case of BLAST flavors, for TBLASTN reports, the length of
1192 : the aligned portion of the
1193 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1194 : the length of the aligned portion of the nucleotide query
1195 : sequence is divided by 3. For TBLASTX reports, the length of
1196 : both hit and query sequence are converted.
1197 : This is necessary since the number of conserved residues is
1198 : in terms of peptide sequence space.
1200 : Different versions of Blast report different values for the total
1201 : length of the alignment. This is the number reported in the
1202 : denominators in the stats section:
1203 : "Positives = 34/120 Positives = 67/120".
1204 : NCBI BLAST uses the total length of the alignment (with gaps)
1205 : WU-BLAST uses the length of the query sequence (without gaps).
1207 : Therefore, when called with an argument of 'total',
1208 : this method will report different values depending on the
1209 : version of BLAST used. Total does NOT take into account HSP
1210 : tiling, so it should not be used.
1212 : To get the fraction conserved among only the aligned residues,
1213 : ignoring the gaps, call this method without an argument or
1214 : with an argument of 'query' or 'hit'.
1216 : If you need data for each HSP, use hsps() and then interate
1217 : through the HSP objects.
1218 : This method requires that all HSPs be tiled. If they have not
1219 : already been tiled, they will be tiled first automatically.
1221 See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1223 =cut
1225 sub frac_conserved {
1226 my ($self, $seqType) = @_;
1227 $seqType ||= 'query';
1228 $seqType = 'sbjct' if $seqType eq 'hit';
1230 ## Sensitive to member name format.
1231 $seqType = lc($seqType);
1233 unless ($self->{'_hsps'}) {
1234 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1235 #return wantarray ? ('-','-') : '-';
1236 return '-';
1239 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1241 my $consv = $self->matches('cons');
1242 my $total = $self->length_aln($seqType);
1243 my $ratio = $consv / $total;
1244 my $ratio_rounded = sprintf( "%.3f", $ratio);
1246 # Round down iff normal rounding yields 1 (just like blast)
1247 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1248 return $ratio_rounded;
1254 =head2 frac_aligned_query
1256 Usage : $hit_object->frac_aligned_query();
1257 Purpose : Get the fraction of the query sequence which has been aligned
1258 : across all HSPs (not including intervals between non-overlapping
1259 : HSPs).
1260 Example : $frac_alnq = $hit_object->frac_aligned_query();
1261 Returns : Float (2-decimal precision, e.g., 0.75),
1262 : or undef if query length is unknown to avoid division by 0.
1263 Argument : n/a
1264 Throws : n/a
1265 Comments : If you need data for each HSP, use hsps() and then interate
1266 : through the HSP objects.
1267 : This method requires that all HSPs be tiled. If they have not
1268 : already been tiled, they will be tiled first automatically.
1270 See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1272 =cut
1274 sub frac_aligned_query {
1275 my $self = shift;
1277 unless ($self->{'_hsps'}) {
1278 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1279 #return wantarray ? ('-','-') : '-';
1280 return '-';
1283 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1285 my $qry_len = $self->logical_length('query');
1286 return undef if $qry_len == 0; # Avoid division by 0 crash
1287 sprintf( "%.2f", $self->length_aln('query') / $qry_len);
1292 =head2 frac_aligned_hit
1294 Usage : $hit_object->frac_aligned_hit();
1295 Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1296 : across all HSPs (not including intervals between non-overlapping
1297 : HSPs).
1298 Example : $frac_alnq = $hit_object->frac_aligned_hit();
1299 Returns : Float (2-decimal precision, e.g., 0.75),
1300 : or undef if hit length is unknown to avoid division by 0.
1301 Argument : n/a
1302 Throws : n/a
1303 Comments : If you need data for each HSP, use hsps() and then interate
1304 : through the HSP objects.
1305 : This method requires that all HSPs be tiled. If they have not
1306 : already been tiled, they will be tiled first automatically.
1308 See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, , L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1310 =cut
1312 sub frac_aligned_hit {
1313 my $self = shift;
1315 unless ($self->{'_hsps'}) {
1316 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1317 #return wantarray ? ('-','-') : '-';
1318 return '-';
1321 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1323 my $sbjct_len = $self->logical_length('sbjct');
1324 return undef if $sbjct_len == 0; # Avoid division by 0 crash
1325 sprintf( "%.2f", $self->length_aln('sbjct') / $sbjct_len);
1329 ## These methods are being maintained for backward compatibility.
1331 =head2 frac_aligned_sbjct
1333 Same as L<frac_aligned_hit()|frac_aligned_hit>
1335 =cut
1337 *frac_aligned_sbjct = \&frac_aligned_hit;
1339 =head2 num_unaligned_sbjct
1341 Same as L<num_unaligned_hit()|num_unaligned_hit>
1343 =cut
1345 *num_unaligned_sbjct = \&num_unaligned_hit;
1348 =head2 num_unaligned_hit
1350 Usage : $hit_object->num_unaligned_hit();
1351 Purpose : Get the number of the unaligned residues in the hit sequence.
1352 : Sums across all all HSPs.
1353 Example : $num_unaln = $hit_object->num_unaligned_hit();
1354 Returns : Integer
1355 Argument : n/a
1356 Throws : n/a
1357 Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1358 : They apply here as well.
1359 : If you need data for each HSP, use hsps() and then interate
1360 : through the HSP objects.
1361 : This method requires that all HSPs be tiled. If they have not
1362 : already been tiled, they will be tiled first automatically..
1364 See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
1366 =cut
1368 sub num_unaligned_hit {
1369 my $self = shift;
1371 unless ($self->{'_hsps'}) {
1372 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1373 #return wantarray ? ('-','-') : '-';
1374 return '-';
1377 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1379 my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
1380 ($num < 0 ? 0 : $num );
1384 =head2 num_unaligned_query
1386 Usage : $hit_object->num_unaligned_query();
1387 Purpose : Get the number of the unaligned residues in the query sequence.
1388 : Sums across all all HSPs.
1389 Example : $num_unaln = $hit_object->num_unaligned_query();
1390 Returns : Integer
1391 Argument : n/a
1392 Throws : n/a
1393 Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1394 : They apply here as well.
1395 : If you need data for each HSP, use hsps() and then interate
1396 : through the HSP objects.
1397 : This method requires that all HSPs be tiled. If they have not
1398 : already been tiled, they will be tiled first automatically..
1400 See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1402 =cut
1404 sub num_unaligned_query {
1405 my $self = shift;
1407 unless ($self->{'_hsps'}) {
1408 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1409 #return wantarray ? ('-','-') : '-';
1410 return '-';
1413 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1415 my $num = $self->logical_length('query') - $self->length_aln('query');
1416 ($num < 0 ? 0 : $num );
1421 =head2 seq_inds
1423 Usage : $hit->seq_inds( seq_type, class, collapse );
1424 Purpose : Get a list of residue positions (indices) across all HSPs
1425 : for identical or conserved residues in the query or sbjct sequence.
1426 Example : @s_ind = $hit->seq_inds('query', 'identical');
1427 : @h_ind = $hit->seq_inds('hit', 'conserved');
1428 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1429 Returns : Array of integers
1430 : May include ranges if collapse is non-zero.
1431 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1432 : ('sbjct' is synonymous with 'hit')
1433 : [1] class = 'identical' or 'conserved' (default = 'identical')
1434 : (can be shortened to 'id' or 'cons')
1435 : (actually, anything not 'id' will evaluate to 'conserved').
1436 : [2] collapse = boolean, if non-zero, consecutive positions are merged
1437 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1438 : collapses to "1-5 7 9-11". This is useful for
1439 : consolidating long lists. Default = no collapse.
1440 Throws : n/a.
1442 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1444 =cut
1446 sub seq_inds {
1447 my ($self, $seqType, $class, $collapse) = @_;
1449 $seqType ||= 'query';
1450 $class ||= 'identical';
1451 $collapse ||= 0;
1453 $seqType = 'sbjct' if $seqType eq 'hit';
1455 my (@inds, $hsp);
1456 foreach $hsp ($self->hsps) {
1457 # This will merge data for all HSPs together.
1458 push @inds, $hsp->seq_inds($seqType, $class);
1461 # Need to remove duplicates and sort the merged positions.
1462 if(@inds) {
1463 my %tmp = map { $_, 1 } @inds;
1464 @inds = sort {$a <=> $b} keys %tmp;
1467 $collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
1471 =head2 strand
1473 See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
1475 =cut
1477 sub strand {
1478 my ($self, $seqType, $strnd) = @_;
1480 unless ($self->{'_hsps'}) {
1481 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1482 return wantarray ? ('-','-') : '-';
1483 #return '-';
1486 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1488 $seqType ||= (wantarray ? 'list' : 'query');
1489 $seqType = 'sbjct' if $seqType eq 'hit';
1491 $seqType = lc($seqType);
1493 if( defined $strnd ) {
1494 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1496 return $self->{'_strand_'.$seqType} = $strnd;
1499 my ($qstr, $hstr);
1500 # If there is only one HSP, defer this call to the solitary HSP.
1501 if($self->num_hsps == 1) {
1502 return $self->hsp->strand($seqType);
1504 elsif( defined $self->{'_strand_query'}) {
1505 # Get the data computed during hsp tiling.
1506 $qstr = $self->{'_strand_query'};
1507 $hstr = $self->{'_strand_sbjct'}
1509 else {
1510 # otherwise, iterate through all HSPs collecting strand info.
1511 # This will return the string "-1/1" if there are HSPs on different strands.
1512 # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1513 # (unless the above elsif{} is commented out).
1514 my (%qstr, %hstr);
1515 foreach my $hsp( $self->hsps ) {
1516 my ( $q, $h ) = $hsp->strand();
1517 $qstr{ $q }++;
1518 $hstr{ $h }++;
1520 $qstr = join( '/', sort keys %qstr);
1521 $hstr = join( '/', sort keys %hstr);
1524 if($seqType =~ /list|array/i) {
1525 return ($qstr, $hstr);
1526 } elsif( $seqType eq 'query' ) {
1527 return $qstr;
1528 } else {
1529 return $hstr;
1533 =head2 frame
1535 See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
1537 =cut
1539 sub frame {
1540 my( $self, $frm ) = @_;
1542 unless ($self->{'_hsps'}) {
1543 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1544 #return wantarray ? ('-','-') : '-';
1545 return '-';
1548 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1550 if( defined $frm ) {
1551 return $self->{'_frame'} = $frm;
1554 # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1555 # in which the sbjct object would collect data from the description line only.
1557 my ($frame);
1558 if(not defined($self->{'_frame'})) {
1559 $frame = $self->hsp->frame('hit');
1560 } else {
1561 $frame = $self->{'_frame'};
1563 return $frame;
1566 =head2 rank
1568 Title : rank
1569 Usage : $obj->rank($newval)
1570 Function: Get/Set the rank of this Hit in the Query search list
1571 i.e. this is the Nth hit for a specific query
1572 Returns : value of rank
1573 Args : newvalue (optional)
1576 =cut
1578 sub rank {
1579 my $self = shift;
1580 return $self->{'_rank'} = shift if @_;
1581 return $self->{'_rank'} || 1;
1584 =head2 locus
1586 Title : locus
1587 Usage : $locus = $hit->locus();
1588 Function: Retrieve the locus (if available) for the hit
1589 Returns : a scalar string (empty string if not set)
1590 Args : none
1592 =cut
1594 sub locus {
1595 my ($self,$value) = @_;
1596 my $previous = $self->{'_locus'};
1597 if( defined $value || ! defined $previous ) {
1598 unless (defined $value) {
1599 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1600 $value = $previous = $3;
1601 } else {
1602 $value = $previous = '';
1605 $self->{'_locus'} = $value;
1607 return $previous;
1610 =head2 each_accession_number
1612 Title : each_accession_number
1613 Usage : @each_accession_number = $hit->each_accession_number();
1614 Function: Get each accession number listed in the description of the hit.
1615 If there are no alternatives, then only the primary accession will
1616 be given
1617 Returns : list of all accession numbers in the description
1618 Args : none
1620 =cut
1622 sub each_accession_number {
1623 my ($self,$value) = @_;
1624 my $desc = $self->{'_description'};
1625 #put primary accnum on the list
1626 my @accnums;
1627 push (@accnums,$self->{'_accession'});
1628 if( defined $desc ) {
1629 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1630 my $id = $1;
1631 my ($acc, $version);
1632 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
1633 ($acc, $version) = split /\./, $2;
1634 } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1635 ($acc, $version) = split /\./, $3;
1636 } elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
1637 $acc = $id;
1638 } elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
1639 ($acc,$version) = ($2);
1640 } else {
1641 #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1642 #Database Name Identifier Syntax
1643 #============================ ========================
1644 #GenBank gb|accession|locus
1645 #EMBL Data Library emb|accession|locus
1646 #DDBJ, DNA Database of Japan dbj|accession|locus
1647 #NBRF PIR pir||entry
1648 #Protein Research Foundation prf||name
1649 #SWISS-PROT sp|accession|entry name
1650 #Brookhaven Protein Data Bank pdb|entry|chain
1651 #Patents pat|country|number
1652 #GenInfo Backbone Id bbs|number
1653 #General database identifier gnl|database|identifier
1654 #NCBI Reference Sequence ref|accession|locus
1655 #Local Sequence identifier lcl|identifier
1656 $acc=$id;
1658 push(@accnums, $acc);
1661 return @accnums;
1664 =head2 tiled_hsps
1666 See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1668 =cut
1670 sub tiled_hsps {
1671 my $self = shift;
1672 return $self->{'_tiled_hsps'} = shift if @_;
1673 return $self->{'_tiled_hsps'};
1676 =head2 query_length
1678 Title : query_length
1679 Usage : $obj->query_length($newval)
1680 Function: Get/Set the query_length
1681 Returns : value of query_length (a scalar)
1682 Args : on set, new value (a scalar or undef, optional)
1685 =cut
1687 sub query_length {
1688 my ($self,$value) = @_;
1689 my $previous = $self->{'_query_length'};
1690 if( defined $value || ! defined $previous ) {
1691 $value = $previous = 0 unless defined $value;
1692 $self->{'_query_length'} = $value;
1694 return $previous;
1697 =head2 ncbi_gi
1699 Title : ncbi_gi
1700 Usage : $acc = $hit->ncbi_gi();
1701 Function: Retrieve the NCBI Unique ID (aka the GI #),
1702 if available, for the hit
1703 Returns : a scalar string (empty string if not set)
1704 Args : none
1705 Note : As of Sept. 2016 NCBI records will no longer have a
1706 GI; this attributue will remain in place for older
1707 records
1709 =cut
1711 sub ncbi_gi {
1712 my ($self,$value) = @_;
1713 if( defined $value ) {
1714 $self->{'_ncbi_gi'} = $value;
1715 } else {
1716 $self->{'_ncbi_gi'} = $self->name =~ m{^gi\|(\d+)} ? $1 : '';
1718 return $self->{'_ncbi_gi'};
1722 # sort method for HSPs
1724 =head2 sort_hits
1726 Title : sort_hsps
1727 Usage : $result->sort_hsps(\&sort_function)
1728 Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
1729 by descending score.
1730 Returns : n/a
1731 Args : A coderef for the sort function. See the documentation on the Perl sort()
1732 function for guidelines on writing sort functions.
1733 Note : To access the special variables $a and $b used by the Perl sort() function
1734 the user function must access Bio::Search::Hit::HitI namespace.
1735 For example, use :
1736 $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
1737 $Bio::Search::Result::HitI::b->length});
1738 NOT $hit->sort_hsps($a->length <=> $b->length);
1740 =cut
1742 sub sort_hsps {
1743 my ($self, $coderef) = @_;
1744 my @sorted_hsps;
1746 if ($coderef) {
1747 $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
1748 unless (ref($coderef) eq 'CODE');
1750 else {
1751 $coderef = \&_default_sort_hsps;
1752 # throw a warning?
1755 my @hsps = $self->hsps();
1756 eval {@sorted_hsps = sort $coderef @hsps };
1758 if ($@) {
1759 $self->throw("Unable to sort hsps: $@");
1761 else {
1762 $self->{'_hsps'} = \@sorted_hsps;
1767 =head2 iteration
1769 Usage : $hit->iteration( $iteration_num );
1770 Purpose : Gets the iteration number in which the Hit was found.
1771 Example : $iteration_num = $sbjct->iteration();
1772 Returns : Integer greater than or equal to 1
1773 Non-PSI-BLAST reports may report iteration as 1, but this number
1774 is only meaningful for PSI-BLAST reports.
1775 Argument : iteration_num (optional, used when setting only)
1776 Throws : none
1778 See Also : L<found_again()|found_again>
1780 =cut
1782 sub iteration{
1783 my ($self,$value) = @_;
1784 if( defined $value) {
1785 $self->{'_psiblast_iteration'} = $value;
1787 return $self->{'_psiblast_iteration'};
1790 =head2 found_again
1792 Title : found_again
1793 Usage : $hit->found_again;
1794 $hit->found_again(1);
1795 Purpose : Gets a boolean indicator whether or not the hit has
1796 been found in a previous iteration.
1797 This is only applicable to PSI-BLAST reports.
1799 This method indicates if the hit was reported in the
1800 "Sequences used in model and found again" section of the
1801 PSI-BLAST report or if it was reported in the
1802 "Sequences not found previously or not previously below threshold"
1803 section of the PSI-BLAST report. Only for hits in iteration > 1.
1805 Example : if( $hit->found_again()) { ... };
1806 Returns : Boolean, true (1) if the hit has been found in a
1807 previous PSI-BLAST iteration.
1808 Returns false (0 or undef) for hits that have not occurred in a
1809 previous PSI-BLAST iteration.
1810 Argument : Boolean (1 or 0). Only used for setting.
1811 Throws : none
1813 See Also : L<iteration()|iteration>
1815 =cut
1817 sub found_again {
1818 my $self = shift;
1819 return $self->{'_found_again'} = shift if @_;
1820 return $self->{'_found_again'};