2 # BioPerl module for Bio::Search::Hit::GenericHit
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
20 use Bio::Search::Hit::GenericHit;
21 my $hit = Bio::Search::Hit::GenericHit->new(-algorithm => 'blastp');
23 # typically one gets HitI objects from a SearchIO stream via a ResultI
25 my $parser = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
27 my $result = $parser->next_result;
28 my $hit = $result->next_hit;
30 # TODO: Describe how to configure a SearchIO stream so that it generates
35 This object handles the hit data from a Database Sequence Search such
38 Unless you're writing a parser, you won't ever need to create a
39 GenericHit or any other HitI-implementing object. If you use
40 the SearchIO system, HitI objects are created automatically from
41 a SearchIO stream which returns Bio::Search::Hit::HitI objects.
43 For documentation on what you can do with GenericHit (and other HitI
44 objects), please see the API documentation in
45 L<Bio::Search::Hit::HitI|Bio::Search::Hit::HitI>.
51 User feedback is an integral part of the evolution of this and other
52 Bioperl modules. Send your comments and suggestions preferably to
53 the Bioperl mailing list. Your participation is much appreciated.
55 bioperl-l@bioperl.org - General discussion
56 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
60 Please direct usage questions or support issues to the mailing list:
62 I<bioperl-l@bioperl.org>
64 rather than to the module maintainer directly. Many experienced and
65 responsive experts will be able to look at the problem and quickly
66 address it. Please include a thorough description of the problem
67 with code and data examples if at all possible.
71 Report bugs to the Bioperl bug tracking system to help us keep track
72 of the bugs and their resolution. Bug reports can be submitted via the
75 https://github.com/bioperl/bioperl-live/issues
77 =head1 AUTHOR - Jason Stajich and Steve Chervitz
79 Email jason-at-bioperl-dot-org
80 Email sac-at-bioperl-dot-org
84 Sendu Bala, bix@sendu.me.uk
88 The rest of the documentation details each of the object methods.
89 Internal methods are usually preceded with a _
94 # Let the code begin...
97 package Bio
::Search
::Hit
::GenericHit
;
100 use Bio
::Search
::SearchUtils
;
102 use base
qw(Bio::Root::Root Bio::Search::Hit::HitI);
107 Usage : my $obj = Bio::Search::Hit::GenericHit->new();
108 Function: Builds a new Bio::Search::Hit::GenericHit object
109 Returns : Bio::Search::Hit::GenericHit
110 Args : -name => Name of Hit (required)
111 -description => Description (optional)
112 -accession => Accession number (optional)
113 -ncbi_gi => NCBI GI UID (optional)
114 -length => Length of the Hit (optional)
115 -score => Raw Score for the Hit (optional)
116 -bits => Bit Score for the Hit (optional)
117 -significance => Significance value for the Hit (optional)
118 -algorithm => Algorithm used (BLASTP, FASTX, etc...)
119 -hsps => Array ref of HSPs for this Hit.
120 -found_again => boolean, true if hit appears in a
121 "previously found" section of a PSI-Blast report.
122 -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
128 my($class,@args) = @_;
130 my $self = $class->SUPER::new
(@args);
131 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
132 $score,$algo,$signif,$bits, $p,
133 $rank, $hsp_factory, $gi, $iter, $found) = $self->_rearrange([qw(HSPS
139 LENGTH SCORE ALGORITHM
145 FOUND_AGAIN)], @args);
147 defined $query_len && $self->query_length($query_len);
149 if( ! defined $name ) {
150 $self->throw("Must have defined a valid name for Hit");
155 defined $acc && $self->accession($acc);
156 defined $locus && $self->locus($locus);
157 defined $desc && $self->description($desc);
158 defined $length && $self->length($length);
159 defined $algo && $self->algorithm($algo);
160 defined $signif && $self->significance($signif);
161 defined $score && $self->raw_score($score);
162 defined $bits && $self->bits($bits);
163 defined $rank && $self->rank($rank);
164 defined $hsp_factory && $self->hsp_factory($hsp_factory);
165 defined $gi && $self->ncbi_gi($gi);
166 defined $iter && $self->iteration($iter);
167 defined $found && $self->found_again($found);
168 # p() has a weird interface, so this is a hack workaround
173 $self->{'_iterator'} = 0;
174 if( defined $hsps ) {
175 if( ref($hsps) !~ /array/i ) {
176 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
181 $self->add_hsp(shift @
{$hsps} );
183 $self->{'_hsps'} = undef if $hspcount == 0;
187 $self->{'_hsps'} = undef;
196 Usage : $hit->add_hsp($hsp)
197 Function: Add a HSP to the collection of HSPs for a Hit
198 Returns : number of HSPs in the Hit
199 Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
200 for creating a HSPI object (&hsp_factory must be set to get it back)
205 my ($self,$hsp) = @_;
206 if (!defined $hsp || (ref($hsp) ne 'HASH' && !$hsp->isa('Bio::Search::HSP::HSPI'))) {
207 $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
211 push @
{$self->{'_hsps'}}, $hsp;
212 if (ref($hsp) eq 'HASH') {
213 $self->{_hashes
}->{$#{$self->{'_hsps'}}} = 1;
215 return scalar @
{$self->{'_hsps'}};
221 Usage : $hit->hsp_factory($hsp_factory)
222 Function: Get/set the factory used to build HSPI objects if necessary.
223 Returns : Bio::Factory::ObjectFactoryI
224 Args : Bio::Factory::ObjectFactoryI
230 if (@_) { $self->{_hsp_factory
} = shift }
231 return $self->{_hsp_factory
} || return;
234 =head2 Bio::Search::Hit::HitI methods
236 Implementation of Bio::Search::Hit::HitI methods
241 Usage : $hit_name = $hit->name();
242 Function: returns the name of the Hit sequence
243 Returns : a scalar string
244 Args : [optional] scalar string to set the name
249 my ($self,$value) = @_;
250 my $previous = $self->{'_name'};
251 if( defined $value || ! defined $previous ) {
252 $value = $previous = '' unless defined $value;
253 $self->{'_name'} = $value;
261 Usage : $acc = $hit->accession();
262 Function: Retrieve the accession (if available) for the hit
263 Returns : a scalar string (empty string if not set)
269 my ($self,$value) = @_;
270 my $previous = $self->{'_accession'};
271 if( defined $value || ! defined $previous ) {
272 $value = $previous = '' unless defined $value;
273 $self->{'_accession'} = $value;
281 Usage : $desc = $hit->description();
282 Function: Retrieve the description for the hit
283 Returns : a scalar string
284 Args : [optional] scalar string to set the description
289 my ($self,$value) = @_;
290 my $previous = $self->{'_description'};
291 if( defined $value || ! defined $previous ) {
292 $value = $previous = '' unless defined $value;
293 $self->{'_description'} = $value;
301 Usage : my $len = $hit->length
302 Function: Returns the length of the hit
304 Args : [optional] integer to set the length
309 my ($self,$value) = @_;
310 my $previous = $self->{'_length'};
311 if( defined $value || ! defined $previous ) {
312 $value = $previous = 0 unless defined $value;
313 $self->{'_length'} = $value;
322 Usage : $alg = $hit->algorithm();
323 Function: Gets the algorithm specification that was used to obtain the hit
324 For BLAST, the algorithm denotes what type of sequence was aligned
325 against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
326 dna-prt, TBLASTN prt-translated dna, TBLASTX translated
328 Returns : a scalar string
329 Args : [optional] scalar string to set the algorithm
334 my ($self,$value) = @_;
335 my $previous = $self->{'_algorithm'};
336 if( defined $value || ! defined $previous ) {
337 $value = $previous = '' unless defined $value;
338 $self->{'_algorithm'} = $value;
346 Usage : $score = $hit->raw_score();
347 Function: Gets the "raw score" generated by the algorithm. What
348 this score is exactly will vary from algorithm to algorithm,
349 returning undef if unavailable.
350 Returns : a scalar value
351 Args : [optional] scalar value to set the raw score
356 my ($self,$value) = @_;
357 my $previous = $self->{'_score'};
358 if( defined $value ) {
359 $self->{'_score'} = $value;
360 } elsif ( ! defined $previous ) {
361 # Set the raw score of the Hit from the top HSP (its score, else its bits).
362 unless( defined $self->{'_hsps'}->[0] ) {
363 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
364 "If using NCBI BLAST, check bits() instead");
367 # use 'score' if available
368 if ( defined( ($self->hsps)[0]->score ) ) {
369 $previous = $self->{'_score'} = ($self->hsps)[0]->score;
371 # otherwise use 'bits'
372 elsif ( defined( ($self->hsps)[0]->bits ) ) {
373 $previous = $self->{'_score'} = ($self->hsps)[0]->bits;
381 Equivalent to L<raw_score()|raw_score>
# Alias for raw_score(): forwards the invocant and every argument unchanged
# and hands back whatever raw_score() returns.
sub score {
    my ($self, @args) = @_;
    return $self->raw_score(@args);
}
390 Usage : $significance = $hit->significance();
391 Function: Used to obtain the E or P value of a hit, i.e. the probability that
392 this particular hit was obtained purely by random chance. If
393 information is not available (nor calculatable from other
393 information is not available (nor calculable from other
395 Returns : a scalar value or undef if unavailable
396 Args : [optional] scalar value to set the significance
401 my ($self,$value) = @_;
402 my $previous = $self->{'_significance'};
403 if( defined $value ) {
404 $self->{'_significance'} = $value;
405 } elsif ( ! defined $previous ) {
406 unless( defined $self->{'_hsps'}->[0] ) {
407 $self->warn("No HSPs for this Hit (".$self->name.")");
410 # Set the significance of the Hit to that of the top HSP.
411 $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
419 Usage : $hit_object->bits();
420 Purpose : Gets the bit score of the best HSP for the current hit.
421 Example : $bits = $hit_object->bits();
422 Returns : Integer or undef if bit score is not set
424 Comments : For BLAST1, the non-bit score is listed in the summary line.
426 See Also : L<score()|score>
431 my ($self,$value) = @_;
432 my $previous = $self->{'_bits'};
433 if( defined $value ) {
434 $self->{'_bits'} = $value;
435 } elsif ( ! defined $previous ) {
436 # Set the bits of the Hit to that of the top HSP.
437 unless( defined $self->{'_hsps'}->[0] ) {
438 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
439 "If using WU-BLAST, check raw_score() instead");
442 $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
450 Usage : while( $hsp = $obj->next_hsp()) { ... }
451 Function : Returns the next available High Scoring Pair
453 Returns : Bio::Search::HSP::HSPI object or null if finished
460 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
462 defined($self->{'_hsps'})
463 && $self->{'_iterator'} <= scalar @
{$self->{'_hsps'}};
465 my $iterator = $self->{'_iterator'}++;
466 my $hsp = $self->{'_hsps'}->[$iterator] || return;
467 if (ref($hsp) eq 'HASH') {
468 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
469 $hsp = $factory->create_object(%{$hsp});
470 $self->{'_hsps'}->[$iterator] = $hsp;
471 delete $self->{_hashes
}->{$iterator};
479 Usage : $hit_object->hsps();
480 Purpose : Get a list containing all HSP objects.
481 : Get the number of HSPs for the current hit.
482 Example : @hsps = $hit_object->hsps();
483 : $num = $hit_object->hsps(); # alternatively, use num_hsps()
484 Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
485 : Scalar context: integer (number of HSPs).
486 : (Equivalent to num_hsps()).
487 Argument : n/a. Relies on wantarray
488 Throws : Exception if the HSPs have not been collected.
490 See Also : L<hsp()|hsp>, L<num_hsps()|num_hsps>
496 foreach my $i (keys %{$self->{_hashes
} || {}}) {
497 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
498 $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
499 delete $self->{_hashes
}->{$i};
502 return wantarray() ? @
{$self->{'_hsps'} || []} : scalar(@
{$self->{'_hsps'} || []});
507 Usage : $hit_object->num_hsps();
508 Purpose : Get the number of HSPs for the present hit.
509 Example : $nhsps = $hit_object->num_hsps();
510 Returns : Integer or '-' if HSPs have not been collected
513 See Also : L<hsps()|hsps>
520 unless ($self->{'_hsps'}) {
524 return scalar(@
{$self->{'_hsps'}});
530 Usage : $hit->rewind;
531 Function: Allow one to reset the HSP iterator to the beginning
532 Since this is an in-memory implementation
540 $self->{'_iterator'} = 0;
545 Usage : $ambig_code = $hit_object->ambiguous_aln();
546 Purpose : Sets/Gets ambiguity code data member.
547 Example : (see usage)
548 Returns : String = 'q', 's', 'qs', '-'
549 : 'q' = query sequence contains overlapping sub-sequences
550 : while sbjct does not.
551 : 's' = sbjct sequence contains overlapping sub-sequences
552 : while query does not.
553 : 'qs' = query and sbjct sequences contain overlapping sub-sequences
554 : relative to each other.
555 : '-' = query and sbjct sequences do not contain multiple domains
556 : relative to each other OR both contain the same distribution
557 : of similar domains.
560 Comment : Note: "sbjct" is synonymous with "hit"
566 if(@_) { $self->{'_ambiguous_aln'} = shift; }
567 $self->{'_ambiguous_aln'} || '-';
572 See documentation in L<Bio::Search::Hit::HitI::overlap()|Bio::Search::Hit::HitI>
578 if(@_) { $self->{'_overlap'} = shift; }
579 defined $self->{'_overlap'} ?
$self->{'_overlap'} : 0;
585 Usage : $hit_object->n();
586 Purpose : Gets the N number for the current hit.
587 : This is the number of HSPs in the set which was ascribed
588 : the lowest P-value (listed on the description line).
589 : This number is not the same as the total number of HSPs.
590 : To get the total number of HSPs, use num_hsps().
591 Example : $n = $hit_object->n();
594 Throws : Exception if HSPs have not been set (BLAST2 reports).
595 Comments : Note that the N parameter is not reported in gapped BLAST2.
596 : Calling n() on such reports will result in a call to num_hsps().
597 : The num_hsps() method will count the actual number of
598 : HSPs in the alignment listing, which may exceed N in
601 See Also : L<num_hsps()|num_hsps>
608 # The check for $self->{'_n'} is a remnant from the 'query' mode days
609 # in which the sbjct object would collect data from the description
613 if(not defined($self->{'_n'})) {
620 $n ||= $self->num_hsps;
627 Usage : $hit_object->p( [format] );
628 Purpose : Get the P-value for the best HSP of the given BLAST hit.
629 : (Note that P-values are not provided with NCBI Blast2 reports).
630 Example : $p = $sbjct->p;
631 : $p = $sbjct->p('exp'); # get exponent only.
632 : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
633 Returns : Float or scientific notation number (the raw P-value, DEFAULT).
634 : Integer if format == 'exp' (the magnitude of the base 10 exponent).
635 : 2-element list (float, int) if format == 'parts' and P-value
636 : is in scientific notation (See Comments).
637 Argument : format: string of 'raw' | 'exp' | 'parts'
638 : 'raw' returns value given in report. Default. (1.2e-34)
639 : 'exp' returns exponent value only (34)
640 : 'parts' returns the decimal and exponent as a
641 : 2-element list (1.2, -34) (See Comments).
642 Throws : Warns if no P-value is defined. Uses significance() instead.
643 Comments : Using the 'parts' argument is not recommended since it will not
644 : work as expected if the P-value is not in scientific notation.
645 : That is, floats are not converted into sci notation before
646 : splitting into parts.
648 See Also : L<expect()|expect>, L<significance()|significance>, L<Bio::Search::SearchUtils::get_exponent()|Bio::Search::SearchUtils>
# Return the P-value stored for this hit, optionally reformatted.
#
# Args    : $fmt (optional) - 'raw' (default), 'exp' or 'parts'. Only the
#           leading characters are examined, case-insensitively.
# Returns : 'raw'   - the stored value as-is.
#           'exp'   - whatever Bio::Search::SearchUtils::get_exponent()
#                     yields for the value.
#           'parts' - the value split on its exponent marker, e.g.
#                     '1.2e-34' gives ('1.2', '-34'). A value with no
#                     'e'/'E' comes back as a one-element list.
# Notes   : When no P-value is stored, a warning is issued through
#           $self->warn and significance() is used in its place.
sub p {
    # Some duplication of logic for p(), expect() and signif() for the sake of performance.
    my ($self, $fmt) = @_;

    my $val = $self->{'_p'};

    # Test definedness rather than truth so that a stored 0 is not replaced.
    if (not defined $val) {
        # P-value not defined, must be a NCBI Blast2 report.
        # Fall back to significance() instead.
        $self->warn( "P-value not defined. Using significance() instead.");
        $val = $self->significance();
    }

    return $val if not $fmt or $fmt =~ /^raw/i;

    ## Special formats: exponent-only or as list.
    return Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;

    # Split on either exponent marker. The earlier pattern /eE/ matched only
    # the literal two-character sequence "eE", so 'parts' never split anything.
    return (split (/[eE]/, $val)) if $fmt =~ /^parts/i;

    ## Default: return the raw P-value.
    return $val;
}
677 Usage : $hit_object->hsp( [string] );
678 Purpose : Get a single HSPI object for the present HitI object.
679 Example : $hspObj = $hit_object->hsp; # same as 'best'
680 : $hspObj = $hit_object->hsp('best');
681 : $hspObj = $hit_object->hsp('worst');
682 Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
683 Argument : String (or no argument).
684 : No argument (default) = highest scoring HSP (same as 'best').
685 : 'best' or 'first' = highest scoring HSP.
686 : 'worst' or 'last' = lowest scoring HSP.
687 Throws : Exception if the HSPs have not been collected.
688 : Exception if an unrecognized argument is used.
690 See Also : L<hsps()|hsps>, L<num_hsps()|num_hsps>
695 my( $self, $option ) = @_;
698 if (not ref $self->{'_hsps'}) {
699 $self->throw("Can't get HSPs: data not collected.");
702 my @hsps = $self->hsps;
704 return $hsps[0] if $option =~ /best|first|1/i;
705 return $hsps[$#hsps] if $option =~ /worst|last/i;
707 $self->throw("Can't get HSP for: $option\n" .
708 "Valid arguments: 'best', 'worst'");
711 =head2 logical_length
713 Usage : $hit_object->logical_length( [seq_type] );
714 : (mostly intended for internal use).
715 Purpose : Get the logical length of the hit sequence.
716 : This is necessary since the number of identical/conserved residues
717 : can be in terms of peptide sequence space, yet the query and/or hit
718 : sequence are in nucleotide space.
719 Example : $len = $hit_object->logical_length();
721 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
722 ('sbjct' is synonymous with 'hit')
725 : In the case of BLAST flavors:
726 : For TBLASTN reports, the length of the aligned portion of the
727 : nucleotide hit sequence is divided by 3; for BLASTX reports,
728 : the length of the aligned portion of the nucleotide query
729 : sequence is divided by 3. For TBLASTX reports, the length of
730 : both hit and query sequence are converted.
732 : This is important for functions like frac_aligned_query()
733 : which need to operate in amino acid coordinate space when dealing
734 : with [T]BLAST[NX] type reports.
736 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>
742 my $seqType = shift || 'query';
743 $seqType = 'sbjct' if $seqType eq 'hit';
745 my ($length, $logical);
746 my $algo = $self->algorithm;
748 # For the sbjct, return logical sbjct length
749 if( $seqType eq 'sbjct' ) {
750 $length = $self->length;
752 # Otherwise, return logical query length
753 $length = $self->query_length();
756 $logical = Bio
::Search
::SearchUtils
::logical_length
($algo, $seqType, $length);
758 return int($logical);
763 Usage : $hit_object->length_aln( [seq_type] );
764 Purpose : Get the total length of the aligned region for query or sbjct seq.
765 : This number will include all HSPs
766 Example : $len = $hit_object->length_aln(); # default = query
767 : $lenAln = $hit_object->length_aln('query');
769 Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
770 ('sbjct' is synonymous with 'hit')
771 Throws : Exception if the argument is not recognized.
772 Comments : This method will report the logical length of the alignment,
773 : meaning that for TBLAST[NX] reports, the length is reported
774 : using amino acid coordinate space (i.e., nucleotides / 3).
776 : This method requires that all HSPs be tiled. If they have not
777 : already been tiled, they will be tiled first automatically.
778 : If you don't want the tiled data, iterate through each HSP
779 : calling length() on each (use hsps() to get all HSPs).
781 See Also : L<length()|length>, L<frac_aligned_query()|frac_aligned_query>, L<frac_aligned_hit()|frac_aligned_hit>, L<gaps()|gaps>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<Bio::Search::HSP::BlastHSP::length()|Bio::Search::HSP::BlastHSP>
786 my( $self, $seqType, $num ) = @_;
788 $seqType ||= 'query';
789 $seqType = 'sbjct' if $seqType eq 'hit';
793 return $self->{'_length_aln_'.$seqType} = $num;
796 unless ($self->{'_hsps'}) {
797 #return wantarray ? ('-','-') : '-';
798 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
802 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
804 my $data = $self->{'_length_aln_'.$seqType};
806 ## If we don't have data, figure out what went wrong.
808 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
809 "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
816 Usage : $hit_object->gaps( [seq_type] );
817 Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
818 : Data is summed across all HSPs.
819 Example : $qgaps = $hit_object->gaps('query');
820 : $hgaps = $hit_object->gaps('hit');
821 : $tgaps = $hit_object->gaps(); # default = total (query + hit)
822 Returns : scalar context: integer
823 : array context without args: two-element list of integers
824 : (queryGaps, hitGaps)
825 : Array context can be forced by providing an argument of 'list' or 'array'.
827 : CAUTION: Calling this method within printf or sprintf is array context.
828 : So this function may not give you what you expect. For example:
829 : printf "Total gaps: %d", $hit->gaps();
830 : Actually returns a two-element array, so what gets printed
831 : is the number of gaps in the query, not the total
833 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
834 ('sbjct' is synonymous with 'hit')
836 Comments : If you need data for each HSP, use hsps() and then iterate
837 : through each HSP object.
838 : This method requires that all HSPs be tiled. If they have not
839 : already been tiled, they will be tiled first automatically.
840 : Not relying on wantarray since that will fail in situations
841 : such as printf "%d", $hit->gaps() in which you might expect to
842 : be printing the total gaps, but evaluates to array context.
844 See Also : L<length_aln()|length_aln>
849 my( $self, $seqType, $num ) = @_;
851 $seqType ||= (wantarray ?
'list' : 'total');
852 $seqType = 'sbjct' if $seqType eq 'hit';
854 unless ($self->{'_hsps'}) {
855 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
856 return wantarray ?
('-','-') : '-';
860 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
862 $seqType = lc($seqType);
865 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
867 return $self->{'_gaps_'.$seqType} = $num;
869 elsif($seqType =~ /list|array/i) {
870 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
872 elsif($seqType eq 'total') {
873 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
875 return $self->{'_gaps_'.$seqType} || 0;
882 See documentation in L<Bio::Search::Hit::HitI::matches()|Bio::Search::Hit::HitI>
887 my( $self, $arg1, $arg2) = @_;
890 unless ($self->{'_hsps'}) {
891 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
892 return wantarray ?
('-','-') : '-';
895 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
898 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
903 if( defined $arg2 ) {
904 $self->{'_totalIdentical'} = $arg1;
905 $self->{'_totalConserved'} = $arg2;
906 return ( $arg1, $arg2 );
908 elsif($arg1 =~ /^id/i) {
909 $data = $self->{'_totalIdentical'};
911 $data = $self->{'_totalConserved'};
913 #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
917 ## If we make it to here, it is likely the case that
918 ## the parser constructed a minimal hit object from the summary line only.
919 ## It either deliberately skipped parsing the alignment section,
920 ## or was not able to because it was absent (due to blast executable parameter
921 ## setting such as -b 0 (B=0 for WU-BLAST) )
922 #$self->throw("Can't get identical or conserved data: no data.");
928 Usage : $sbjct->start( [seq_type] );
929 Purpose : Gets the start coordinate for the query, sbjct, or both sequences
930 : in the BlastHit object. If there is more than one HSP, the lowest start
931 : value of all HSPs is returned.
932 Example : $qbeg = $sbjct->start('query');
933 : $sbeg = $sbjct->start('hit');
934 : ($qbeg, $sbeg) = $sbjct->start();
935 Returns : scalar context: integer
936 : array context without args: list of two integers (queryStart, sbjctStart)
937 : Array context can be "induced" by providing an argument of 'list' or 'array'.
938 Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
939 ('sbjct' is synonymous with 'hit')
942 See Also : L<end()|end>, L<range()|range>, L<strand()|strand>,
943 L<Bio::Search::HSP::BlastHSP::start|Bio::Search::HSP::BlastHSP>
948 my ($self, $seqType, $num) = @_;
950 unless ($self->{'_hsps'}) {
951 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
952 return wantarray ?
('-','-') : '-';
955 $seqType ||= (wantarray ?
'list' : 'query');
956 $seqType = 'sbjct' if $seqType eq 'hit';
959 $seqType = "_\L$seqType\E";
960 return $self->{$seqType.'Start'} = $num;
963 # If there is only one HSP, defer this call to the solitary HSP.
964 if($self->num_hsps == 1) {
965 return $self->hsp->start($seqType);
968 # Tiling normally generates $self->{'_queryStart'} and
969 # $self->{'_sbjctStart'}, but is very slow. If we haven't tiled,
970 # find the answer quickly without tiling.
971 unless (defined $self->{'_queryStart'}) {
972 my $earliest_query_start;
973 my $earliest_sbjct_start;
974 foreach my $hsp ($self->hsps) {
975 my $this_query_start = $hsp->start('query');
976 if (! defined $earliest_query_start || $this_query_start < $earliest_query_start) {
977 $earliest_query_start = $this_query_start;
980 my $this_sbjct_start = $hsp->start('sbjct');
981 if (! defined $earliest_sbjct_start || $this_sbjct_start < $earliest_sbjct_start) {
982 $earliest_sbjct_start = $this_sbjct_start;
985 $self->{'_queryStart'} = $earliest_query_start;
986 $self->{'_sbjctStart'} = $earliest_sbjct_start;
990 if ($seqType =~ /list|array/i) {
991 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
993 ## Sensitive to member name changes.
994 $seqType = "_\L$seqType\E";
995 return $self->{$seqType.'Start'};
1003 Usage : $sbjct->end( [seq_type] );
1004 Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1005 : in the BlastHit object. If there is more than one HSP,
1007 : the largest end value of all HSPs is returned.
1008 Example : $qend = $sbjct->end('query');
1009 : $send = $sbjct->end('hit');
1010 : ($qend, $send) = $sbjct->end();
1011 Returns : scalar context: integer
1012 : array context without args: list of two integers
1013 : (queryEnd, sbjctEnd)
1014 : Array context can be "induced" by providing an argument
1015 : of 'list' or 'array'.
1016 Argument : In scalar context: seq_type = 'query' or 'sbjct'
1017 : (case insensitive). If not supplied, 'query' is used.
1020 See Also : L<start()|start>, L<range()|range>, L<strand()|strand>
1025 my ($self, $seqType, $num) = @_;
1027 unless ($self->{'_hsps'}) {
1028 return wantarray ?
('-','-') : '-';
1031 $seqType ||= (wantarray ?
'list' : 'query');
1032 $seqType = 'sbjct' if $seqType eq 'hit';
1034 if( defined $num ) {
1035 $seqType = "_\L$seqType\E";
1036 return $self->{$seqType.'Stop'} = $num;
1039 # If there is only one HSP, defer this call to the solitary HSP.
1040 if($self->num_hsps == 1) {
1041 return $self->hsp->end($seqType);
1044 # Tiling normally generates $self->{'_queryStop'} and
1045 # $self->{'_sbjctStop'}, but is very slow. If we haven't tiled,
1046 # find the answer quickly without tiling.
1047 unless (defined $self->{'_queryStop'}) {
1048 my $latest_query_end;
1049 my $latest_sbjct_end;
1050 foreach my $hsp ($self->hsps) {
1051 my $this_query_end = $hsp->end('query');
1052 if (! defined $latest_query_end || $this_query_end > $latest_query_end) {
1053 $latest_query_end = $this_query_end;
1056 my $this_sbjct_end = $hsp->end('sbjct');
1057 if (! defined $latest_sbjct_end || $this_sbjct_end > $latest_sbjct_end) {
1058 $latest_sbjct_end = $this_sbjct_end;
1061 $self->{'_queryStop'} = $latest_query_end;
1062 $self->{'_sbjctStop'} = $latest_sbjct_end;
1066 if($seqType =~ /list|array/i) {
1067 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
1069 ## Sensitive to member name changes.
1070 $seqType = "_\L$seqType\E";
1071 return $self->{$seqType.'Stop'};
1078 Usage : $sbjct->range( [seq_type] );
1079 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1080 : in the HSP alignment.
1081 Example : ($qbeg, $qend) = $sbjct->range('query');
1082 : ($sbeg, $send) = $sbjct->range('hit');
1083 Returns : Two-element array of integers
1084 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
1085 ('sbjct' is synonymous with 'hit')
1088 See Also : L<start()|start>, L<end()|end>
1093 my ($self, $seqType) = @_;
1094 $seqType ||= 'query';
1095 $seqType = 'sbjct' if $seqType eq 'hit';
1096 return ($self->start($seqType), $self->end($seqType));
1100 =head2 frac_identical
1102 Usage : $hit_object->frac_identical( [seq_type] );
1103 Purpose : Get the overall fraction of identical positions across all HSPs.
1104 : The number refers to only the aligned regions and does not
1105 : account for unaligned regions in between the HSPs, if any.
1106 Example : $frac_iden = $hit_object->frac_identical('query');
1107 Returns : Float (3-decimal precision, e.g., 0.750).
1108 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1109 : default = 'query' (but see comments below).
1110 : ('sbjct' is synonymous with 'hit')
1113 : To compute the fraction identical, the logical length of the
1114 : aligned portion of the sequence is used, meaning that
1115 : in the case of BLAST flavors, for TBLASTN reports, the length of
1116 : the aligned portion of the
1117 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1118 : the length of the aligned portion of the nucleotide query
1119 : sequence is divided by 3. For TBLASTX reports, the length of
1120 : both hit and query sequence are converted.
1121 : This is necessary since the number of identical residues is
1122 : in terms of peptide sequence space.
1124 : Different versions of Blast report different values for the total
1125 : length of the alignment. This is the number reported in the
1126 : denominators in the stats section:
1127 : "Identical = 34/120 Positives = 67/120".
1128 : NCBI BLAST uses the total length of the alignment (with gaps)
1129 : WU-BLAST uses the length of the query sequence (without gaps).
1131 : Therefore, when called with an argument of 'total',
1132 : this method will report different values depending on the
1133 : version of BLAST used. Total does NOT take into account HSP
1134 : tiling, so it should not be used.
1136 : To get the fraction identical among only the aligned residues,
1137 : ignoring the gaps, call this method without an argument or
1138 : with an argument of 'query' or 'hit'.
1140 : If you need data for each HSP, use hsps() and then iterate
1141 : through the HSP objects.
1142 : This method requires that all HSPs be tiled. If they have not
1143 : already been tiled, they will be tiled first automatically.
1145 See Also : L<frac_conserved()|frac_conserved>, L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
# frac_identical( [seq_type] )
#
# Overall fraction of identical positions across all (tiled) HSPs, computed
# as matches('id') / length_aln(seq_type) and formatted with "%.3f".
#
# Args    : seq_type - 'query' (default), 'hit' or 'sbjct', or 'total'.
#           The value is lower-cased and 'hit' is mapped to 'sbjct'.
# Returns : The formatted ratio (e.g. 0.750). A ratio that would be rounded
#           up to 1 although it is really below 1 is reported as 0.999.
#           Returns nothing (undef in scalar context) when the hit carries
#           no HSP data or when the aligned length is 0.
sub frac_identical {
    my ($self, $seqType) = @_;

    ## Sensitive to member name format: normalise the case first so that
    ## 'HIT' / 'Hit' are recognised as synonyms of 'sbjct' as well.
    $seqType = lc($seqType || 'query');
    $seqType = 'sbjct' if $seqType eq 'hit';

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $ident = $self->matches('id');
    my $total = $self->length_aln($seqType);

    # An empty (or unknown) aligned length would otherwise die with
    # "Illegal division by zero".
    return unless $total;

    my $ratio         = $ident / $total;
    my $ratio_rounded = sprintf("%.3f", $ratio);

    # Round down iff normal rounding yields 1 (just like blast)
    $ratio_rounded = 0.999 if $ratio_rounded == 1 && $ratio < 1;
    return $ratio_rounded;
}
1176 =head2 frac_conserved
1178 Usage : $hit_object->frac_conserved( [seq_type] );
1179 Purpose : Get the overall fraction of conserved positions across all HSPs.
1180 : The number refers to only the aligned regions and does not
1181 : account for unaligned regions in between the HSPs, if any.
1182 Example : $frac_cons = $hit_object->frac_conserved('hit');
1183 Returns : Float (3-decimal precision, e.g., 0.750).
1184 Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1185 : default = 'query' (but see comments below).
1186 : ('sbjct' is synonymous with 'hit')
1189 : To compute the fraction conserved, the logical length of the
1190 : aligned portion of the sequence is used, meaning that
1191 : in the case of BLAST flavors, for TBLASTN reports, the length of
1192 : the aligned portion of the
1193 : nucleotide hit sequence is divided by 3; for BLASTX reports,
1194 : the length of the aligned portion of the nucleotide query
1195 : sequence is divided by 3. For TBLASTX reports, the length of
1196 : both hit and query sequence are converted.
1197 : This is necessary since the number of conserved residues is
1198 : in terms of peptide sequence space.
1200 : Different versions of Blast report different values for the total
1201 : length of the alignment. This is the number reported in the
1202 : denominators in the stats section:
1203 : "Identical = 34/120 Positives = 67/120".
1204 : NCBI BLAST uses the total length of the alignment (with gaps)
1205 : WU-BLAST uses the length of the query sequence (without gaps).
1207 : Therefore, when called with an argument of 'total',
1208 : this method will report different values depending on the
1209 : version of BLAST used. Total does NOT take into account HSP
1210 : tiling, so it should not be used.
1212 : To get the fraction conserved among only the aligned residues,
1213 : ignoring the gaps, call this method without an argument or
1214 : with an argument of 'query' or 'hit'.
1216 : If you need data for each HSP, use hsps() and then iterate
1217 : through the HSP objects.
1218 : This method requires that all HSPs be tiled. If they have not
1219 : already been tiled, they will be tiled first automatically.
1221 See Also : L<frac_identical()|frac_identical>, L<matches()|matches>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
# frac_conserved( [seq_type] )
#
# Overall fraction of conserved positions across all (tiled) HSPs, computed
# as matches('cons') / length_aln(seq_type) and formatted with "%.3f".
#
# Args    : seq_type - 'query' (default), 'hit' or 'sbjct', or 'total'.
#           The value is lower-cased and 'hit' is mapped to 'sbjct'.
# Returns : The formatted ratio (e.g. 0.750). A ratio that would be rounded
#           up to 1 although it is really below 1 is reported as 0.999.
#           Returns nothing (undef in scalar context) when the hit carries
#           no HSP data or when the aligned length is 0.
sub frac_conserved {
    my ($self, $seqType) = @_;

    ## Sensitive to member name format: normalise the case first so that
    ## 'HIT' / 'Hit' are recognised as synonyms of 'sbjct' as well.
    $seqType = lc($seqType || 'query');
    $seqType = 'sbjct' if $seqType eq 'hit';

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $consv = $self->matches('cons');
    my $total = $self->length_aln($seqType);

    # An empty (or unknown) aligned length would otherwise die with
    # "Illegal division by zero".
    return unless $total;

    my $ratio         = $consv / $total;
    my $ratio_rounded = sprintf("%.3f", $ratio);

    # Round down iff normal rounding yields 1 (just like blast)
    $ratio_rounded = 0.999 if $ratio_rounded == 1 && $ratio < 1;
    return $ratio_rounded;
}
1254 =head2 frac_aligned_query
1256 Usage : $hit_object->frac_aligned_query();
1257 Purpose : Get the fraction of the query sequence which has been aligned
1258 : across all HSPs (not including intervals between non-overlapping HSPs).
1260 Example : $frac_alnq = $hit_object->frac_aligned_query();
1261 Returns : Float (2-decimal precision, e.g., 0.75),
1262 : or undef if query length is unknown to avoid division by 0.
1265 Comments : If you need data for each HSP, use hsps() and then iterate
1266 : through the HSP objects.
1267 : This method requires that all HSPs be tiled. If they have not
1268 : already been tiled, they will be tiled first automatically.
1270 See Also : L<frac_aligned_hit()|frac_aligned_hit>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
# frac_aligned_query()
#
# Fraction of the query sequence that is covered by the (tiled) HSPs:
# length_aln('query') / logical_length('query'), formatted with "%.2f".
#
# Returns : The formatted fraction (e.g. 0.75);
#           undef when the logical query length is 0 or unknown;
#           nothing (empty list / undef) when the hit carries no HSP data.
sub frac_aligned_query {
    my $self = shift;

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $qry_len = $self->logical_length('query');

    # Avoid division by 0 crash. An explicit undef (rather than a bare
    # return) is kept on purpose so list-context callers still receive
    # exactly one element, as they did before.
    return undef unless $qry_len;

    return sprintf("%.2f", $self->length_aln('query') / $qry_len);
}
1292 =head2 frac_aligned_hit
1294 Usage : $hit_object->frac_aligned_hit();
1295 Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1296 : across all HSPs (not including intervals between non-overlapping HSPs).
1298 Example : $frac_alnq = $hit_object->frac_aligned_hit();
1299 Returns : Float (2-decimal precision, e.g., 0.75),
1300 : or undef if hit length is unknown to avoid division by 0.
1303 Comments : If you need data for each HSP, use hsps() and then iterate
1304 : through the HSP objects.
1305 : This method requires that all HSPs be tiled. If they have not
1306 : already been tiled, they will be tiled first automatically.
1308 See Also : L<frac_aligned_query()|frac_aligned_query>, L<matches()|matches>, L<logical_length()|logical_length>, L<length_aln()|length_aln>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
# frac_aligned_hit()
#
# Fraction of the hit (sbjct) sequence that is covered by the (tiled) HSPs:
# length_aln('sbjct') / logical_length('sbjct'), formatted with "%.2f".
#
# Returns : The formatted fraction (e.g. 0.75);
#           undef when the logical hit length is 0 or unknown;
#           nothing (empty list / undef) when the hit carries no HSP data.
sub frac_aligned_hit {
    my $self = shift;

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $sbjct_len = $self->logical_length('sbjct');

    # Avoid division by 0 crash. An explicit undef (rather than a bare
    # return) is kept on purpose so list-context callers still receive
    # exactly one element, as they did before.
    return undef unless $sbjct_len;

    return sprintf("%.2f", $self->length_aln('sbjct') / $sbjct_len);
}
1329 ## These methods are being maintained for backward compatibility.
1331 =head2 frac_aligned_sbjct
1333 Same as L<frac_aligned_hit()|frac_aligned_hit>
# Backward-compatible alias: frac_aligned_sbjct() is the same code as
# frac_aligned_hit().
*frac_aligned_sbjct = \&frac_aligned_hit;
1339 =head2 num_unaligned_sbjct
1341 Same as L<num_unaligned_hit()|num_unaligned_hit>
# Backward-compatible alias: num_unaligned_sbjct() is the same code as
# num_unaligned_hit().
*num_unaligned_sbjct = \&num_unaligned_hit;
1348 =head2 num_unaligned_hit
1350 Usage : $hit_object->num_unaligned_hit();
1351 Purpose : Get the number of the unaligned residues in the hit sequence.
1352 : Sums across all HSPs.
1353 Example : $num_unaln = $hit_object->num_unaligned_hit();
1357 Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1358 : They apply here as well.
1359 : If you need data for each HSP, use hsps() and then iterate
1360 : through the HSP objects.
1361 : This method requires that all HSPs be tiled. If they have not
1362 : already been tiled, they will be tiled first automatically.
1364 See Also : L<num_unaligned_query()|num_unaligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>, L<frac_aligned_hit()|frac_aligned_hit>
# num_unaligned_hit()
#
# Number of residues of the hit (sbjct) sequence that are not covered by the
# (tiled) HSPs: logical_length('sbjct') - length_aln('sbjct').
#
# Returns : The difference, clamped so that it is never negative;
#           nothing (empty list / undef) when the hit carries no HSP data.
sub num_unaligned_hit {
    my $self = shift;

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');

    # The aligned length can exceed the logical length; report 0 then.
    return $num < 0 ? 0 : $num;
}
1384 =head2 num_unaligned_query
1386 Usage : $hit_object->num_unaligned_query();
1387 Purpose : Get the number of the unaligned residues in the query sequence.
1388 : Sums across all HSPs.
1389 Example : $num_unaln = $hit_object->num_unaligned_query();
1393 Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1394 : They apply here as well.
1395 : If you need data for each HSP, use hsps() and then iterate
1396 : through the HSP objects.
1397 : This method requires that all HSPs be tiled. If they have not
1398 : already been tiled, they will be tiled first automatically.
1400 See Also : L<num_unaligned_hit()|num_unaligned_hit>, L<frac_aligned_query()|frac_aligned_query>, L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
# num_unaligned_query()
#
# Number of residues of the query sequence that are not covered by the
# (tiled) HSPs: logical_length('query') - length_aln('query').
#
# Returns : The difference, clamped so that it is never negative;
#           nothing (empty list / undef) when the hit carries no HSP data.
sub num_unaligned_query {
    my $self = shift;

    unless ($self->{'_hsps'}) {
        Bio::Search::SearchUtils::_warn_about_no_hsps($self);
        return;
    }

    Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;

    my $num = $self->logical_length('query') - $self->length_aln('query');

    # The aligned length can exceed the logical length; report 0 then.
    return $num < 0 ? 0 : $num;
}
1423 Usage : $hit->seq_inds( seq_type, class, collapse );
1424 Purpose : Get a list of residue positions (indices) across all HSPs
1425 : for identical or conserved residues in the query or sbjct sequence.
1426 Example : @s_ind = $hit->seq_inds('query', 'identical');
1427 : @h_ind = $hit->seq_inds('hit', 'conserved');
1428 : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1429 Returns : Array of integers
1430 : May include ranges if collapse is non-zero.
1431 Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1432 : ('sbjct' is synonymous with 'hit')
1433 : [1] class = 'identical' or 'conserved' (default = 'identical')
1434 : (can be shortened to 'id' or 'cons')
1435 : (actually, anything not 'id' will evaluate to 'conserved').
1436 : [2] collapse = boolean, if non-zero, consecutive positions are merged
1437 : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1438 : collapses to "1-5 7 9-11". This is useful for
1439 : consolidating long lists. Default = no collapse.
1442 See Also : L<Bio::Search::HSP::BlastHSP::seq_inds()|Bio::Search::HSP::BlastHSP>
1447 my ($self, $seqType, $class, $collapse) = @_;
1449 $seqType ||= 'query';
1450 $class ||= 'identical';
1453 $seqType = 'sbjct' if $seqType eq 'hit';
1456 foreach $hsp ($self->hsps) {
1457 # This will merge data for all HSPs together.
1458 push @inds, $hsp->seq_inds($seqType, $class);
1461 # Need to remove duplicates and sort the merged positions.
1463 my %tmp = map { $_, 1 } @inds;
1464 @inds = sort {$a <=> $b} keys %tmp;
1467 $collapse ?
&Bio
::Search
::SearchUtils
::collapse_nums
(@inds) : @inds;
1473 See documentation in L<Bio::Search::Hit::HitI::strand()|Bio::Search::Hit::HitI>
1478 my ($self, $seqType, $strnd) = @_;
1480 unless ($self->{'_hsps'}) {
1481 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1482 return wantarray ?
('-','-') : '-';
1486 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1488 $seqType ||= (wantarray ?
'list' : 'query');
1489 $seqType = 'sbjct' if $seqType eq 'hit';
1491 $seqType = lc($seqType);
1493 if( defined $strnd ) {
1494 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1496 return $self->{'_strand_'.$seqType} = $strnd;
1500 # If there is only one HSP, defer this call to the solitary HSP.
1501 if($self->num_hsps == 1) {
1502 return $self->hsp->strand($seqType);
1504 elsif( defined $self->{'_strand_query'}) {
1505 # Get the data computed during hsp tiling.
1506 $qstr = $self->{'_strand_query'};
1507 $hstr = $self->{'_strand_sbjct'}
1510 # otherwise, iterate through all HSPs collecting strand info.
1511 # This will return the string "-1/1" if there are HSPs on different strands.
1512 # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1513 # (unless the above elsif{} is commented out).
1515 foreach my $hsp( $self->hsps ) {
1516 my ( $q, $h ) = $hsp->strand();
1520 $qstr = join( '/', sort keys %qstr);
1521 $hstr = join( '/', sort keys %hstr);
1524 if($seqType =~ /list|array/i) {
1525 return ($qstr, $hstr);
1526 } elsif( $seqType eq 'query' ) {
1535 See documentation in L<Bio::Search::Hit::HitI::frame()|Bio::Search::Hit::HitI>
1540 my( $self, $frm ) = @_;
1542 unless ($self->{'_hsps'}) {
1543 Bio
::Search
::SearchUtils
::_warn_about_no_hsps
($self);
1544 #return wantarray ? ('-','-') : '-';
1548 Bio
::Search
::SearchUtils
::tile_hsps
($self) unless $self->tiled_hsps;
1550 if( defined $frm ) {
1551 return $self->{'_frame'} = $frm;
1554 # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1555 # in which the sbjct object would collect data from the description line only.
1558 if(not defined($self->{'_frame'})) {
1559 $frame = $self->hsp->frame('hit');
1561 $frame = $self->{'_frame'};
1569 Usage : $obj->rank($newval)
1570 Function: Get/Set the rank of this Hit in the Query search list
1571 i.e. this is the Nth hit for a specific query
1572 Returns : value of rank
1573 Args : newvalue (optional)
1580 return $self->{'_rank'} = shift if @_;
1581 return $self->{'_rank'} || 1;
1587 Usage : $locus = $hit->locus();
1588 Function: Retrieve the locus (if available) for the hit
1589 Returns : a scalar string (empty string if not set)
1595 my ($self,$value) = @_;
1596 my $previous = $self->{'_locus'};
1597 if( defined $value || ! defined $previous ) {
1598 unless (defined $value) {
1599 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1600 $value = $previous = $3;
1602 $value = $previous = '';
1605 $self->{'_locus'} = $value;
1610 =head2 each_accession_number
1612 Title : each_accession_number
1613 Usage : @each_accession_number = $hit->each_accession_number();
1614 Function: Get each accession number listed in the description of the hit.
1615 If there are no alternatives, then only the primary accession will be returned.
1617 Returns : list of all accession numbers in the description
# each_accession_number()
#
# Builds the list of accession numbers for this hit. The list starts with
# the primary accession ($self->{'_accession'}) and is extended with one
# entry for every "db|field|field"-style identifier found in the hit
# description ($self->{'_description'}).
#
# Args    : none ($value is accepted for interface compatibility but is
#           not used).
# Returns : list of accession strings; only the primary accession when no
#           description is set.
sub each_accession_number {
    my ($self, $value) = @_;
    my $desc = $self->{'_description'};

    # put primary accnum on the list
    my @accnums = ($self->{'_accession'});
    return @accnums unless defined $desc;

    while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
        my $id = $1;
        my ($acc, $version);    # $version is split off but not reported

        if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
            # accession is the second field: db|accession[.version]|locus
            ($acc, $version) = split /\./, $2;
        }
        elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
            # accession is the third field: db||entry, db|x|identifier
            ($acc, $version) = split /\./, $3;
        }
        elsif ($id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
            # NOTE(review): the body of this branch was not visible in the
            # reviewed text; the whole identifier is used as the accession
            # so that no undef is pushed -- confirm against the original.
            $acc = $id;
        }
        elsif ($id =~ /(oth)\|(.*)\|(.*)\|(.*)/) {    # discontinued...
            ($acc, $version) = ($2);
        }
        else {
            #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
            #Database Name                     Identifier Syntax
            #============================      ========================
            #GenBank                           gb|accession|locus
            #EMBL Data Library                 emb|accession|locus
            #DDBJ, DNA Database of Japan       dbj|accession|locus
            #NBRF PIR                          pir||entry
            #Protein Research Foundation       prf||name
            #SWISS-PROT                        sp|accession|entry name
            #Brookhaven Protein Data Bank      pdb|entry|chain
            #Patents                           pat|country|number
            #GenInfo Backbone Id               bbs|number
            #General database identifier       gnl|database|identifier
            #NCBI Reference Sequence           ref|accession|locus
            #Local Sequence identifier         lcl|identifier
            # NOTE(review): fallback assignment reconstructed (not visible
            # in the reviewed text); the raw identifier is kept -- confirm.
            $acc = $id;
        }
        push(@accnums, $acc);
    }
    return @accnums;
}
1666 See documentation in L<Bio::Search::SearchUtils::tile_hsps()|Bio::Search::SearchUtils>
1672 return $self->{'_tiled_hsps'} = shift if @_;
1673 return $self->{'_tiled_hsps'};
1678 Title : query_length
1679 Usage : $obj->query_length($newval)
1680 Function: Get/Set the query_length
1681 Returns : value of query_length (a scalar)
1682 Args : on set, new value (a scalar or undef, optional)
1688 my ($self,$value) = @_;
1689 my $previous = $self->{'_query_length'};
1690 if( defined $value || ! defined $previous ) {
1691 $value = $previous = 0 unless defined $value;
1692 $self->{'_query_length'} = $value;
1700 Usage : $acc = $hit->ncbi_gi();
1701 Function: Retrieve the NCBI Unique ID (aka the GI #),
1702 if available, for the hit
1703 Returns : a scalar string (empty string if not set)
1705 Note : As of Sept. 2016 NCBI records will no longer have a
1706 GI; this attribute will remain in place for older records.
1712 my ($self,$value) = @_;
1713 if( defined $value ) {
1714 $self->{'_ncbi_gi'} = $value;
1716 $self->{'_ncbi_gi'} = $self->name =~ m{^gi\|(\d+)} ?
$1 : '';
1718 return $self->{'_ncbi_gi'};
1722 # sort method for HSPs
1727 Usage : $hit->sort_hsps(\&sort_function)
1728 Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
1729 by descending score.
1731 Args : A coderef for the sort function. See the documentation on the Perl sort()
1732 function for guidelines on writing sort functions.
1733 Note : To access the special variables $a and $b used by the Perl sort() function
1734 the user function must access Bio::Search::Hit::HitI namespace.
1736 $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
1737 $Bio::Search::Result::HitI::b->length});
1738 NOT $hit->sort_hsps($a->length <=> $b->length);
1743 my ($self, $coderef) = @_;
1747 $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
1748 unless (ref($coderef) eq 'CODE');
1751 $coderef = \
&_default_sort_hsps
;
1755 my @hsps = $self->hsps();
1756 eval {@sorted_hsps = sort $coderef @hsps };
1759 $self->throw("Unable to sort hsps: $@");
1762 $self->{'_hsps'} = \
@sorted_hsps;
1769 Usage : $hit->iteration( $iteration_num );
1770 Purpose : Gets the iteration number in which the Hit was found.
1771 Example : $iteration_num = $sbjct->iteration();
1772 Returns : Integer greater than or equal to 1
1773 Non-PSI-BLAST reports may report iteration as 1, but this number
1774 is only meaningful for PSI-BLAST reports.
1775 Argument : iteration_num (optional, used when setting only)
1778 See Also : L<found_again()|found_again>
1783 my ($self,$value) = @_;
1784 if( defined $value) {
1785 $self->{'_psiblast_iteration'} = $value;
1787 return $self->{'_psiblast_iteration'};
1793 Usage : $hit->found_again;
1794 $hit->found_again(1);
1795 Purpose : Gets a boolean indicator whether or not the hit has
1796 been found in a previous iteration.
1797 This is only applicable to PSI-BLAST reports.
1799 This method indicates if the hit was reported in the
1800 "Sequences used in model and found again" section of the
1801 PSI-BLAST report or if it was reported in the
1802 "Sequences not found previously or not previously below threshold"
1803 section of the PSI-BLAST report. Only for hits in iteration > 1.
1805 Example : if( $hit->found_again()) { ... };
1806 Returns : Boolean, true (1) if the hit has been found in a
1807 previous PSI-BLAST iteration.
1808 Returns false (0 or undef) for hits that have not occurred in a
1809 previous PSI-BLAST iteration.
1810 Argument : Boolean (1 or 0). Only used for setting.
1813 See Also : L<iteration()|iteration>
1819 return $self->{'_found_again'} = shift if @_;
1820 return $self->{'_found_again'};