1 #-----------------------------------------------------------------
3 # BioPerl module for Bio::Search::HSP::HSPI
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Steve Chervitz <sac@bioperl.org>
8 # and Jason Stajich <jason@bioperl.org>
10 # You may distribute this module under the same terms as perl itself
11 #-----------------------------------------------------------------
13 # POD documentation - main docs before the code
17 Bio::Search::HSP::HSPI - Interface for a High Scoring Pair in a similarity search result
21 # Bio::Search::HSP::HSPI objects cannot be instantiated since this
22 # module defines a pure interface.
24 # Given an object that implements the Bio::Search::HSP::HSPI interface,
25 # you can do the following things with it:
27 $r_type = $hsp->algorithm;
29 $pvalue = $hsp->pvalue();
31 $evalue = $hsp->evalue();
33 $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
35 $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
37 $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
39 $qseq = $hsp->query_string;
41 $hseq = $hsp->hit_string;
43 $homology_string = $hsp->homology_string;
45 $len = $hsp->length( ['query'|'hit'|'total'] );
51 Bio::Search::HSP::HSPI objects cannot be instantiated since this
52 module defines a pure interface.
54 Given an object that implements the L<Bio::Search::HSP::HSPI> interface,
55 you can do the following things with it:
59 This interface inherits methods from these other modules:
62 L<Bio::SeqFeature::FeaturePair>
63 L<Bio::SeqFeature::SimilarityPair>
65 Please refer to these modules for documentation of the
66 many additional inherited methods.
72 User feedback is an integral part of the evolution of this and other
73 Bioperl modules. Send your comments and suggestions preferably to
74 the Bioperl mailing list. Your participation is much appreciated.
76 bioperl-l@bioperl.org - General discussion
77 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
81 Please direct usage questions or support issues to the mailing list:
83 I<bioperl-l@bioperl.org>
85 rather than to the module maintainer directly. Many experienced and
86 reponsive experts will be able look at the problem and quickly
87 address it. Please include a thorough description of the problem
88 with code and data examples if at all possible.
92 Report bugs to the Bioperl bug tracking system to help us keep track
93 of the bugs and their resolution. Bug reports can be submitted via the
96 https://github.com/bioperl/bioperl-live/issues
98 =head1 AUTHOR - Steve Chervitz, Jason Stajich
100 Email sac-at-bioperl.org
101 Email jason-at-bioperl.org
105 Copyright (c) 2001 Steve Chervitz, Jason Stajich. All Rights Reserved.
109 This software is provided "as is" without warranty of any kind.
113 The rest of the documentation details each of the object methods.
114 Internal methods are usually preceded with a _
119 # Let the code begin...
122 package Bio
::Search
::HSP
::HSPI
;
128 use base
qw(Bio::SeqFeature::SimilarityPair Bio::Root::RootI);
134 Usage : my $r_type = $hsp->algorithm
135 Function: Obtain the name of the algorithm used to obtain the HSP
136 Returns : string (e.g., BLASTP)
142 my ($self,@args) = @_;
143 $self->throw_not_implemented;
149 Usage : my $pvalue = $hsp->pvalue();
150 Function: Returns the P-value for this HSP or undef
151 Returns : float or exponential (2e-10)
152 P-value is not defined with NCBI Blast2 reports.
159 $self->throw_not_implemented;
165 Usage : my $evalue = $hsp->evalue();
166 Function: Returns the e-value for this HSP
167 Returns : float or exponential (2e-10)
174 $self->throw_not_implemented;
178 =head2 frac_identical
180 Title : frac_identical
181 Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] );
182 Function: Returns the fraction of identitical positions for this HSP
183 Returns : Float in range 0.0 -> 1.0
184 Args : 'query' = num identical / length of query seq (without gaps)
185 'hit' = num identical / length of hit seq (without gaps)
186 'total' = num identical / length of alignment (with gaps)
192 my ($self, $type) = @_;
193 $self->throw_not_implemented;
196 =head2 frac_conserved
198 Title : frac_conserved
199 Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] );
200 Function : Returns the fraction of conserved positions for this HSP.
201 This is the fraction of symbols in the alignment with a
203 Returns : Float in range 0.0 -> 1.0
204 Args : 'query' = num conserved / length of query seq (without gaps)
205 'hit' = num conserved / length of hit seq (without gaps)
206 'total' = num conserved / length of alignment (with gaps)
212 my ($self, $type) = @_;
213 $self->throw_not_implemented;
218 Title : num_identical
219 Usage : $obj->num_identical($newval)
220 Function: returns the number of identical residues in the alignment
222 Args : integer (optional)
228 shift->throw_not_implemented;
233 Title : num_conserved
234 Usage : $obj->num_conserved($newval)
235 Function: returns the number of conserved residues in the alignment
237 Args : integer (optional)
243 shift->throw_not_implemented();
249 Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
250 Function : Get the number of gap characters in the query, hit, or total alignment.
251 Returns : Integer, number of gap characters or 0 if none
252 Args : 'query' = num conserved / length of query seq (without gaps)
253 'hit' = num conserved / length of hit seq (without gaps)
254 'total' = num conserved / length of alignment (with gaps)
260 my ($self, $type) = @_;
261 $self->throw_not_implemented;
267 Usage : my $qseq = $hsp->query_string;
268 Function: Retrieves the query sequence of this HSP as a string
277 $self->throw_not_implemented;
283 Usage : my $hseq = $hsp->hit_string;
284 Function: Retrieves the hit sequence of this HSP as a string
293 $self->throw_not_implemented;
296 =head2 homology_string
298 Title : homology_string
299 Usage : my $homo_string = $hsp->homology_string;
300 Function: Retrieves the homology sequence for this HSP as a string.
301 : The homology sequence is the string of symbols in between the
302 : query and hit sequences in the alignment indicating the degree
303 : of conservation (e.g., identical, similar, not similar).
311 $self->throw_not_implemented;
317 Usage : my $len = $hsp->length( ['query'|'hit'|'total'] );
318 Function : Returns the length of the query or hit in the alignment (without gaps)
319 or the aggregate length of the HSP (including gaps;
320 this may be greater than either hit or query )
322 Args : 'query' = length of query seq (without gaps)
323 'hit' = length of hit seq (without gaps)
324 'total' = length of alignment (with gaps)
331 shift->throw_not_implemented();
334 =head2 percent_identity
336 Title : percent_identity
337 Usage : my $percentid = $hsp->percent_identity()
338 Function: Returns the calculated percent identity for an HSP
339 Returns : floating point between 0 and 100
345 sub percent_identity
{
347 return $self->frac_identical('hsp') * 100;
353 Usage : my $aln = $hsp->get_aln
354 Function: Returns a Bio::SimpleAlign representing the HSP alignment
355 Returns : Bio::SimpleAlign
362 $self->throw_not_implemented;
369 Purpose : Get a list of residue positions (indices) for all identical
370 : or conserved residues in the query or sbjct sequence.
371 Example : @s_ind = $hsp->seq_inds('query', 'identical');
372 : @h_ind = $hsp->seq_inds('hit', 'conserved');
373 : @h_ind = $hsp->seq_inds('hit', 'conserved', 1);
374 Returns : List of integers
375 : May include ranges if collapse is true.
376 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query)
377 ('sbjct' is synonymous with 'hit')
378 class = 'identical' or 'conserved' or 'nomatch' or 'gap'
379 (default = identical)
380 (can be shortened to 'id' or 'cons')
382 collapse = boolean, if true, consecutive positions are merged
383 using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
384 collapses to "1-5 7 9-11". This is useful for
385 consolidating long lists. Default = no collapse.
389 See Also : L<Bio::Search::BlastUtils::collapse_nums()|Bio::Search::BlastUtils>, L<Bio::Search::Hit::HitI::seq_inds()|Bio::Search::Hit::HitI>
394 shift->throw_not_implemented();
397 =head2 Inherited from L<Bio::SeqFeature::SimilarityPair>
399 These methods come from L<Bio::SeqFeature::SimilarityPair>
404 Usage : my $query = $hsp->query
405 Function: Returns a SeqFeature representing the query in the HSP
406 Returns : Bio::SeqFeature::Similarity
407 Args : [optional] new value to set
413 Usage : my $hit = $hsp->hit
414 Function: Returns a SeqFeature representing the hit in the HSP
415 Returns : Bio::SeqFeature::Similarity
416 Args : [optional] new value to set
422 Usage : $evalue = $obj->significance();
423 $obj->significance($evalue);
424 Function: Get/Set the significance value (see Bio::SeqFeature::SimilarityPair)
425 Returns : significance value (scientific notation string)
426 Args : significance value (sci notation string)
432 Usage : my $score = $hsp->score();
433 Function: Returns the score for this HSP or undef
435 Args : [optional] numeric to set value
440 Usage : my $bits = $hsp->bits();
441 Function: Returns the bit value for this HSP or undef
452 Usage : $hsp->strand('query')
453 Function: Retrieves the strand for the HSP component requested
454 Returns : +1 or -1 (0 if unknown)
455 Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
456 'query' to retrieve the query strand (default)
457 'list' or 'array' to retrieve both query and hit together
464 $val = 'query' unless defined $val;
467 if ( $val =~ /^q/i ) {
468 return $self->query->strand(@_);
470 elsif ( $val =~ /^hi|^s/i ) {
471 return $self->hit->strand(@_);
473 elsif ( $val =~ /^list|array/i ) {
475 # Do we really need to pass on additional arguments here? HL
476 # (formerly this was strand(shift) which is really bad coding because
477 # it breaks if the callee allows setting to undef)
478 return ( $self->query->strand(@_), $self->hit->strand(@_) );
481 $self->warn("unrecognized component '$val' requested\n");
489 Usage : $hsp->start('query')
490 Function: Retrieves the start for the HSP component requested
492 Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
493 'query' to retrieve the query start (default)
500 $val = 'query' unless defined $val;
503 if( $val =~ /^q/i ) {
504 return $self->query->start(@_);
505 } elsif( $val =~ /^(hi|s)/i ) {
506 return $self->hit->start(@_);
507 } elsif ( $val =~ /^list|array/i ) {
508 # do we really need to pass on additional arguments here? HL
509 # (formerly this was strand(shift) which is really bad coding because
510 # it breaks if the callee allows setting to undef)
511 return ($self->query->start(@_),
512 $self->hit->start(@_) );
514 $self->warn("unrecognized component '$val' requested\n");
522 Usage : $hsp->end('query')
523 Function: Retrieves the end for the HSP component requested
525 Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
526 'query' to retrieve the query end (default)
533 $val = 'query' unless defined $val;
536 if( $val =~ /^q/i ) {
537 return $self->query->end(@_);
538 } elsif( $val =~ /^(hi|s)/i ) {
539 return $self->hit->end(@_);
540 } elsif ( $val =~ /^list|array/i ) {
541 # do we really need to pass on additional arguments here? HL
542 # (formerly this was strand(shift) which is really bad coding because
543 # it breaks if the callee allows setting to undef)
544 return ($self->query->end(@_),
545 $self->hit->end(@_) );
547 $self->warn("unrecognized end component '$val' requested\n");
554 Usage : $hsp->seq( [seq_type] );
555 Purpose : Get the query or sbjct sequence as a Bio::Seq.pm object.
556 Example : $seqObj = $hsp->seq('query');
557 Returns : Object reference for a Bio::Seq.pm object.
558 Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query').
559 : ('sbjct' is synonymous with 'hit')
561 Throws : Propagates any exception that occurs during construction
562 : of the Bio::Seq.pm object.
563 Comments : The sequence is returned in an array of strings corresponding
564 : to the strings in the original format of the Blast alignment.
565 : (i.e., same spacing).
567 See Also : L</seq_str>, L</seq_inds>, L<Bio::Seq>
572 my($self,$seqType) = @_;
573 $seqType ||= 'query';
574 $seqType = 'sbjct' if $seqType eq 'hit';
575 my $str = $self->seq_str($seqType);
576 if( $seqType =~ /^(m|ho)/i ) {
577 $self->throw("cannot call seq on the homology match string, it isn't really a sequence, use get_aln to convert the HSP to a Bio::AlignIO and generate a consensus from that.");
579 require Bio
::LocatableSeq
;
580 my $id = $seqType =~ /^q/i ?
$self->query->seq_id : $self->hit->seq_id;
581 return Bio
::LocatableSeq
->new( -ID
=> $id,
583 -START
=> $self->start($seqType),
584 -END => $self->end($seqType),
585 -STRAND
=> $self->strand($seqType),
586 -FORCE_NSE
=> $id ?
0 : 1,
587 -DESC
=> "$seqType sequence " );
592 Usage : $hsp->seq_str( seq_type );
593 Purpose : Get the full query, sbjct, or 'match' sequence as a string.
594 : The 'match' sequence is the string of symbols in between the
595 : query and sbjct sequences.
596 Example : $str = $hsp->seq_str('query');
598 Argument : seq_Type = 'query' or 'hit' or 'sbjct' or 'match'
599 : ('sbjct' is synonymous with 'hit')
601 Throws : Exception if the argument does not match an accepted seq_type.
604 See Also : L</seq>, L</seq_inds>, C<_set_match_seq>
610 my $type = shift || 'query';
612 if( $type =~ /^q/i ) { return $self->query_string(@_) }
613 elsif( $type =~ /^(s)|(hi)/i ) { return $self->hit_string(@_)}
614 elsif ( $type =~ /^(ho)|(ma)/i ) { return $self->homology_string(@_) }
616 $self->warn("unknown sequence type $type");
624 Usage : $hsp->rank( [string] );
625 Purpose : Get the rank of the HSP within a given Blast hit.
626 Example : $rank = $hsp->rank;
627 Returns : Integer (1..n) corresponding to the order in which the HSP
628 appears in the BLAST report.
632 sub rank
{ shift->throw_not_implemented }
636 Usage : $hsp->matches(-seq => 'hit'|'query',
639 Purpose : Get the total number of identical and conservative matches
640 : in the query or sbjct sequence for the given HSP. Optionally can
641 : report data within a defined interval along the seq.
642 : (Note: 'conservative' matches are called 'positives' in the
644 Example : ($id,$cons) = $hsp_object->matches(-seq => 'hit');
645 : ($id,$cons) = $hsp_object->matches(-seq => 'query',
648 Returns : 2-element array of integers
649 Argument : (1) seq_type = 'query' or 'hit' or 'sbjct' (default = query)
650 : ('sbjct' is synonymous with 'hit')
651 : (2) start = Starting coordinate (optional)
652 : (3) stop = Ending coordinate (optional)
653 Throws : Exception if the supplied coordinates are out of range.
654 Comments : Relies on seq_str('match') to get the string of alignment symbols
655 : between the query and sbjct lines which are used for determining
656 : the number of identical and conservative matches.
658 See Also : L</length>, L</gaps>, L</seq_str>, L<Bio::Search::Hit::BlastHit::_adjust_contigs()|Bio::Search::Hit::BlastHit>
665 my( $self, %param ) = @_;
667 my($seqType, $beg, $end) = ($param{-SEQ
},
670 $seqType ||= 'query';
671 $seqType = 'sbjct' if $seqType eq 'hit';
673 if( (!defined $beg && !defined $end) || ! $self->seq_str('match') ) {
674 ## Get data for the whole alignment.
675 push @data, ($self->num_identical, $self->num_conserved);
678 ## Get the substring representing the desired sub-section of aln.
681 my($start,$stop) = $self->range($seqType);
682 if($beg == 0) { $beg = $start; $end = $beg+$end; } # sane?
683 elsif($end == 0) { $end = $stop; $beg = $end-$beg; } # sane?
685 if($end > $stop) { $end = $stop; }
686 if($beg < $start) { $beg = $start; }
688 # now with gap handling! /maj
689 my $match_str = $self->seq_str('match');
691 # strip the homology string of gap positions relative
693 $match_str = $self->seq_str('match');
694 my $tgt = $self->seq_str($seqType);
695 my $encode = $match_str ^ $tgt;
697 $encode =~ s/$zap//g;
699 $match_str = $tgt ^ $encode;
702 ## ML: START fix for substr out of range error ------------------
704 if (($self->algorithm =~ /TBLAST[NX]/) && ($seqType eq 'sbjct'))
706 $seq = substr($match_str,
707 int(($beg-$start)/3),
708 int(($end-$beg+1)/3));
710 } elsif (($self->algorithm =~ /T?BLASTX/) && ($seqType eq 'query')) {
711 $seq = substr($match_str,
712 int(($beg-$start)/3), int(($end-$beg+1)/3));
714 $seq = substr($match_str,
715 $beg-$start, ($end-$beg+1));
717 ## ML: End of fix for substr out of range error -----------------
719 if(!CORE
::length $seq) {
720 $self->throw("Undefined sub-sequence ($beg,$end). Valid range = $start - $stop");
723 $seq =~ s/ //g; # remove space (no info).
724 my $len_cons = CORE
::length $seq;
725 $seq =~ s/\+//g; # remove '+' characters (conservative substitutions)
726 my $len_id = CORE
::length $seq;
727 push @data, ($len_id, $len_cons);
734 Usage : $hsp_obj->n()
735 Purpose : Get the N value (num HSPs on which P/Expect is based).
736 : This value is not defined with NCBI Blast2 with gapping.
737 Returns : Integer or null string if not defined.
740 Comments : The 'N' value is listed in parenthesis with P/Expect value:
741 : e.g., P(3) = 1.2e-30 ---> (N = 3).
742 : Not defined in NCBI Blast2 with gaps.
743 : This typically is equal to the number of HSPs but not always.
744 : To obtain the number of HSPs, use Bio::Search::Hit::HitI::num_hsps().
746 See Also : L<Bio::SeqFeature::SimilarityPair::score()|Bio::SeqFeature::SimilarityPair>
750 sub n
{ shift->throw_not_implemented }
754 Usage : $hsp->range( [seq_type] );
755 Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
756 : in the HSP alignment.
757 Example : ($query_beg, $query_end) = $hsp->range('query');
758 : ($hit_beg, $hit_end) = $hsp->range('hit');
759 Returns : Two-element array of integers
760 Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
761 : ('sbjct' is synonymous with 'hit')
763 Comments : This is a convenience method for constructions such as
764 ($hsp->query->start, $hsp->query->end)
768 sub range
{ shift->throw_not_implemented }
770 sub expect
{ shift->evalue(@_) }