2 # BioPerl module for Bio::Search::Result::GenericResult
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Search::Result::GenericResult - Generic Implementation of
17 Bio::Search::Result::ResultI interface applicable to most search
23 # typically one gets Results from a SearchIO stream
25 my $io = Bio::SearchIO->new(-format => 'blast',
26 -file => 't/data/HUMBETGLOA.tblastx');
27 while( my $result = $io->next_result ) {
28 # process all search results within the input stream
29 while( my $hit = $result->next_hit ) {
30 # insert code here for hit processing
34 use Bio::Search::Result::GenericResult;
35 my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
36 # typically these are created from a Bio::SearchIO stream
37 my $result = Bio::Search::Result::GenericResult->new
38 ( -query_name => 'HUMBETGLOA',
39 -query_accession => '',
40 -query_description => 'Human haplotype C4 beta-globin gene, complete cds.',
42 -database_name => 'ecoli.aa',
43 -database_letters => 4662239,
44 -database_entries => 400,
45 -parameters => { 'e' => '0.001' },
46 -statistics => { 'kappa' => 0.731 },
47 -algorithm => 'blastp',
48 -algorithm_version => '2.1.2',
51 my $id = $result->query_name();
53 my $desc = $result->query_description();
55 my $name = $result->database_name();
57 my $size = $result->database_letters();
59 my $num_entries = $result->database_entries();
61 my $evalue = $result->get_parameter('e');
63 my @params = $result->available_parameters;
65 my $kappa = $result->get_statistic('kappa');
67 my @statnames = $result->available_statistics;
69 # TODO: Show how to configure a SearchIO stream so that it generates
70 # GenericResult objects.
75 This object is an implementation of the Bio::Search::Result::ResultI
76 interface and provides a generic place to store results from a
77 sequence database search.
79 Unless you're writing a parser, you won't ever need to create a
80 GenericResult or any other ResultI-implementing object. If you use
81 the SearchIO system, ResultI objects are created automatically from
82 a SearchIO stream which returns Bio::Search::Result::ResultI objects.
84 For documentation on what you can do with GenericResult (and other ResultI
85 objects), please see the API documentation in
86 L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
92 User feedback is an integral part of the evolution of this and other
93 Bioperl modules. Send your comments and suggestions preferably to
94 the Bioperl mailing list. Your participation is much appreciated.
96 bioperl-l@bioperl.org - General discussion
97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
101 Please direct usage questions or support issues to the mailing list:
103 I<bioperl-l@bioperl.org>
105 rather than to the module maintainer directly. Many experienced and
106 responsive experts will be able to look at the problem and quickly
107 address it. Please include a thorough description of the problem
108 with code and data examples if at all possible.
110 =head2 Reporting Bugs
112 Report bugs to the Bioperl bug tracking system to help us keep track
113 of the bugs and their resolution. Bug reports can be submitted via the web:
116 https://github.com/bioperl/bioperl-live/issues
118 =head1 AUTHOR - Jason Stajich and Steve Chervitz
120 Email jason@bioperl.org
121 Email sac@bioperl.org
125 Sendu Bala, bix@sendu.me.uk
129 The rest of the documentation details each of the object methods.
130 Internal methods are usually preceded with a _
135 # Let the code begin...
138 package Bio
::Search
::Result
::GenericResult
;
141 use Bio
::Search
::GenericStatistics
;
142 use Bio
::Tools
::Run
::GenericParameters
;
146 # '""' => \&to_string;
148 use base
qw(Bio::Root::Root Bio::Search::Result::ResultI);
153 Usage : my $obj = Bio::Search::Result::GenericResult->new();
154 Function: Builds a new Bio::Search::Result::GenericResult object
155 Returns : Bio::Search::Result::GenericResult
156 Args : -query_name => Name of query Sequence
157 -query_accession => Query accession number (if available)
158 -query_description => Description of query sequence
159 -query_length => Length of query sequence
160 -database_name => Name of database
161 -database_letters => Number of residues in database
162 -database_entries => Number of entries in database
163 -hits => array ref of Bio::Search::Hit::HitI objects
164 -parameters => hash ref of search parameters (key => value)
165 -statistics => hash ref of search statistics (key => value)
166 -algorithm => program name (blastx)
167 -algorithm_version => version of the algorithm (2.1.2)
168 -algorithm_reference => literature reference string for this algorithm
169 -rid => value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
170 -hit_factory => Bio::Factory::ObjectFactoryI capable of making
171 Bio::Search::Hit::HitI objects
176 my($class,@args) = @_;
178 my $self = $class->SUPER::new
(@args);
180 $self->{'_hits'} = [];
181 $self->{'_hitindex'} = 0;
182 $self->{'_statistics'} = Bio
::Search
::GenericStatistics
->new();
183 $self->{'_parameters'} = Bio
::Tools
::Run
::GenericParameters
->new();
185 my ($qname,$qacc,$qdesc,$qlen, $qgi,
186 $dbname,$dblet,$dbent,$params,
187 $stats, $hits, $algo, $algo_v,
188 $prog_ref, $algo_r, $rid, $hit_factory) = $self->_rearrange([qw(QUERY_NAME
207 $algo_r ||= $prog_ref;
208 defined $algo && $self->algorithm($algo);
209 defined $algo_v && $self->algorithm_version($algo_v);
210 defined $algo_r && $self->algorithm_reference($algo_r);
212 defined $rid && $self->rid($rid);
214 defined $qname && $self->query_name($qname);
215 defined $qacc && $self->query_accession($qacc);
216 defined $qdesc && $self->query_description($qdesc);
217 defined $qlen && $self->query_length($qlen);
218 defined $qgi && $self->query_gi($qgi);
219 defined $dbname && $self->database_name($dbname);
220 defined $dblet && $self->database_letters($dblet);
221 defined $dbent && $self->database_entries($dbent);
223 defined $hit_factory && $self->hit_factory($hit_factory);
225 if( defined $params ) {
226 if( ref($params) !~ /hash/i ) {
227 $self->throw("Must specify a hash reference with the parameter '-parameters");
229 while( my ($key,$value) = each %{$params} ) {
230 $self->{'_parameters'}->set_parameter($key => $value);
231 # $self->add_parameter($key,$value);
234 if( defined $stats ) {
235 if( ref($stats) !~ /hash/i ) {
236 $self->throw("Must specify a hash reference with the parameter '-statistics");
238 while( my ($key,$value) = each %{$stats} ) {
239 $self->{'_statistics'}->set_statistic($key => $value);
240 # $self->add_statistic($key,$value);
244 if( defined $hits ) {
245 $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;
247 foreach my $s ( @
$hits ) {
257 Usage : my $r_type = $result->algorithm
258 Function: Obtain the name of the algorithm used to obtain the Result
259 Returns : string (e.g., BLASTP)
260 Args : [optional] scalar string to set value
265 my ($self,$value) = @_;
266 my $previous = $self->{'_algorithm'};
267 if( defined $value || ! defined $previous ) {
268 $value = $previous = '' unless defined $value;
269 $self->{'_algorithm'} = $value;
274 =head2 algorithm_version
276 Title : algorithm_version
277 Usage : my $r_version = $result->algorithm_version
278 Function: Obtain the version of the algorithm used to obtain the Result
279 Returns : string (e.g., 2.1.2)
280 Args : [optional] scalar string to set algorithm version value
# Get/set accessor for the algorithm version string, kept under the
# '_algorithm_version' key of the object hash.
284 sub algorithm_version
{
285 my ($self,$value) = @_;
286 my $previous = $self->{'_algorithm_version'};
# Write when the caller supplied a value, or when nothing has been stored yet.
287 if( defined $value || ! defined $previous ) {
# No argument and nothing stored: default both the new and previous value to ''.
288 $value = $previous = '' unless defined $value;
289 $self->{'_algorithm_version'} = $value;
# NOTE(review): the closing braces and the return statement of this sub are
# not visible in this excerpt (presumably it returns $previous -- confirm).
295 =head2 Bio::Search::Result::ResultI interface methods
297 Bio::Search::Result::ResultI implementation
302 Usage : while( $hit = $result->next_hit()) { ... }
303 Function: Returns the next available Hit object, representing potential
304 matches between the query and various entities from the database.
305 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
# Body of the hit iterator: fetches the hit at the current iterator position,
# building it through the hit factory first if it is still a plain hash ref.
312 my ($self,@args) = @_;
# _nexthitindex hands back the current position and post-increments it.
313 my $index = $self->_nexthitindex;
# Stop once the position is past the stored hits.
# NOTE(review): '>' lets $index == number of hits through; that slot is
# undefined, so the lookup below yields undef rather than a hit. '>=' may
# have been intended -- confirm.
314 return if $index > scalar @
{$self->{'_hits'}};
316 my $hit = $self->{'_hits'}->[$index];
# A hash ref entry is a not-yet-built hit: build it via the factory (throwing
# if no factory is set), cache the built object in place of the hash, and
# drop its bookkeeping entry from _hashes.
317 if (ref($hit) eq 'HASH') {
318 my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
319 $hit = $factory->create_object(%{$hit});
320 $self->{'_hits'}->[$index] = $hit;
321 delete $self->{_hashes
}->{$index};
329 Usage : $id = $result->query_name();
330 Function: Get the string identifier of the query used by the
331 algorithm that performed the search.
333 Args : [optional] new string value for query name
338 my ($self,$value) = @_;
339 my $previous = $self->{'_queryname'};
340 if( defined $value || ! defined $previous ) {
341 $value = $previous = '' unless defined $value;
342 $self->{'_queryname'} = $value;
347 =head2 query_accession
349 Title : query_accession
350 Usage : $id = $result->query_accession();
351 Function: Get the accession (if available) for the query sequence
353 Args : [optional] new string value for accession
# Get/set accessor for the query accession, kept under the '_queryacc' key
# of the object hash.
357 sub query_accession
{
358 my ($self,$value) = @_;
359 my $previous = $self->{'_queryacc'};
# Write when the caller supplied a value, or when nothing has been stored yet.
360 if( defined $value || ! defined $previous ) {
# No argument and nothing stored: default both the new and previous value to ''.
361 $value = $previous = '' unless defined $value;
362 $self->{'_queryacc'} = $value;
# NOTE(review): the closing braces and the return statement of this sub are
# not visible in this excerpt (presumably it returns $previous -- confirm).
370 Usage : $gi = $result->query_gi();
371 Function: Retrieve the NCBI Unique ID (aka the GI #),
372 if available, for the query
373 Returns : a scalar string (empty string if not set)
# Body of the query GI accessor: stores an explicit value when given and
# returns whatever ends up under the '_query_gi' key.
379 my ($self,$value) = @_;
380 if( defined $value ) {
381 $self->{'_query_gi'} = $value;
# Fallback (the 'else' line itself is not visible in this excerpt -- confirm):
# take the digits from a query name starting with "gi|<digits>", or use ''
# when the name does not match that pattern.
383 $self->{'_query_gi'} = $self->query_name =~ m{^gi\|(\d+)} ?
$1 : '';
385 return $self->{'_query_gi'};
391 Usage : $id = $result->query_length();
392 Function: Get the length of the query sequence
395 Args : [optional] new integer value for query length
400 my ($self,$value) = @_;
401 my $previous = $self->{'_querylength'};
402 if( defined $value || ! defined $previous ) {
403 $value = $previous = 0 unless defined $value;
404 $self->{'_querylength'} = $value;
409 =head2 query_description
411 Title : query_description
412 Usage : $id = $result->query_description();
413 Function: Get the description of the query sequence
416 Args : [optional] new string for the query description
# Get/set accessor for the query description, kept under the '_querydesc'
# key of the object hash.
420 sub query_description
{
421 my ($self,$value) = @_;
422 my $previous = $self->{'_querydesc'};
# Write when the caller supplied a value, or when nothing has been stored yet.
423 if( defined $value || ! defined $previous ) {
# No argument and nothing stored: default both the new and previous value to ''.
424 $value = $previous = '' unless defined $value;
425 $self->{'_querydesc'} = $value;
# NOTE(review): the closing braces and the return statement of this sub are
# not visible in this excerpt (presumably it returns $previous -- confirm).
433 Title : database_name
434 Usage : $name = $result->database_name()
435 Function: Used to obtain the name of the database that the query was searched
436 against by the algorithm.
437 Returns : a scalar string
438 Args : [optional] new string for the db name
443 my ($self,$value) = @_;
444 my $previous = $self->{'_dbname'};
445 if( defined $value || ! defined $previous ) {
446 $value = $previous = '' unless defined $value;
447 $self->{'_dbname'} = $value;
452 =head2 database_letters
454 Title : database_letters
455 Usage : $size = $result->database_letters()
456 Function: Used to obtain the size of database that was searched against.
457 Returns : a scalar integer (units specific to algorithm, but probably the
458 total number of residues in the database, if available) or undef if
459 the information was not available to the Processor object.
460 Args : [optional] new scalar integer for number of letters in db
# Get/set accessor for the database size in letters, kept under the
# '_dbletters' key of the object hash.
465 sub database_letters
{
466 my ($self,$value) = @_;
467 my $previous = $self->{'_dbletters'};
# Write when the caller supplied a value, or when nothing has been stored yet.
468 if( defined $value || ! defined $previous ) {
# No argument and nothing stored: the default written here is the empty
# string, not undef and not 0.
469 $value = $previous = '' unless defined $value;
470 $self->{'_dbletters'} = $value;
# NOTE(review): the closing braces and the return statement of this sub are
# not visible in this excerpt (presumably it returns $previous -- confirm).
475 =head2 database_entries
477 Title : database_entries
478 Usage : $num_entries = $result->database_entries()
479 Function: Used to obtain the number of entries contained in the database.
480 Returns : a scalar integer representing the number of entities in the database
481 or undef if the information was not available.
482 Args : [optional] new integer for the number of sequence entries in the db
# Get/set accessor for the number of database entries, kept under the
# '_dbentries' key of the object hash.
487 sub database_entries
{
488 my ($self,$value) = @_;
489 my $previous = $self->{'_dbentries'};
# Write when the caller supplied a value, or when nothing has been stored yet.
490 if( defined $value || ! defined $previous ) {
# No argument and nothing stored: the default written here is the empty
# string, not undef and not 0.
491 $value = $previous = '' unless defined $value;
492 $self->{'_dbentries'} = $value;
# NOTE(review): the closing braces and the return statement of this sub are
# not visible in this excerpt (presumably it returns $previous -- confirm).
499 Title : get_parameter
500 Usage : my $gap_ext = $report->get_parameter('gapext')
501 Function: Returns the value for a specific parameter used
502 when running this report
504 Args : name of parameter (string)
509 my ($self,$name) = @_;
510 return $self->{'_parameters'}->get_parameter($name);
513 =head2 available_parameters
515 Title : available_parameters
516 Usage : my @params = $report->available_parameters
517 Function: Returns the names of the available parameters
518 Returns : Return list of available parameters used for this report
# Lists the available parameter names by delegating to the object stored
# under the '_parameters' key.
# NOTE(review): the line that unpacks $self and the closing brace are not
# visible in this excerpt.
523 sub available_parameters
{
525 return $self->{'_parameters'}->available_parameters;
531 Title : get_statistic
532 Usage : my $kappa = $report->get_statistic('kappa')
533 Function: Returns the value for a specific statistic available
536 Args : name of statistic (string)
541 my ($self,$key) = @_;
542 return $self->{'_statistics'}->get_statistic($key);
545 =head2 available_statistics
547 Title : available_statistics
548 Usage : my @statnames = $report->available_statistics
549 Function: Returns the names of the available statistics
550 Returns : Return list of available statistics used for this report
# Lists the available statistic names by delegating to the object stored
# under the '_statistics' key.
# NOTE(review): the line that unpacks $self and the closing brace are not
# visible in this excerpt.
555 sub available_statistics
{
557 return $self->{'_statistics'}->available_statistics;
560 =head2 Bio::Search::Report
562 Bio::Search::Result::GenericResult specific methods
567 Usage : $report->add_hit($hit)
568 Function: Adds a HitI to the stored list of hits
569 Returns : Number of HitI currently stored
570 Args : Bio::Search::Hit::HitI
# Body of add_hit: appends $s to the stored hit list and returns the new
# number of stored hits. ($s is unpacked on a line not visible here.)
# Accepted inputs are a plain hash ref (a hit to be built later) or an object
# that isa Bio::Search::Hit::HitI.
# NOTE(review): calling ->isa on an unblessed, non-hash reference dies before
# the throw below can report it -- a blessed() guard may be wanted; confirm.
576 if (ref($s) eq 'HASH' || $s->isa('Bio::Search::Hit::HitI') ) {
577 push @
{$self->{'_hits'}}, $s;
# Rejection path (the 'else' line itself is not visible in this excerpt).
580 $self->throw("Passed in " .ref($s)." as a Hit which is not a Bio::Search::HitI.");
# Remember the index of a hash-ref entry in _hashes so it can be turned into
# a real hit object later.
583 if (ref($s) eq 'HASH') {
584 $self->{_hashes
}->{$#{$self->{'_hits'}}} = 1;
586 return scalar @
{$self->{'_hits'}};
592 Usage : $result->hit_factory($hit_factory)
593 Function: Get/set the factory used to build HitI objects if necessary.
594 Returns : Bio::Factory::ObjectFactoryI
595 Args : Bio::Factory::ObjectFactoryI
601 if (@_) { $self->{_hit_factory
} = shift }
602 return $self->{_hit_factory
} || return;
608 Usage : $result->rewind;
609 Function: Allow one to reset the Hit iterator to the beginning
610 Since this is an in-memory implementation
618 $self->{'_hitindex'} = 0;
624 Title : _nexthitindex
630 my ($self,@args) = @_;
631 return $self->{'_hitindex'}++;
637 Title : add_parameter
638 Usage : $report->add_parameter('gapext', 11);
639 Function: Adds a parameter
641 Args : key - key value name for this parameter
642 value - value for this parameter
647 my ($self,$key,$value) = @_;
648 $self->{'_parameters'}->set_parameter($key => $value);
654 Title : add_statistic
655 Usage : $report->add_statistic('lambda', 2.3);
656 Function: Adds a statistic
658 Args : key - key value name for this statistic
659 value - value for this statistic
664 my ($self,$key,$value) = @_;
665 $self->{'_statistics'}->set_statistic($key => $value);
673 Usage : my $hitcount= $result->num_hits
674 Function: returns the number of hits for this query result
682 if (not defined $self->{'_hits'}) {
683 $self->throw("Can't get Hits: data not collected.");
685 return scalar(@
{$self->{'_hits'}});
692 Usage : my @hits = $result->hits
693 Function: Returns the available hits for this Result
694 Returns : Array of L<Bio::Search::Hit::HitI> objects
# Body of hits(): first turns every hit still stored as a hash ref into an
# object, then copies the stored hits into @hits.
# The keys of _hashes are the indices of hash-ref entries in '_hits'; each is
# replaced in place by the object the hit factory builds from it (throwing if
# no factory is set), and its bookkeeping entry is removed.
703 foreach my $i (keys %{$self->{_hashes
} || {}}) {
704 my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
705 $self->{'_hits'}->[$i] = $factory->create_object(%{$self->{'_hits'}->[$i]});
706 delete $self->{_hashes
}->{$i};
# Copy the list only when '_hits' actually holds a reference.
# (@hits is declared on a line not visible in this excerpt.)
710 if (ref $self->{'_hits'}) {
711 @hits = @
{$self->{'_hits'}};
716 =head2 algorithm_reference
718 Title : algorithm_reference
719 Usage : $obj->algorithm_reference($newval)
721 Returns : string containing literature reference for the algorithm
722 Args : newvalue string (optional)
723 Comments: Formerly named program_reference(), which is still supported
724 for backwards compatibility.
# Get/set accessor for the algorithm's literature reference string.
# The value lives under the 'algorithm_reference' key (no leading underscore,
# unlike most other keys in this object) and is returned as stored, so it is
# undef until a defined value has been set.
728 sub algorithm_reference
{
729 my ($self,$value) = @_;
# Only a defined argument overwrites the stored value.
730 if( defined $value) {
731 $self->{'algorithm_reference'} = $value;
733 return $self->{'algorithm_reference'};
# NOTE(review): the closing braces of the 'if' and of the sub are not visible
# in this excerpt.
736 =head2 program_reference
738 Title : program_reference
739 Usage : $obj->program_reference()
741 Returns : string containing literature reference for the algorithm
743 Comments: Deprecated - use algorithm_reference() instead.
# Deprecated alias for algorithm_reference(): forwards the invocant and any
# arguments unchanged and hands back whatever algorithm_reference() returns.
sub program_reference {
    my ($self, @args) = @_;
    return $self->algorithm_reference(@args);
}
752 Usage : $obj->rid($newval)
754 Returns : value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
755 Args : newvalue (optional)
756 Comments: The default implementation in ResultI returns an empty string
757 rather than throwing a NotImplemented exception, since
758 the RID may not always be available and is not critical.
759 See: (1) https://www.ncbi.nlm.nih.gov/Class/MLACourse/Modules/BLAST/rid.html
760 (2) https://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node63.html
764 my ($self,$value) = @_;
765 if( defined $value) {
766 $self->{'rid'} = $value;
768 return $self->{'rid'};
773 See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
780 # Watch the double negative!
781 # result = 0 means "yes hits were found"
782 # result = 1 means "no hits were found"
784 return $self->{'_no_hits_found'};
788 =head2 set_no_hits_found
790 See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
# Marks this result as having found no hits by setting the '_no_hits_found'
# flag to 1 (1 means "no hits were found").
# NOTE(review): the line that unpacks $self and the closing brace are not
# visible in this excerpt.
794 sub set_no_hits_found
{
796 $self->{'_no_hits_found'} = 1;
803 Usage : print $blast->to_string;
804 Function: Returns a string representation for the Blast result.
805 Primarily intended for debugging purposes.
807 Returns : A string of the form:
808 [GenericResult] <analysis_method> query=<name> <description> db=<database>
810 [GenericResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ
817 my $str = ref($self) . ", algorithm= " . $self->algorithm . ", query=" . $self->query_name . " " . $self->query_description .", db=" . $self->database_name;