Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / Search / Result / GenericResult.pm
blob4acc305f573bd2c300e39179aa2bbdd1a457bafb
2 # BioPerl module for Bio::Search::Result::GenericResult
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Search::Result::GenericResult - Generic Implementation of
17 Bio::Search::Result::ResultI interface applicable to most search
18 results.
20 =head1 SYNOPSIS
23 # typically one gets Results from a SearchIO stream
24 use Bio::SearchIO;
25 my $io = Bio::SearchIO->new(-format => 'blast',
26 -file => 't/data/HUMBETGLOA.tblastx');
27 while( my $result = $io->next_result ) {
28 # process all search results within the input stream
29 while( my $hit = $result->next_hit ) {
30 # insert code here for hit processing
34 use Bio::Search::Result::GenericResult;
35 my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
36 # typically these are created from a Bio::SearchIO stream
37 my $result = Bio::Search::Result::GenericResult->new
38 ( -query_name => 'HUMBETGLOA',
39 -query_accession => '',
40 -query_description => 'Human haplotype C4 beta-globin gene, complete cds.',
41 -query_length => 3002,
42 -database_name => 'ecoli.aa',
43 -database_letters => 4662239,
44 -database_entries => 400,
45 -parameters => { 'e' => '0.001' },
46 -statistics => { 'kappa' => 0.731 },
47 -algorithm => 'blastp',
48 -algorithm_version => '2.1.2',
51 my $id = $result->query_name();
53 my $desc = $result->query_description();
55 my $name = $result->database_name();
57 my $size = $result->database_letters();
59 my $num_entries = $result->database_entries();
61 my $gap_ext = $result->get_parameter('e');
63 my @params = $result->available_parameters;
65 my $kappa = $result->get_statistic('kappa');
67 my @statnames = $result->available_statistics;
69 # TODO: Show how to configure a SearchIO stream so that it generates
70 # GenericResult objects.
73 =head1 DESCRIPTION
75 This object is an implementation of the Bio::Search::Result::ResultI
76 interface and provides a generic place to store results from a
77 sequence database search.
79 Unless you're writing a parser, you won't ever need to create a
80 GenericResult or any other ResultI-implementing object. If you use
81 the SearchIO system, ResultI objects are created automatically from
82 a SearchIO stream which returns Bio::Search::Result::ResultI objects.
84 For documentation on what you can do with GenericResult (and other ResultI
85 objects), please see the API documentation in
86 L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
88 =head1 FEEDBACK
90 =head2 Mailing Lists
92 User feedback is an integral part of the evolution of this and other
93 Bioperl modules. Send your comments and suggestions preferably to
94 the Bioperl mailing list. Your participation is much appreciated.
96 bioperl-l@bioperl.org - General discussion
97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99 =head2 Support
101 Please direct usage questions or support issues to the mailing list:
103 I<bioperl-l@bioperl.org>
105 rather than to the module maintainer directly. Many experienced and
106 responsive experts will be able to look at the problem and quickly
107 address it. Please include a thorough description of the problem
108 with code and data examples if at all possible.
110 =head2 Reporting Bugs
112 Report bugs to the Bioperl bug tracking system to help us keep track
113 of the bugs and their resolution. Bug reports can be submitted via the
114 web:
116 https://github.com/bioperl/bioperl-live/issues
118 =head1 AUTHOR - Jason Stajich and Steve Chervitz
120 Email jason@bioperl.org
121 Email sac@bioperl.org
123 =head1 CONTRIBUTORS
125 Sendu Bala, bix@sendu.me.uk
127 =head1 APPENDIX
129 The rest of the documentation details each of the object methods.
130 Internal methods are usually preceded with a _
132 =cut
135 # Let the code begin...
138 package Bio::Search::Result::GenericResult;
139 use strict;
141 use Bio::Search::GenericStatistics;
142 use Bio::Tools::Run::GenericParameters;
144 # bug #1420
145 #use overload
146 # '""' => \&to_string;
148 use base qw(Bio::Root::Root Bio::Search::Result::ResultI);
150 =head2 new
152 Title : new
153 Usage : my $obj = Bio::Search::Result::GenericResult->new();
154 Function: Builds a new Bio::Search::Result::GenericResult object
155 Returns : Bio::Search::Result::GenericResult
156 Args : -query_name => Name of query Sequence
157 -query_accession => Query accession number (if available)
158 -query_description => Description of query sequence
159 -query_length => Length of query sequence
160 -database_name => Name of database
161 -database_letters => Number of residues in database
162 -database_entries => Number of entries in database
163 -hits => array ref of Bio::Search::Hit::HitI objects
164 -parameters => hash ref of search parameters (key => value)
165 -statistics => hash ref of search statistics (key => value)
166 -algorithm => program name (blastx)
167 -algorithm_version => version of the algorithm (2.1.2)
168 -algorithm_reference => literature reference string for this algorithm
169 -rid => value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
170 -hit_factory => Bio::Factory::ObjectFactoryI capable of making
171 Bio::Search::Hit::HitI objects
173 =cut
# Constructor.
#
# Builds the object through the parent class constructor, initialises the
# hit list, the hit iterator index and the statistics/parameters containers,
# then applies whichever named arguments were supplied.
#
# Throws if -parameters or -statistics is not a hash reference, or if -hits
# is not an array reference.
sub new {
    my($class,@args) = @_;

    my $self = $class->SUPER::new(@args);

    $self->{'_hits'} = [];
    $self->{'_hitindex'} = 0;
    $self->{'_statistics'} = Bio::Search::GenericStatistics->new();
    $self->{'_parameters'} = Bio::Tools::Run::GenericParameters->new();

    # The key names below are positional: the Nth name fills the Nth
    # variable.  RID must sit between ALGORITHM_REFERENCE and HIT_FACTORY,
    # otherwise -rid is dropped and -hit_factory ends up in $rid.
    my ($qname,$qacc,$qdesc,$qlen, $qgi,
        $dbname,$dblet,$dbent,$params,
        $stats, $hits, $algo, $algo_v,
        $prog_ref, $algo_r, $rid, $hit_factory) = $self->_rearrange([qw(QUERY_NAME
                                                                         QUERY_ACCESSION
                                                                         QUERY_DESCRIPTION
                                                                         QUERY_LENGTH
                                                                         QUERY_GI
                                                                         DATABASE_NAME
                                                                         DATABASE_LETTERS
                                                                         DATABASE_ENTRIES
                                                                         PARAMETERS
                                                                         STATISTICS
                                                                         HITS
                                                                         ALGORITHM
                                                                         ALGORITHM_VERSION
                                                                         PROGRAM_REFERENCE
                                                                         ALGORITHM_REFERENCE
                                                                         RID
                                                                         HIT_FACTORY
                                                                        )],@args);

    # -program_reference is the older name for -algorithm_reference.
    $algo_r ||= $prog_ref;
    defined $algo   && $self->algorithm($algo);
    defined $algo_v && $self->algorithm_version($algo_v);
    defined $algo_r && $self->algorithm_reference($algo_r);

    defined $rid    && $self->rid($rid);

    defined $qname  && $self->query_name($qname);
    defined $qacc   && $self->query_accession($qacc);
    defined $qdesc  && $self->query_description($qdesc);
    defined $qlen   && $self->query_length($qlen);
    defined $qgi    && $self->query_gi($qgi);
    defined $dbname && $self->database_name($dbname);
    defined $dblet  && $self->database_letters($dblet);
    defined $dbent  && $self->database_entries($dbent);

    defined $hit_factory && $self->hit_factory($hit_factory);

    if( defined $params ) {
        if( ref($params) !~ /hash/i ) {
            $self->throw("Must specify a hash reference with the parameter '-parameters");
        }
        while( my ($key,$value) = each %{$params} ) {
            $self->{'_parameters'}->set_parameter($key => $value);
        }
    }
    if( defined $stats ) {
        if( ref($stats) !~ /hash/i ) {
            $self->throw("Must specify a hash reference with the parameter '-statistics");
        }
        while( my ($key,$value) = each %{$stats} ) {
            $self->{'_statistics'}->set_statistic($key => $value);
        }
    }

    if( defined $hits ) {
        $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;

        foreach my $s ( @$hits ) {
            $self->add_hit($s);
        }
    }
    return $self;
}
254 =head2 algorithm
256 Title : algorithm
257 Usage : my $r_type = $result->algorithm
258 Function: Obtain the name of the algorithm used to obtain the Result
259 Returns : string (e.g., BLASTP)
260 Args : [optional] scalar string to set value
262 =cut
# Get/set the algorithm name.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub algorithm {
    my ($self, $value) = @_;
    my $old = $self->{'_algorithm'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    # A new value was supplied, or the slot has never been set.
    $old = $value = '' if !defined $value;
    $self->{'_algorithm'} = $value;
    return $old;
}
274 =head2 algorithm_version
276 Title : algorithm_version
277 Usage : my $r_version = $result->algorithm_version
278 Function: Obtain the version of the algorithm used to obtain the Result
279 Returns : string (e.g., 2.1.2)
280 Args : [optional] scalar string to set algorithm version value
282 =cut
# Get/set the algorithm version string.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub algorithm_version {
    my ($self, $value) = @_;
    my $old = $self->{'_algorithm_version'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_algorithm_version'} = $value;
    return $old;
}
295 =head2 Bio::Search::Result::ResultI interface methods
297 Bio::Search::Result::ResultI implementation
299 =head2 next_hit
301 Title : next_hit
302 Usage : while( $hit = $result->next_hit()) { ... }
303 Function: Returns the next available Hit object, representing potential
304 matches between the query and various entities from the database.
305 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
306 Args : none
309 =cut
# Return the next hit from the internal list, advancing the iterator.
#
# Returns nothing (empty list / undef) once the iterator has moved past the
# last stored hit.  A hit stored as a plain hash ref is turned into an object
# through the hit factory on first access and the object replaces the hash
# in the list.
#
# Throws if a hash-ref hit is encountered and no hit factory is set.
sub next_hit {
    my ($self) = @_;
    my $index = $self->_nexthitindex;

    # Valid indices are 0 .. count-1, so index == count is already past the
    # end.  Using '>' here let that case fall through and return (undef) in
    # list context instead of an empty list.
    return if $index >= scalar @{$self->{'_hits'}};

    my $hit = $self->{'_hits'}->[$index];
    if (ref($hit) eq 'HASH') {
        my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        $hit = $factory->create_object(%{$hit});
        $self->{'_hits'}->[$index] = $hit;
        # This slot no longer holds an unbuilt hash.
        delete $self->{_hashes}->{$index};
    }
    return $hit;
}
326 =head2 query_name
328 Title : query_name
329 Usage : $id = $result->query_name();
330 Function: Get the string identifier of the query used by the
331 algorithm that performed the search.
332 Returns : a string.
333 Args : [optional] new string value for query name
335 =cut
# Get/set the query name.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub query_name {
    my ($self, $value) = @_;
    my $old = $self->{'_queryname'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_queryname'} = $value;
    return $old;
}
347 =head2 query_accession
349 Title : query_accession
350 Usage : $id = $result->query_accession();
351 Function: Get the accession (if available) for the query sequence
352 Returns : a string
353 Args : [optional] new string value for accession
355 =cut
# Get/set the query accession.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub query_accession {
    my ($self, $value) = @_;
    my $old = $self->{'_queryacc'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_queryacc'} = $value;
    return $old;
}
367 =head2 query_gi
369 Title : query_gi
370 Usage : $gi = $result->query_gi();
371 Function: Retrieve the NCBI Unique ID (aka the GI #),
372 if available, for the query
373 Returns : a scalar string (empty string if not set)
374 Args : [optional] new value for the query GI
376 =cut
# Get/set the GI number of the query.
#
# With an argument: stores it and returns it.
# Without an argument: returns the explicitly stored GI if there is one;
# otherwise derives it from a leading "gi|<digits>" in the query name, or
# returns '' when the query name has no such prefix.
#
# The previous implementation re-derived the value on every read and wrote
# it back, which silently discarded a GI that had been set explicitly.
sub query_gi {
    my ($self,$value) = @_;
    if( defined $value ) {
        $self->{'_query_gi'} = $value;
    }
    return $self->{'_query_gi'} if defined $self->{'_query_gi'};

    # Derived values are not cached, so a later change of the query name is
    # still reflected here.
    return $self->query_name =~ m{^gi\|(\d+)} ? $1 : '';
}
388 =head2 query_length
390 Title : query_length
391 Usage : $id = $result->query_length();
392 Function: Get the length of the query sequence
393 used in the search.
394 Returns : a number
395 Args : [optional] new integer value for query length
397 =cut
# Get/set the query length.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to 0 and 0 is returned.
sub query_length {
    my ($self, $value) = @_;
    my $old = $self->{'_querylength'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = 0 if !defined $value;
    $self->{'_querylength'} = $value;
    return $old;
}
409 =head2 query_description
411 Title : query_description
412 Usage : $id = $result->query_description();
413 Function: Get the description of the query sequence
414 used in the search.
415 Returns : a string
416 Args : [optional] new string for the query description
418 =cut
# Get/set the query description.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub query_description {
    my ($self, $value) = @_;
    my $old = $self->{'_querydesc'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_querydesc'} = $value;
    return $old;
}
431 =head2 database_name
433 Title : database_name
434 Usage : $name = $result->database_name()
435 Function: Used to obtain the name of the database that the query was searched
436 against by the algorithm.
437 Returns : a scalar string
438 Args : [optional] new string for the db name
440 =cut
# Get/set the database name.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub database_name {
    my ($self, $value) = @_;
    my $old = $self->{'_dbname'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_dbname'} = $value;
    return $old;
}
452 =head2 database_letters
454 Title : database_letters
455 Usage : $size = $result->database_letters()
456 Function: Used to obtain the size of database that was searched against.
457 Returns : a scalar integer (units specific to algorithm, but probably the
458 total number of residues in the database, if available) or an empty
459 string if the information was not available to the Processor object.
460 Args : [optional] new scalar integer for number of letters in db
463 =cut
# Get/set the number of letters in the database.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub database_letters {
    my ($self, $value) = @_;
    my $old = $self->{'_dbletters'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_dbletters'} = $value;
    return $old;
}
475 =head2 database_entries
477 Title : database_entries
478 Usage : $num_entries = $result->database_entries()
479 Function: Used to obtain the number of entries contained in the database.
480 Returns : a scalar integer representing the number of entities in the database
481 or an empty string if the information was not available.
482 Args : [optional] new integer for the number of sequence entries in the db
485 =cut
# Get/set the number of entries in the database.
# Returns the value held BEFORE this call; an unset slot read without an
# argument is initialised to '' and '' is returned.
sub database_entries {
    my ($self, $value) = @_;
    my $old = $self->{'_dbentries'};

    # Plain read of an already-initialised slot: nothing to store.
    return $old if defined $old && !defined $value;

    $old = $value = '' if !defined $value;
    $self->{'_dbentries'} = $value;
    return $old;
}
497 =head2 get_parameter
499 Title : get_parameter
500 Usage : my $gap_ext = $report->get_parameter('gapext')
501 Function: Returns the value for a specific parameter used
502 when running this report
503 Returns : string
504 Args : name of parameter (string)
506 =cut
# Look up a single search parameter by name.
# Delegates to the parameters container held in $self->{'_parameters'} and
# returns whatever its get_parameter() returns.
sub get_parameter {
    my $self = shift;
    my ($name) = @_;
    my $store = $self->{'_parameters'};
    return $store->get_parameter($name);
}
513 =head2 available_parameters
515 Title : available_parameters
516 Usage : my @params = $report->available_parameters
517 Function: Returns the names of the available parameters
518 Returns : Return list of available parameters used for this report
519 Args : none
521 =cut
# List the names of the stored search parameters.
# Delegates to the parameters container held in $self->{'_parameters'}.
sub available_parameters {
    my $self = shift;
    my $store = $self->{'_parameters'};
    return $store->available_parameters;
}
529 =head2 get_statistic
531 Title : get_statistic
532 Usage : my $kappa = $report->get_statistic('kappa')
533 Function: Returns the value for a specific statistic available
534 from this report
535 Returns : string
536 Args : name of statistic (string)
538 =cut
# Look up a single search statistic by name.
# Delegates to the statistics container held in $self->{'_statistics'} and
# returns whatever its get_statistic() returns.
sub get_statistic {
    my $self = shift;
    my ($key) = @_;
    my $store = $self->{'_statistics'};
    return $store->get_statistic($key);
}
545 =head2 available_statistics
547 Title : available_statistics
548 Usage : my @statnames = $report->available_statistics
549 Function: Returns the names of the available statistics
550 Returns : Return list of available statistics used for this report
551 Args : none
553 =cut
# List the names of the stored search statistics.
# Delegates to the statistics container held in $self->{'_statistics'}.
sub available_statistics {
    my $self = shift;
    my $store = $self->{'_statistics'};
    return $store->available_statistics;
}
560 =head2 Bio::Search::Report
562 Bio::Search::Result::GenericResult specific methods
564 =head2 add_hit
566 Title : add_hit
567 Usage : $report->add_hit($hit)
568 Function: Adds a HitI to the stored list of hits
569 Returns : Number of HitI currently stored
570 Args : Bio::Search::Hit::HitI
572 =cut
# Append a hit to the stored list.
#
# Args   : either a Bio::Search::Hit::HitI object, or a plain (unblessed)
#          hash ref that will later be handed to the hit factory.
# Returns: the number of hits now stored.
# Throws : if the argument is neither of the above.  This includes undef,
#          plain scalars and other unblessed references, which previously
#          died with a raw "Can't call method isa" error instead.
sub add_hit {
    my ($self,$s) = @_;
    require Scalar::Util;

    my $is_hash = ref($s) eq 'HASH';

    # Only call ->isa on something that is actually an object.
    my $is_hit = !$is_hash
        && Scalar::Util::blessed($s)
        && $s->isa('Bio::Search::Hit::HitI');

    unless ( $is_hash || $is_hit ) {
        $self->throw("Passed in " .ref($s)." as a Hit which is not a Bio::Search::Hit::HitI.");
    }

    push @{$self->{'_hits'}}, $s;

    # Remember which slots still hold unbuilt hashes.
    if ($is_hash) {
        $self->{_hashes}->{$#{$self->{'_hits'}}} = 1;
    }
    return scalar @{$self->{'_hits'}};
}
589 =head2 hit_factory
591 Title : hit_factory
592 Usage : $hit->hit_factory($hit_factory)
593 Function: Get/set the factory used to build HitI objects if necessary.
594 Returns : Bio::Factory::ObjectFactoryI
595 Args : Bio::Factory::ObjectFactoryI
597 =cut
# Get/set the factory used to turn hash-ref hits into objects.
# Any argument (including undef) is stored.  Returns the stored factory when
# it is a true value, otherwise returns nothing.
sub hit_factory {
    my ($self, @rest) = @_;
    if (@rest) {
        $self->{_hit_factory} = $rest[0];
    }
    my $factory = $self->{_hit_factory};
    return $factory if $factory;
    return;
}
605 =head2 rewind
607 Title : rewind
608 Usage : $result->rewind;
609 Function: Allow one to reset the Hit iterator to the beginning
610 Since this is an in-memory implementation
611 Returns : none
612 Args : none
614 =cut
# Reset the hit iterator so the next next_hit() call starts from the first
# stored hit.  The value of the assignment (0) is what the sub returns.
sub rewind {
    my $self = shift;
    return $self->{'_hitindex'} = 0;
}
622 =head2 _nexthitindex
624 Title : _nexthitindex
625 Usage : private
627 =cut
# Private: return the current hit iterator index, then advance it by one.
sub _nexthitindex {
    my $self = shift;
    my $current = $self->{'_hitindex'}++;
    return $current;
}
635 =head2 add_parameter
637 Title : add_parameter
638 Usage : $report->add_parameter('gapext', 11);
639 Function: Adds a parameter
640 Returns : none
641 Args : key - key value name for this parameter
642 value - value for this parameter
644 =cut
# Store a search parameter under the given key.
# Delegates to the parameters container; whatever its set_parameter()
# returns is passed back to the caller.
sub add_parameter {
    my $self = shift;
    my ($key, $value) = @_;
    return $self->{'_parameters'}->set_parameter($key, $value);
}
652 =head2 add_statistic
654 Title : add_statistic
655 Usage : $report->add_statistic('lambda', 2.3);
656 Function: Adds a statistic
657 Returns : none
658 Args : key - key value name for this statistic
659 value - value for this statistic
661 =cut
# Store a search statistic under the given key.
# Delegates to the statistics container and returns nothing.
sub add_statistic {
    my $self = shift;
    my ($key, $value) = @_;
    my $store = $self->{'_statistics'};
    $store->set_statistic($key, $value);
    return;
}
670 =head2 num_hits
672 Title : num_hits
673 Usage : my $hitcount= $result->num_hits
674 Function: returns the number of hits for this query result
675 Returns : integer
676 Args : none
678 =cut
# Return the number of hits currently stored.
# Throws if the internal hit list is not defined.
sub num_hits {
    my ($self) = @_;
    $self->throw("Can't get Hits: data not collected.")
        unless defined $self->{'_hits'};
    my $count = @{ $self->{'_hits'} };
    return $count;
}
689 =head2 hits
691 Title : hits
692 Usage : my @hits = $result->hits
693 Function: Returns the available hits for this Result
694 Returns : Array of L<Bio::Search::Hit::HitI> objects
695 Args : none
698 =cut
# Return all stored hits as a list.
# Any hit still held as a plain hash (tracked in $self->{_hashes}) is first
# converted to an object through the hit factory and replaces the hash in
# the list.  Throws if such a hash exists and no hit factory is set.
sub hits {
    my $self = shift;

    my $pending = $self->{_hashes} || {};
    for my $slot (keys %{$pending}) {
        my $factory = $self->hit_factory
            || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        my $built = $factory->create_object(%{ $self->{'_hits'}->[$slot] });
        $self->{'_hits'}->[$slot] = $built;
        delete $self->{_hashes}->{$slot};
    }

    # Copy into an array so that scalar context yields a count, as before.
    my @found = ref $self->{'_hits'} ? @{ $self->{'_hits'} } : ();
    return @found;
}
716 =head2 algorithm_reference
718 Title : algorithm_reference
719 Usage : $obj->algorithm_reference($newval)
720 Function:
721 Returns : string containing literature reference for the algorithm
722 Args : newvalue string (optional)
723 Comments: Formerly named program_reference(), which is still supported
724 for backwards compatibility.
726 =cut
# Get/set the literature reference string for the algorithm.
# A defined argument is stored; the current stored value is always returned.
sub algorithm_reference {
    my $self = shift;
    my $value = shift;
    $self->{'algorithm_reference'} = $value if defined $value;
    return $self->{'algorithm_reference'};
}
736 =head2 program_reference
738 Title : program_reference
739 Usage : $obj->program_reference()
740 Function:
741 Returns : string containing literature reference for the algorithm
742 Args :
743 Comments: Deprecated - use algorithm_reference() instead.
745 =cut
# Deprecated alias: forwards all arguments to algorithm_reference().
sub program_reference {
    my ($self, @rest) = @_;
    return $self->algorithm_reference(@rest);
}
749 =head2 rid
751 Title : rid
752 Usage : $obj->rid($newval)
753 Function:
754 Returns : value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
755 Args : newvalue (optional)
756 Comments: The default implementation in ResultI returns an empty string
757 rather than throwing a NotImplemented exception, since
758 the RID may not always be available and is not critical.
759 See: (1) https://www.ncbi.nlm.nih.gov/Class/MLACourse/Modules/BLAST/rid.html
760 (2) https://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node63.html
761 =cut
# Get/set the BLAST Request ID.
# A defined argument is stored; the current stored value is always returned.
sub rid {
    my $self = shift;
    my $value = shift;
    $self->{'rid'} = $value if defined $value;
    return $self->{'rid'};
}
771 =head2 no_hits_found
773 See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
775 =cut
# Return the stored "no hits found" flag.
# Mind the double negative: a true value means NO hits were found; a false
# value means hits were found or the flag was never set.
sub no_hits_found {
    my ($self) = @_;
    my $flag = $self->{'_no_hits_found'};
    return $flag;
}
788 =head2 set_no_hits_found
790 See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
792 =cut
# Mark this result as having found no hits (stores 1 in the flag).
# The value of the assignment (1) is what the sub returns.
sub set_no_hits_found {
    my ($self) = @_;
    return $self->{'_no_hits_found'} = 1;
}
800 =head2 to_string
802 Title : to_string
803 Usage : print $blast->to_string;
804 Function: Returns a string representation for the Blast result.
805 Primarily intended for debugging purposes.
806 Example : see usage
807 Returns : A string of the form:
808 [GenericResult] <analysis_method> query=<name> <description> db=<database>
809 e.g.:
810 [GenericResult] BLASTP query=YEL060C vacuolar protease B, db=PDBUNIQ
811 Args : None
813 =cut
# Build a one-line description of this result, mainly for debugging:
#   <class>, algorithm= <algorithm>, query=<name> <description>, db=<database>
sub to_string {
    my ($self) = @_;

    # Each accessor is called in scalar context, in the same order as before.
    my $class = ref($self);
    my $algo  = $self->algorithm;
    my $query = $self->query_name;
    my $desc  = $self->query_description;
    my $db    = $self->database_name;

    return $class . ", algorithm= " . $algo . ", query=" . $query . " " . $desc . ", db=" . $db;
}