2 # BioPerl module for Bio::Taxon
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sendu Bala <bix@sendu.me.uk>
8 # Copyright Sendu Bala, based heavily on a module by Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Taxon - A node in a represented taxonomy
22 # Typically you will get a Taxon from a Bio::DB::Taxonomy object
23 # but here is how you initialize one
24 my $taxon = Bio::Taxon->new(-name => $name,
29 # Get one from a database
30 my $dbh = Bio::DB::Taxonomy->new(-source => 'flatfile',
32 -nodesfile=> '/path/to/nodes.dmp',
33 -namesfile=> '/path/to/names.dmp');
34 my $human = $dbh->get_taxon(-name => 'Homo sapiens');
35 $human = $dbh->get_taxon(-taxonid => '9606');
37 print "id is ", $human->id, "\n"; # 9606
38 print "rank is ", $human->rank, "\n"; # species
39 print "scientific name is ", $human->scientific_name, "\n"; # Homo sapiens
40 print "division is ", $human->division, "\n"; # Primates
42 my $mouse = $dbh->get_taxon(-name => 'Mus musculus');
44 # You can quickly make your own lineages with the list database
45 my @ranks = qw(superkingdom class genus species);
46 my @h_lineage = ('Eukaryota', 'Mammalia', 'Homo', 'Homo sapiens');
47 my $list_dbh = Bio::DB::Taxonomy->new(-source => 'list', -names => \@h_lineage,
49 $human = $list_dbh->get_taxon(-name => 'Homo sapiens');
50 my @names = $human->common_names; # @names is empty
51 $human->common_names('woman');
52 @names = $human->common_names; # @names contains woman
54 # You can switch to another database when you need more information
55 my $entrez_dbh = Bio::DB::Taxonomy->new(-source => 'entrez');
56 $human->db_handle($entrez_dbh);
57 @names = $human->common_names; # @names contains woman, human, man
59 # Since Bio::Taxon implements Bio::Tree::NodeI, we have access to those
60 # methods (and can manually create our own taxa and taxonomy without the use
62 my $homo = $human->ancestor;
64 # Though be careful with each_Descendent - unless you add_Descendent()
65 # yourself, you won't get an answer because unlike for ancestor(), Bio::Taxon
66 # does not ask the database for the answer. You can ask the database yourself
67 # using the same method:
68 ($human) = $homo->db_handle->each_Descendent($homo);
70 # We can also take advantage of Bio::Tree::Tree* methods:
71 # a) some methods are available with just an empty tree object
73 my $tree_functions = Bio::Tree::Tree->new();
74 my @lineage = $tree_functions->get_lineage_nodes($human);
75 my $lineage = $tree_functions->get_lineage_string($human);
76 my $lca = $tree_functions->get_lca($human, $mouse);
78 # b) for other methods, create a tree using your Taxon object
79 my $tree = Bio::Tree::Tree->new(-node => $human);
80 my @taxa = $tree->get_nodes;
81 $homo = $tree->find_node(-rank => 'genus');
83 # Normally you can't get the lca of a list-database derived Taxon and an
84 # entrez or flatfile-derived one because the two different databases might
85 # have different roots and different numbers of ranks between the root and the
86 # taxa of interest. To solve this, make a tree of the Taxon with the more
87 # detailed lineage and splice out all the taxa that won't be in the lineage of
89 my $entrez_mouse = $entrez_dbh->get_taxon(-name => 'Mus musculus');
90 my $list_human = $list_dbh->get_taxon(-name => 'Homo sapiens');
91 my $mouse_tree = Bio::Tree::Tree->new(-node => $entrez_mouse);
92 $mouse_tree->splice(-keep_rank => \@ranks);
93 $lca = $mouse_tree->get_lca($entrez_mouse, $list_human);
97 This is the next generation (for Bioperl) of representing Taxonomy
98 information. Previously all information was managed by a single
99 object called Bio::Species. This new implementation allows
100 representation of the intermediate nodes not just the species nodes
101 and can relate their connections.
107 User feedback is an integral part of the evolution of this and other
108 Bioperl modules. Send your comments and suggestions preferably to
109 the Bioperl mailing list. Your participation is much appreciated.
111 bioperl-l@bioperl.org - General discussion
112 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
116 Please direct usage questions or support issues to the mailing list:
118 I<bioperl-l@bioperl.org>
120 rather than to the module maintainer directly. Many experienced and
121 reponsive experts will be able look at the problem and quickly
122 address it. Please include a thorough description of the problem
123 with code and data examples if at all possible.
125 =head2 Reporting Bugs
127 Report bugs to the Bioperl bug tracking system to help us keep track
128 of the bugs and their resolution. Bug reports can be submitted via
131 https://github.com/bioperl/bioperl-live/issues
133 =head1 AUTHOR - Sendu Bala
135 Email bix@sendu.me.uk
139 Jason Stajich, jason-at-bioperl-dot-org (original Bio::Taxonomy::Node)
140 Juguang Xiao, juguang@tll.org.sg
141 Gabriel Valiente, valiente@lsi.upc.edu
145 The rest of the documentation details each of the object methods.
146 Internal methods are usually preceded with a _
154 use Scalar
::Util
qw(blessed);
156 use Bio
::DB
::Taxonomy
;
158 use base
qw(Bio::Tree::Node Bio::IdentifiableI);
164 Usage : my $obj = Bio::Taxonomy::Node->new();
165 Function: Builds a new Bio::Taxonomy::Node object
166 Returns : an instance of Bio::Taxonomy::Node
167 Args : -dbh => a reference to a Bio::DB::Taxonomy object
169 -name => a string representing the taxon name
171 -id => human readable id - typically NCBI taxid
172 -ncbi_taxid => same as -id, but explicitly say that it is an
174 -rank => node rank (one of 'species', 'genus', etc)
175 -common_names => array ref of all common names
176 -division => 'Primates', 'Rodents', etc
177 -genetic_code => genetic code table number
178 -mito_genetic_code => mitochondrial genetic code table number
179 -create_date => date created in database
180 -update_date => date last updated in database
181 -pub_date => date published in database
186 my ($class, @args) = @_;
187 my $self = $class->SUPER::new
(@args);
188 my ($name, $id, $objid, $rank, $div, $dbh, $ncbitaxid, $commonname,
189 $commonnames, $gcode, $mitocode, $createdate, $updatedate, $pubdate,
190 $parent_id) = $self->_rearrange([qw(NAME ID OBJECT_ID RANK DIVISION DBH
191 NCBI_TAXID COMMON_NAME COMMON_NAMES
192 GENETIC_CODE MITO_GENETIC_CODE
193 CREATE_DATE UPDATE_DATE PUB_DATE
196 if (defined $id && (defined $ncbitaxid && $ncbitaxid ne $id || defined $objid && $objid ne $id)) {
197 $self->warn("Only provide one of -id, -object_id or -ncbi_taxid, using $id\n");
199 elsif(!defined $id) {
200 $id = $objid || $ncbitaxid;
202 defined $id && $self->id($id);
203 $self->{_ncbi_tax_id_provided
} = 1 if $ncbitaxid;
205 defined $rank && $self->rank($rank);
206 defined $name && $self->node_name($name);
210 $self->throw("-common_names takes only an array reference") unless $commonnames
211 && ref($commonnames) eq 'ARRAY';
212 @common_names = @
{$commonnames};
215 my %c_names = map { $_ => 1 } @common_names;
216 unless (exists $c_names{$commonname}) {
217 unshift(@common_names, $commonname);
220 @common_names > 0 && $self->common_names(@common_names);
222 defined $gcode && $self->genetic_code($gcode);
223 defined $mitocode && $self->mitochondrial_genetic_code($mitocode);
224 defined $createdate && $self->create_date($createdate);
225 defined $updatedate && $self->update_date($updatedate);
226 defined $pubdate && $self->pub_date($pubdate);
227 defined $div && $self->division($div);
228 defined $dbh && $self->db_handle($dbh);
230 # Making an administrative decision to override this behavior, particularly
231 # for optimization reasons (if it works to cache it up front, why not?
232 # Please trust your implementations to get it right)
235 # deprecated and will issue a warning when method called,
236 # eventually to be removed completely as option
237 defined $parent_id && $self->parent_id($parent_id);
239 # some things want to freeze/thaw Bio::Species objects, but
240 # _root_cleanup_methods contains a CODE ref, delete it.
241 delete $self->{_root_cleanup_methods
};
247 =head1 Bio::IdentifiableI interface
249 Also see L<Bio::IdentifiableI>
254 Usage : $taxon->version($newval)
255 Returns : value of version (a scalar)
256 Args : on set, new value (a scalar or undef, optional)
262 return $self->{'version'} = shift if @_;
263 return $self->{'version'};
270 Usage : $taxon->authority($newval)
271 Returns : value of authority (a scalar)
272 Args : on set, new value (a scalar or undef, optional)
278 return $self->{'authority'} = shift if @_;
279 return $self->{'authority'};
286 Usage : $taxon->namespace($newval)
287 Returns : value of namespace (a scalar)
288 Args : on set, new value (a scalar or undef, optional)
294 return $self->{'namespace'} = shift if @_;
295 return $self->{'namespace'};
299 =head1 Bio::Taxonomy::Node implementation
304 Usage : $taxon->db_handle($newval)
305 Function: Get/Set Bio::DB::Taxonomy Handle
306 Returns : value of db_handle (a scalar) (Bio::DB::Taxonomy object)
307 Args : on set, new value (a scalar, optional) Bio::DB::Taxonomy object
309 Also see L<Bio::DB::Taxonomy>
318 if (! ref($db) || ! $db->isa('Bio::DB::Taxonomy')) {
319 $self->throw("Must provide a valid Bio::DB::Taxonomy object to db_handle()");
321 if (!$self->{'db_handle'} || ($self->{'db_handle'} && $self->{'db_handle'} ne $db)) {
322 my $new_self = $self->_get_similar_taxon_from_db($self, $db);
323 $self->_merge_taxa($new_self) if $new_self;
326 # NB: The Bio::DB::Taxonomy modules access this data member directly
327 # to avoid calling this method and going infinite
328 $self->{'db_handle'} = $db;
330 return $self->{'db_handle'};
337 Usage : $taxon->rank($newval)
338 Function: Get/set rank of this Taxon, 'species', 'genus', 'order', etc...
339 Returns : value of rank (a scalar)
340 Args : on set, new value (a scalar or undef, optional)
346 return $self->{'rank'} = shift if @_;
347 return $self->{'rank'};
354 Usage : $taxon->id($newval)
355 Function: Get/Set id (NCBI Taxonomy ID in most cases); object_id() and
356 ncbi_taxid() are synonyms of this method.
357 Returns : id (a scalar)
358 Args : none to get, OR scalar to set
364 return $self->SUPER::id
(@_);
373 Usage : $taxon->ncbi_taxid($newval)
374 Function: Get/Set the NCBI Taxonomy ID; This actually sets the id() but only
375 returns an id when ncbi_taxid has been explictely set with this
377 Returns : id (a scalar)
378 Args : none to get, OR scalar to set
383 my ($self, $id) = @_;
386 $self->{_ncbi_tax_id_provided
} = 1;
387 return $self->SUPER::id
($id);
390 if ($self->{_ncbi_tax_id_provided
}) {
391 return $self->SUPER::id
;
400 Usage : $taxon->parent_id()
401 Function: Get parent ID, (NCBI Taxonomy ID in most cases);
402 parent_taxon_id() is a synonym of this method.
403 Returns : value of parent_id (a scalar)
411 $self->{parent_id
} = shift;
413 if (defined $self->{parent_id
}) {
414 return $self->{parent_id
}
416 my $ancestor = $self->ancestor() || return;
417 return $ancestor->id;
420 *parent_taxon_id
= \
&parent_id
;
422 =head2 trusted_parent_id
424 Title : trusted_parent_id
425 Usage : $taxon->trusted_parent_id()
426 Function: If the parent_id is explicitly set, trust it
427 Returns : simple boolean value (whether or not it has been set)
429 Notes : Previously, the parent_id method was to be deprecated in favor of
430 using ancestor(). However this removes one key optimization point,
431 namely when an implementation has direct access to the taxon's
432 parent ID when retrieving the information for the taxon ID. This
433 method is in place so implementations can choose to (1) check whether
434 the parent_id is set and (2) trust that the implementation (whether
435 it is self or another implementation) set the parent_id correctly.
439 sub trusted_parent_id
{
440 return defined $_[0]->{parent_id
};
446 Usage : $taxon->genetic_code($newval)
447 Function: Get/set genetic code table
448 Returns : value of genetic_code (a scalar)
449 Args : on set, new value (a scalar or undef, optional)
455 return $self->{'genetic_code'} = shift if @_;
456 return $self->{'genetic_code'};
460 =head2 mitochondrial_genetic_code
462 Title : mitochondrial_genetic_code
463 Usage : $taxon->mitochondrial_genetic_code($newval)
464 Function: Get/set mitochondrial genetic code table
465 Returns : value of mitochondrial_genetic_code (a scalar)
466 Args : on set, new value (a scalar or undef, optional)
470 sub mitochondrial_genetic_code
{
472 return $self->{'mitochondrial_genetic_code'} = shift if @_;
473 return $self->{'mitochondrial_genetic_code'};
480 Usage : $taxon->create_date($newval)
481 Function: Get/Set Date this node was created (in the database)
482 Returns : value of create_date (a scalar)
483 Args : on set, new value (a scalar or undef, optional)
489 return $self->{'create_date'} = shift if @_;
490 return $self->{'create_date'};
497 Usage : $taxon->update_date($newval)
498 Function: Get/Set Date this node was updated (in the database)
499 Returns : value of update_date (a scalar)
500 Args : on set, new value (a scalar or undef, optional)
506 return $self->{'update_date'} = shift if @_;
507 return $self->{'update_date'};
514 Usage : $taxon->pub_date($newval)
515 Function: Get/Set Date this node was published (in the database)
516 Returns : value of pub_date (a scalar)
517 Args : on set, new value (a scalar or undef, optional)
523 return $self->{'pub_date'} = shift if @_;
524 return $self->{'pub_date'};
531 Usage : my $ancestor_taxon = $taxon->ancestor()
532 Function: Retrieve the ancestor taxon. Normally the database is asked what the
535 If you manually set the ancestor (or you make a Bio::Tree::Tree with
536 this object as an argument to new()), the database (if any) will not
537 be used for the purposes of this method.
539 To restore normal database behaviour, call ancestor(undef) (which
540 would remove this object from the tree), or request this taxon again
541 as a new Taxon object from the database.
550 my $ancestor = $self->SUPER::ancestor
(@_);
554 my $dbh = $self->db_handle;
555 #*** could avoid the db lookup if we knew our current id was definitely
556 # information from the db...
558 my $definitely_from_dbh = $self->_get_similar_taxon_from_db($self);
559 return $dbh->ancestor($definitely_from_dbh);
563 =head2 get_Parent_Node
565 Title : get_Parent_Node
566 Function: Synonym of ancestor()
571 sub get_Parent_Node
{
573 $self->warn("get_Parent_Node is deprecated, use ancestor() instead");
574 return $self->ancestor(@_);
578 =head2 each_Descendent
580 Title : each_Descendent
581 Usage : my @taxa = $taxon->each_Descendent();
582 Function: Get all the descendents for this Taxon (but not their descendents,
583 ie. not a recursive fetchall). get_Children_Nodes() is a synonym of
586 Note that this method never asks the database for the descendents;
587 it will only return objects you have manually set with
588 add_Descendent(), or where this was done for you by making a
589 Bio::Tree::Tree with this object as an argument to new().
591 To get the database descendents use
592 $taxon->db_handle->each_Descendent($taxon).
594 Returns : Array of Bio::Taxon objects
595 Args : optionally, when you have set your own descendents, the string
596 "height", "creation", "alpha", "revalpha", or coderef to be used to
597 sort the order of children nodes.
602 # implemented by Bio::Tree::Node
604 =head2 get_Children_Nodes
606 Title : get_Children_Nodes
607 Function: Synonym of each_Descendent()
612 sub get_Children_Nodes
{
614 $self->warn("get_Children_Nodes is deprecated, use each_Descendent() instead");
615 return $self->each_Descendent(@_);
622 Usage: $taxon->name('scientific', 'Homo sapiens');
623 $taxon->name('common', 'human', 'man');
624 my @names = @{$taxon->name('common')};
625 Function: Get/set the names. node_name(), scientific_name() and common_names()
626 are shorthands to name('scientific'), name('scientific') and
627 name('common') respectively.
628 Returns: names (a array reference)
629 Args: Arg1 => the name_class. You can assign any text, but the words
630 'scientific' and 'common' have the special meaning, as
631 scientific name and common name, respectively. 'scientific' and
632 'division' are treated specially, allowing only the first value
633 in the Arg2 list to be set.
634 Arg2 ... => list of names
639 my ($self, $name_class, @names) = @_;
640 $self->throw('No name class specified') unless defined $name_class;
643 if ($name_class =~ /scientific|division/i) {
644 delete $self->{'_names_hash'}->{$name_class};
645 @names = (shift(@names));
647 push @
{$self->{'_names_hash'}->{$name_class}}, @names;
649 return $self->{'_names_hash'}->{$name_class} || return;
656 Usage : $taxon->node_name($newval)
657 Function: Get/set the name of this taxon (node), typically the scientific name
658 of the taxon, eg. 'Primate' or 'Homo'; scientific_name() is a synonym
660 Returns : value of node_name (a scalar)
661 Args : on set, new value (a scalar or undef, optional)
667 my @v = @
{$self->name('scientific', @_) || []};
671 *scientific_name
= \
&node_name
;
677 Usage : $taxon->common_names($newval)
678 Function: Get/add the other names of this taxon, typically the genbank common
679 name and others, eg. 'Human' and 'man'. common_name() is a synonym
681 Returns : array of names in list context, one of those names in scalar context
682 Args : on add, new list of names (scalars, optional)
688 my @v = @
{$self->name('common', @_) || []};
689 return ( wantarray ) ?
@v : pop @v;
692 *common_name
= \
&common_names
;
698 Usage : $taxon->division($newval)
699 Function: Get/set the division this taxon belongs to, eg. 'Primates' or
701 Returns : value of division (a scalar)
702 Args : on set, new value (a scalar or undef, optional)
708 my @v = @
{$self->name('division',@_) || []};
713 # get a node from the database that is like the supplied node
714 sub _get_similar_taxon_from_db
{
715 #*** not really happy with this having to be called so much; there must be
717 my ($self, $taxon, $db) = @_;
718 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa("Bio::Taxon");
719 ($self->id || $self->node_name) || return;
720 $db ||= $self->db_handle || return;
721 if (!blessed
($db) || !$db->isa('Bio::DB::Taxonomy')) {
722 $self->throw("DB handle is not a Bio::DB::Taxonomy: got $db in node ".$self->node_name)
724 my $db_taxon = $db->get_taxon(-taxonid
=> $taxon->id) if $taxon->id;
726 my @try_ids = $db->get_taxonids($taxon->node_name) if $taxon->node_name;
728 my $own_rank = $taxon->rank || 'no rank';
729 foreach my $try_id (@try_ids) {
730 my $try = $db->get_taxon(-taxonid
=> $try_id);
731 my $try_rank = $try->rank || 'no rank';
732 if ($own_rank eq 'no rank' || $try_rank eq 'no rank' || $own_rank eq $try_rank) {
743 # merge data from supplied Taxon into self
745 my ($self, $taxon) = @_;
746 $self->throw("Must supply a Bio::Taxon object") unless ref($taxon) && $taxon->isa('Bio::Taxon');
747 return if ($taxon eq $self);
749 foreach my $attrib (qw(scientific_name version authority namespace genetic_code mitochondrial_genetic_code create_date update_date pub_date division id)) {
750 my $own = $self->$attrib();
751 my $his = $taxon->$attrib();
753 $self->$attrib($his);
757 my $own = $self->rank || 'no rank';
758 my $his = $taxon->rank || 'no rank';
759 if ($own eq 'no rank' && $his ne 'no rank') {
763 my %own_cnames = map { $_ => 1 } $self->common_names;
764 my %his_cnames = map { $_ => 1 } $taxon->common_names;
765 foreach (keys %his_cnames) {
766 unless (exists $own_cnames{$_}) {
767 $self->common_names($_);
771 #*** haven't merged the other things in names() hash, could do above much easier with direct access to object data
775 =head2 remove_Descendent
777 Title : remove_Descendent
778 Usage : $node->remove_Descedent($node_foo);
779 Function: Removes a specific node from being a Descendent of this node
781 Args : An array of Bio::Node::NodeI objects which have been previously
782 passed to the add_Descendent call of this object.
786 sub remove_Descendent
{
787 # need to override this method from Bio::Tree::Node since it casually
788 # throws away nodes if they don't branch
789 my ($self,@nodes) = @_;
791 foreach my $n ( @nodes ) {
792 if ($self->{'_desc'}->{$n->internal_id}) {
793 $self->{_removing_descendent
} = 1;
795 $self->{_removing_descendent
} = 0;
796 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
797 delete $self->{'_desc'}->{$n->internal_id};