2 # BioPerl module for Bio::Taxonomy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Juguang Xiao
8 # You may distribute this module under the same terms as perl itself
10 # POD documentation - main docs before the code
14 Bio::Taxonomy - representing Taxonomy.
18 # NB: This module is deprecated. Use Bio::Taxon in combination with
19 # Bio::Tree::Tree methods instead.
23 # CREATION: You can either create an instance by assigning it,
24 # or fetch it through factory.
26 # Create the nodes first. See Bio::Taxonomy::Node for details.
27 my $node_species_sapiens = Bio::Taxonomy::Node->new(
28 -object_id => 9606, # or -ncbi_taxid. Requird tag
30 'scientific' => ['sapiens'],
31 'common_name' => ['human']
33 -rank => 'species' # Required tag
35 my $node_genus_Homo = Bio::Taxonomy::Node->new(
37 -names => { 'scientific' => ['Homo'] },
40 my $node_class_Mammalia = Bio::Taxonomy::Node->new(
43 'scientific' => ['Mammalia'],
44 'common' => ['mammals']
48 my $taxonomy = Bio::Taxonomy->new;
49 $taxonomy->add_node($node_class_Mammalia);
50 $taxonomy->add_node($node_species_sapiens);
51 $taxonomy->add_node($node_genus_Homo);
53 # OR you can fetch it through a factory implementing
54 # Bio::Taxonomy::FactoryI
57 my $taxonomy = $factory->fetch_by_ncbi_taxid(40674);
61 # In this case, binomial returns a defined value.
62 my $binomial = $taxonomy->binomial;
64 # 'common_names' refers to the lowest-rank node's common names, in
66 my @common_names = $taxonomy->common_names;
68 # 'get_node', will return undef if the rank is no defined in
69 # taxonomy object. It will throw error if the rank string is not
70 # defined, say 'species lah'.
71 my $node = $taxonomy->get_node('class');
72 my @nodes = $taxonomy->get_all_nodes;
74 # Also, you can search for parent and children nodes, if taxonomy
77 my $parent_taxonomy = $taxonomy->get_parent
81 Bio::Taxonomy object represents any rank-level in taxonomy system,
82 rather than Bio::Species which is able to represent only
85 There are two ways to create Taxonomy object, e.g.
86 1) instantiate an object and assign all nodes on your own code; and
87 2) fetch an object by factory.
89 =head2 Creation by instantiation
91 The abstraction of Taxonomy is actually a hash in data structure
92 term. The keys of the hash are the rank names, such as 'genus' and
93 'species', and the values are the instances of Bio::Taxonomy::Node.
95 =head2 Creation by Factory fetching
97 NCBI Taxonomy system is well accepted as the standard. The Taxonomy
98 Factories in bioperl access this system, through HTTP to NCBI Entrez,
99 dump file, and advanced biosql database.
101 Bio::Taxonomy::FactoryI defines all methods that all implementations
104 $factory-E<gt>fetch is a general method to fetch Taxonomy by either
105 NCBI taxid or any types of names.
107 $factory-E<gt>fetch_parent($taxonomy), returns a Taxonomy that is
108 one-step higher rank of the taxonomy specified as argument.
110 $factory-E<gt>fetch_children($taxonomy), reports an array of Taxonomy
111 those are one-step lower rank of the taxonomy specified as the
114 =head2 Usage of Taxonomy object
122 User feedback is an integral part of the evolution of this and other
123 Bioperl modules. Send your comments and suggestions preferably to
124 the Bioperl mailing list. Your participation is much appreciated.
126 bioperl-l@bioperl.org - General discussion
127 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
131 Please direct usage questions or support issues to the mailing list:
133 I<bioperl-l@bioperl.org>
135 rather than to the module maintainer directly. Many experienced and
136 reponsive experts will be able look at the problem and quickly
137 address it. Please include a thorough description of the problem
138 with code and data examples if at all possible.
140 =head2 Reporting Bugs
142 Report bugs to the Bioperl bug tracking system to help us keep track
143 of the bugs and their resolution. Bug reports can be submitted via the
146 https://github.com/bioperl/bioperl-live/issues
150 Juguang Xiao, juguang@tll.org.sg
154 The rest of the documentation details each of the object
155 methods. Internal methods are usually preceded with a _
163 package Bio
::Taxonomy
;
167 use base
qw(Bio::Root::Root);
173 Usage : my $obj = Bio::Taxonomy->new();
174 Function: Builds a new Bio::Taxonomy object
175 Returns : Bio::Taxonomy
176 Args : -method -> method used to decide classification
178 -ranks -> what ranks are there
184 my ($class,@args) = @_;
186 my $self = $class->SUPER::new
(@args);
187 $self->warn("Bio::Taxonomy is deprecated. Use Bio::Taxon in combination with Bio::Tree::Tree instead.");
189 $self->{'_method'}='none';
190 $self->{'_ranks'}=[];
191 $self->{'_rank_hash'}={};
192 $self->{_hierarchy
} = {}; # used to store the nodes, with ranks as keys.
193 my ($method,$ranks,$order) = $self->_rearrange([qw(METHOD RANKS ORDER)], @args);
196 $self->method($method);
199 if (defined $ranks &&
200 (ref($ranks) eq "ARRAY") ) {
201 $self->ranks(@
$ranks);
204 # I think these are in the right order, but not sure:
205 # some parvorder|suborder and varietas|subspecies seem
206 # to be at the same level - any taxonomists?
207 # I don't expect that these will actually be used except as a way
208 # to find what ranks there are in taxonomic use
209 $self->ranks(('root',
210 'superkingdom', 'kingdom',
211 'superphylum', 'phylum', 'subphylum',
212 'superclass', 'class', 'subclass', 'infraclass',
213 'superorder', 'order', 'suborder', 'parvorder', 'infraorder',
214 'superfamily', 'family', 'subfamily',
217 'species group', 'species subgroup', 'species', 'subspecies',
218 'varietas', 'forma', 'no rank'));
228 Usage : $obj = taxonomy->method($method);
229 Function: set or return the method used to decide classification
237 my ($self,$value) = @_;
238 if (defined $value && $value=~/none|trust|lookup/) {
239 $self->{'_method'} = $value;
241 return $self->{'_method'};
248 Usage : @obj[][0-1] = taxonomy->classify($species);
249 Function: return a ranked classification
250 Returns : @obj of taxa and ranks as word pairs separated by "@"
251 Args : Bio::Species object
257 my ($self,$value) = @_;
260 if (! $value->isa('Bio::Species') ) {
261 $self->throw("Trying to classify $value which is not a Bio::Species object");
264 my @classes=reverse($value->classification);
266 if ($self->method eq 'none') {
267 for (my $i=0; $i < @classes-2; $i++) {
268 ($ranks[$i][0],$ranks[$i][1])=($classes[$i],'no rank');
270 push @ranks,[$classes[-2],'genus'];
271 push @ranks,[$value->binomial,'species'];
272 } elsif ($self->method eq 'trust') {
273 if (scalar(@classes)==scalar($self->ranks)) {
274 for (my $i=0; $i < @classes; $i++) {
275 if ($self->rank_of_number($i) eq 'species') {
276 push @ranks,[$value->binomial,$self->rank_of_number($i)];
278 push @ranks,[$classes[$i],$self->rank_of_number($i)];
282 $self->throw("Species object and taxonomy object cannot be reconciled");
284 } elsif ($self->method eq 'lookup') {
285 # this will lookup a DB for the rank of a taxon name
286 # I imagine that some kind of Bio::DB class will be need to
287 # be given to the taxonomy object to act as an DB interface
288 # (I'm not sure how useful this is though - if you have a DB of
289 # taxonomy - why would you be doing things this way?)
290 $self->throw_not_implemented();
299 Title : level_of_rank
300 Usage : $obj = taxonomy->level_of_rank($obj);
301 Function: returns the level of a rank name
309 my ($self,$value) = @_;
311 return $self->{'_rank_hash'}{$value};
315 =head2 rank_of_number
317 Title : rank_of_number
318 Usage : $obj = taxonomy->rank_of_number($obj);
319 Function: returns the rank name of a rank level
327 my ($self,$value) = @_;
329 return ${$self->{'_ranks'}}[$value];
336 Usage : @obj = taxonomy->ranks(@obj);
337 Function: set or return all ranks
345 my ($self,@value) = @_;
347 # currently this makes no uniqueness sanity check (this should be done)
348 # I am think that adding a way of converting multiple 'no rank' ranks
349 # to unique 'no rank #' ranks so that the level of a 'no rank' is
350 # abstracted way from the user - I'm not sure of the value of this
353 $self->{'_ranks'}=\
@value;
356 for (my $i=0; $i <= @
{$self->{'_ranks'}}-1; $i++) {
357 $self->{'_rank_hash'}{$self->{'_ranks'}[$i]}=$i unless $self->{'_ranks'}[$i] eq 'no rank';
360 return @
{$self->{'_ranks'}};
366 Usage: $obj->add_node($node[, $node2, ...]);
367 Function: add one or more Bio::Taxonomy::Node objects
369 Args: any number of Bio::Taxonomy::Node(s)
374 my ($self, @nodes) = @_;
376 $self->throw("A Bio::Taxonomy::Node object needed")
377 unless($_->isa('Bio::Taxonomy::Node'));
378 my ($node, $rank) = ($_, $_->rank);
379 if(exists $self->{_hierarchy
}->{$rank}){
380 # $self->throw("$rank has been defined");
381 # print STDERR "RANK:$rank\n";
384 $self->{_hierarchy
}->{$rank} = $node;
391 Usage : my $val = $obj->binomial;
392 Function: returns the binomial name if this taxonomy reaches species level
393 Returns : the binomial name
394 OR undef if taxonmy does not reach species level
395 Args : [No arguments]
401 return $self->get_node('species')->scientific_name;
402 my $genus = $self->get_node('genus');
403 my $species = $self->get_node('species');
404 return ($species && $genus) ?
"$species $genus" : undef;
410 Usage : $node = $taxonomy->get_node('species');
411 Function: get a Bio::Taxonomy::Node object according to rank name
412 Returns : a Bio::Taxonomy::Node object or undef if null
413 Args : a valid rank name
418 my ($self, $rank) = @_;
419 unless(grep /$rank/, keys %{$self->{_hierarchy
}}){
420 $self->throw("'$rank' is not in the rank list");
422 return (exists $self->{_hierarchy
}->{$rank})?
423 $self->{_hierarchy
}->{$rank} : undef;
426 =head2 classification
428 Title : classification
429 Usage : @names = $taxonomy->classification;
430 Function: get the classification names of one taxonomy
431 Returns : array of names
432 Args : [No arguments]
438 my %rank_hash = %{$self->{_rank_hash
}};
439 my %hierarchy = %{$self->{_hierarchy
}};
440 my @ordered_nodes = sort {
441 ($rank_hash{$a} <=> $rank_hash{$b})
443 return map {$hierarchy{$_}->scientific_name} @ordered_nodes;