Bio::Root::RootI: deprecate deprecated() method (issue #294)
[bioperl-live.git] / lib / Bio / Tree / TreeFunctionsI.pm
blob977b515357e940b17c71f359eaa8ccf08302c934
2 # BioPerl module for Bio::Tree::TreeFunctionsI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
19 =head1 SYNOPSIS
21 use Bio::TreeIO;
22 my $in = Bio::TreeIO->new(-format => 'newick', -file => 'tree.tre');
24 my $tree = $in->next_tree;
26 my @nodes = $tree->find_node('id1');
28 if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
29 #...
32 =head1 DESCRIPTION
34 This interface provides a set of implemented Tree functions which
35 only use the defined methods in the TreeI or NodeI interface.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via the
63 web:
65 https://github.com/bioperl/bioperl-live/issues
67 =head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
69 Email jason-at-bioperl-dot-org
70 Email amackey-at-virginia.edu
71 Email jtr4v-at-virginia.edu
73 =head1 CONTRIBUTORS
75 Sendu Bala, bix@sendu.me.uk
77 Rerooting code was worked on by
79 Daniel Barker d.barker-at-reading.ac.uk
80 Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
82 =head1 APPENDIX
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
87 =cut
89 # Let the code begin...
92 package Bio::Tree::TreeFunctionsI;
94 use strict;
95 use base qw(Bio::Tree::TreeI);
97 use Carp;
99 =head2 find_node
101 Title : find_node
102 Usage : my @nodes = $self->find_node(-id => 'node1');
103 Function: returns all nodes that match a specific field, by default this
104 is id, but different branch_length,
105 Returns : List of nodes which matched search
106 Args : text string to search for
108 -fieldname => $textstring
110 =cut
112 sub find_node {
113 my ($self, $type, $field) = @_;
114 if( ! defined $type ) {
115 $self->warn("Must request a either a string or field and string when searching");
118 # all this work for a '-' named field
119 # is so that we could potentially
120 # expand to other constraints in
121 # different implementations
122 # like 'find all nodes with boostrap < XX'
124 if( ! defined $field ) {
125 # only 1 argument, default to searching by id
126 $field = $type;
127 $type = 'id';
128 } else {
129 $type =~ s/^-//;
132 # could actually do this by testing $rootnode->can($type) but
133 # it is possible that a tree is implemented with different node types
134 # - although it is unlikely that the root node would be richer than the
135 # leaf nodes. Can't handle NHX tags right now
137 my @nodes = grep { $_->can($type) && defined $_->$type() &&
138 $_->$type() eq $field } $self->get_nodes();
140 if ( wantarray) {
141 return @nodes;
142 } else {
143 if( @nodes > 1 ) {
144 $self->warn("More than 1 node found but caller requested scalar, only returning first node");
146 return shift @nodes;
151 =head2 remove_Node
153 Title : remove_Node
154 Usage : $tree->remove_Node($node)
155 Function: Removes a node from the tree
156 Returns : boolean represent status of success
157 Args : either Bio::Tree::NodeI or string of the node id
159 =cut
161 sub remove_Node {
162 my ($self,$input) = @_;
163 my $node = undef;
164 unless( ref($input) ) {
165 $node = $self->find_node($input);
166 } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
167 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
168 return 0;
169 } else {
170 $node = $input;
172 if( ! $node->ancestor &&
173 $self->get_root_node->internal_id != $node->internal_id) {
174 $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
175 } else {
176 $node->ancestor->remove_Descendent($node);
181 =head2 get_lineage_nodes
183 Title : get_lineage_nodes
184 Usage : my @nodes = $tree->get_lineage_nodes($node);
185 Function: Given a node or its ID, get its full lineage, i.e. all its ancestors,
186 from the root to the most recent ancestor. Only use the node ID as
187 input if the nodes have been added to the tree.
188 Returns : list of nodes
189 Args : either Bio::Tree::NodeI (or string of the node id)
191 =cut
193 sub get_lineage_nodes {
194 my ($self, $input) = @_;
195 my $node;
197 # Sanity checks
198 if (ref $input) {
199 if (not $input->isa('Bio::Tree::NodeI')) {
200 $self->throw("Did not provide a valid Bio::Tree::NodeI object or ID string to get_lineage_nodes");
202 $node = $input;
203 } else {
204 $node = $self->find_node($input);
207 # When dealing with Bio::Taxon objects with databases, the root will always
208 # be the database's root, ignoring this Tree's set root node; prefer the
209 # Tree's idea of root.
210 my $root = $self->get_root_node || '';
212 my @lineage;
213 while ($node) {
214 $node = $node->ancestor || last;
215 unshift(@lineage, $node);
216 $node eq $root && last;
218 return @lineage;
222 =head2 get_lineage_string
224 Title : get_lineage_string
225 Usage : my $lineage = $tree->get_lineage_string($node);
226 Function: Get the string representation of the full lineage of a node, e.g.
227 for the Enterobacteriales node, return
228 Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales.
229 This method uses get_lineage_nodes internally and therefore inherits
230 of all of its caveats.
231 Returns : string
232 Args : * either Bio::Tree::NodeI (or string of the node id)
233 * an optional separator (default: ';')
235 =cut
237 sub get_lineage_string {
238 my ($self, $input, $sep) = @_;
239 $sep ||= ';';
240 my $node;
241 unless (ref $input) {
242 $node = $self->find_node($input);
244 elsif (! $input->isa('Bio::Tree::NodeI')) {
245 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
246 return;
248 else {
249 $node = $input;
251 my @nodes = ($self->get_lineage_nodes($node), $node);
252 for my $i (0 .. scalar @nodes - 1) {
253 my $node_name = $nodes[$i]->node_name || '';
254 if ($node_name =~ m/$sep/) {
255 $self->warn("Separator '$sep' is not safe to use because the node ".
256 "called '$node_name' contains it. Consider using another separator".
257 " or sanitizing the node name.");
259 $nodes[$i] = $node_name;
261 return join $sep, @nodes;
265 =head2 splice
267 Title : splice
268 Usage : $tree->splice(-remove_id => \@ids);
269 Function: Remove all the nodes from a tree that correspond to the supplied
270 args, making all the descendents of a removed node the descendents
271 of the removed node's ancestor.
272 You can ask to explicitly remove certain nodes by using -remove_*,
273 remove them conditionally by using -remove_* in combination with
274 -keep_*, or remove everything except certain nodes by using only
275 -keep_*.
276 Returns : n/a
277 Args : just a list of Bio::Tree::NodeI objects to remove, OR
278 -key => value pairs, where -key has the prefix 'remove' or 'keep',
279 followed by an underscore, followed by a fieldname (like for the
280 method find_node). Value should be a scalar or an array ref of
281 scalars (again, like you might supply to find_node).
283 So (-remove_id => [1, 2]) will remove all nodes from the tree that
284 have an id() of '1' or '2', while
285 (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
286 an id() of '1'.
287 (-keep_id => [2]) will remove all nodes unless they have an id() of
288 '2' (note, no -remove_*).
290 -preserve_lengths => 1 : setting this argument will splice out
291 intermediate nodes, preserving the original total length between
292 the ancestor and the descendants of the spliced node. Undef
293 by default.
295 =cut
297 sub splice {
298 my ($self, @args) = @_;
299 $self->throw("Must supply some arguments") unless @args > 0;
300 my $preserve_lengths = 0;
301 my @nodes_to_remove;
302 if (ref($args[0])) {
303 $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
304 @nodes_to_remove = @args;
306 else {
307 $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
308 my %args = @args;
309 my @keep_nodes;
310 my @remove_nodes;
311 my $remove_all = 1;
312 while (my ($key, $value) = each %args) {
313 my @values = ref($value) ? @{$value} : ($value);
315 if ($key =~ s/remove_//) {
316 $remove_all = 0;
317 foreach my $value (@values) {
318 push(@remove_nodes, $self->find_node($key => $value));
321 elsif ($key =~ s/keep_//) {
322 foreach my $value (@values) {
323 push(@keep_nodes, $self->find_node($key => $value));
326 elsif ($key =~ /preserve/) {
327 $preserve_lengths = $value;
331 if ($remove_all) {
332 if (@keep_nodes == 0) {
333 $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
334 return;
337 @remove_nodes = $self->get_nodes;
339 if (@keep_nodes > 0) {
340 my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
341 foreach my $node (@remove_nodes) {
342 push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
345 else {
346 @nodes_to_remove = @remove_nodes;
349 # do the splicing
350 #*** the algorithm here hasn't really been thought through and tested much,
351 # will probably need revising
352 my %root_descs;
353 my $reroot = 0;
354 foreach my $node (@nodes_to_remove) {
355 my @descs = $node->each_Descendent;
356 my $ancestor = $node->ancestor;
357 if (! $ancestor && ! $reroot) {
358 # we're going to remove the tree root, so will have to re-root the
359 # tree later
360 $reroot = 1;
361 %root_descs = map { $_->internal_id => $_ } @descs;
362 $node->remove_all_Descendents;
363 next;
365 if (exists $root_descs{$node->internal_id}) {
366 # well, this one can't be the future root anymore
367 delete $root_descs{$node->internal_id};
368 # but maybe one of this one's descs will become the root
369 foreach my $desc (@descs) {
370 $root_descs{$desc->internal_id} = $desc;
373 # make the ancestor of our descendents our own ancestor, and give us
374 # no ancestor of our own to remove us from the tree
375 foreach my $desc (@descs) {
376 $desc->ancestor($ancestor);
377 $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths;
379 $node->ancestor(undef);
381 if ($reroot) {
382 my @candidates = values %root_descs;
383 $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
384 $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
385 $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
390 =head2 get_lca
392 Title : get_lca
393 Usage : get_lca(-nodes => \@nodes ); OR
394 get_lca(@nodes);
395 Function: given two or more nodes, returns the lowest common ancestor (aka most
396 recent common ancestor)
397 Returns : node object or undef if there is no common ancestor
398 Args : -nodes => arrayref of nodes to test, OR
399 just a list of nodes
401 =cut
403 sub get_lca {
404 my ($self, @args) = @_;
405 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
406 my @nodes;
407 if (ref($nodes) eq 'ARRAY') {
408 @nodes = @{$nodes};
410 else {
411 @nodes = @args;
413 @nodes >= 2 or $self->throw("At least 2 nodes are required");
414 # We must go root->leaf to get the correct answer to lca (in a world where
415 # internal_id might not be uniquely assigned), but leaf->root is more
416 # forgiving (eg. lineages may not all have the same root, or they may have
417 # different numbers of 'minor' taxa inbeteen 'major' ones).
419 # I use root->leaf so that we can easily do multiple nodes at once - no
420 # matter what taxa are below the lca, the lca and all its ancestors ought to
421 # be identical.
422 my @paths;
423 foreach my $node (@nodes) {
424 unless(ref($node) && $node->isa('Bio::Tree::NodeI')) {
425 $self->throw("Cannot process get_lca() with a non-NodeI object ($node)\n");
427 my @path = ($self->get_lineage_nodes($node), $node);
428 push(@paths, \@path);
430 return unless @paths >= 2;
431 my $lca;
432 LEVEL: while ($paths[0] > 0) {
433 my %node_ids;
434 my $node;
435 foreach my $path (@paths) {
436 $node = shift(@{$path}) || last LEVEL;
437 my $node_id = $node->internal_id;
438 unless (defined $node_id) {
439 $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
440 return;
442 $node_ids{$node_id}++;
444 if (keys %node_ids == 1) {
445 $lca = $node;
447 else {
448 # at this point in the lineage the nodes are different; the previous
449 # loop had the lca
450 last LEVEL;
453 # If the tree that we are contains the lca (get_lca could have been called
454 # on an empty tree, since it works with plain Nodes), prefer to return the
455 # node object that belongs to us
456 if ($lca && $self->number_nodes > 0) {
457 my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
458 $lca = $own_lca if $own_lca;
460 return $lca;
464 =head2 merge_lineage
466 Title : merge_lineage
467 Usage : merge_lineage($node)
468 Function: Merge a lineage of nodes with this tree.
469 Returns : true for success, false (and a warning) otherwise
470 Args : Bio::Tree::TreeI with only one leaf, OR
471 Bio::Tree::NodeI which has an ancestor
473 For example, if we are the tree $tree:
475 +---B
479 +---C
481 and we want to merge the lineage $other_tree:
483 A---C---D
485 After calling $tree->merge_lineage($other_tree), $tree looks like:
487 +---B
491 +---C---D
493 =cut
495 sub merge_lineage {
496 my ($self, $thing) = @_;
497 $self->throw("Must supply an object reference") unless ref($thing);
499 my $lineage_leaf;
500 if ($thing->isa('Bio::Tree::TreeI')) {
501 my @leaves = $thing->get_leaf_nodes;
502 $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
503 $lineage_leaf = shift(@leaves);
505 elsif ($thing->isa('Bio::Tree::NodeI')) {
506 $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
507 $lineage_leaf = $thing;
510 # Find the lowest node in the supplied lineage that is in the tree
511 # That will be our lca and we can merge at the node below
512 my @lineage = ($lineage_leaf, reverse($self->get_lineage_nodes($lineage_leaf)));
513 my $merged = 0;
514 my $node;
515 my $i = 0;
516 while ($i <= $#lineage) {
517 $node = $self->find_node(-internal_id => $lineage[$i]->internal_id);
518 if (defined $node) {
519 $merged = 1;
520 last;
522 $i++;
524 if (not $merged) {
525 $self->warn("Could not merge the lineage of ".$lineage_leaf->id." with the rest of the tree");
528 # Merge descendents, recursively
529 while ($i > 0) {
530 $node->add_Descendent($lineage[$i-1]);
531 $node = $self->find_node(-internal_id => $lineage[$i-1]->internal_id);
532 $i--;
535 return $merged;
539 =head2 contract_linear_paths
541 Title : contract_linear_paths
542 Usage : contract_linear_paths()
543 Function: Splices out all nodes in the tree that have an ancestor and only one
544 descendent.
545 Returns : n/a
546 Args : none for normal behaviour, true to dis-regard the ancestor requirement
547 and re-root the tree as necessary
549 For example, if we are the tree $tree:
551 +---E
553 A---B---C---D
555 +---F
557 After calling $tree->contract_linear_paths(), $tree looks like:
559 +---E
561 A---D
563 +---F
565 Instead, $tree->contract_linear_paths(1) would have given:
567 +---E
571 +---F
573 =cut
575 sub contract_linear_paths {
576 my $self = shift;
577 my $reroot = shift;
578 my @remove;
579 foreach my $node ($self->get_nodes) {
580 if ($node->ancestor && $node->each_Descendent == 1) {
581 push(@remove, $node);
584 $self->splice(@remove) if @remove;
585 if ($reroot) {
586 my $root = $self->get_root_node;
587 my @descs = $root->each_Descendent;
588 if (@descs == 1) {
589 my $new_root = shift(@descs);
590 $self->set_root_node($new_root);
591 $new_root->ancestor(undef);
597 =head2 is_binary
599 Example : is_binary(); is_binary($node);
600 Description: Finds if the tree or subtree defined by
601 the internal node is a true binary tree
602 without polytomies
603 Returns : boolean
604 Exceptions :
605 Args : Internal node Bio::Tree::NodeI, optional
608 =cut
610 sub is_binary {
611 my $self = shift;
612 my $node = shift || $self->get_root_node;
614 my $binary = 1;
615 my @descs = $node->each_Descendent;
616 $binary = 0 unless @descs == 2 or @descs == 0;
617 #print "$binary, ", scalar @descs, "\n";
619 # recurse
620 foreach my $desc (@descs) {
621 $binary += $self->is_binary($desc) -1;
623 $binary = 0 if $binary < 0;
624 return $binary;
628 =head2 force_binary
630 Title : force_binary
631 Usage : force_binary()
632 Function: Forces the tree into a binary tree, splitting branches arbitrarily
633 and creating extra nodes as necessary, such that all nodes have
634 exactly two or zero descendants.
635 Returns : n/a
636 Args : none
638 For example, if we are the tree $tree:
640 +---G
642 +---F
644 +---E
648 +---D
650 +---C
652 +---B
654 (A has 6 descendants B-G)
656 After calling $tree->force_binary(), $tree looks like:
658 +---X
660 +---X
662 | +---X
664 +---X
666 | | +---G
667 | | |
668 | +---X
670 | +---F
672 | +---E
674 | +---X
675 | | |
676 | | +---D
678 +---X
680 | +---C
682 +---X
684 +---B
686 (Where X are artificially created nodes with ids 'artificial_n', where n is
687 an integer making the id unique within the tree)
689 =cut
691 sub force_binary {
692 my $self = shift;
693 my $node = shift || $self->get_root_node;
695 my @descs = $node->each_Descendent;
696 if (@descs > 2) {
697 # Removed overly verbose warning - cjfields 3-12-11
699 # Many nodes have no identifying names, a simple warning is probably
700 # enough.
702 $self->warn("Node has more than two descendants\nWill do an arbitrary balanced split");
703 my @working = @descs;
704 # create an even set of artifical nodes on which to later hang the descs
705 my $half = @working / 2;
706 $half++ if $half > int($half);
707 $half = int($half);
708 my @artificials;
709 while ($half > 1) {
710 my @this_level;
711 foreach my $top_node (@artificials || $node) {
712 for (1..2) {
713 my $art = $top_node->new(-id => "artificial_".++$self->{_art_num});
714 $top_node->add_Descendent($art);
715 push(@this_level, $art);
718 @artificials = @this_level;
719 $half--;
721 # attach two descs to each artifical leaf
722 foreach my $art (@artificials) {
723 for (1..2) {
724 my $desc = shift(@working) || $node->new(-id => "artificial_".++$self->{_art_num});
725 $desc->ancestor($art);
729 elsif (@descs == 1) {
730 # ensure that all nodes have 2 descs
731 $node->add_Descendent($node->new(-id => "artificial_".++$self->{_art_num}));
733 # recurse
734 foreach my $desc (@descs) {
735 $self->force_binary($desc);
740 =head2 simplify_to_leaves_string
742 Title : simplify_to_leaves_string
743 Usage : my $leaves_string = $tree->simplify_to_leaves_string()
744 Function: Creates a simple textual representation of the relationship between
745 leaves in self. It forces the tree to be binary, so the result may
746 not strictly correspond to the tree (if the tree wasn't binary), but
747 will be as close as possible. The tree object is not altered. Only
748 leaf node ids are output, in a newick-like format.
749 Returns : string
750 Args : none
752 =cut
754 sub simplify_to_leaves_string {
755 my $self = shift;
757 # Before contracting and forcing binary we need to clone self, but Clone.pm
758 # clone() seg faults and fails to make the clone, whilst Storable dclone
759 # needs $self->{_root_cleanup_methods} deleted (code ref) and seg faults at
760 # end of script. Let's make our own clone...
761 my $tree = $self->_clone;
763 $tree->contract_linear_paths(1);
764 $tree->force_binary;
765 foreach my $node ($tree->get_nodes) {
766 my $id = $node->id;
767 $id = ($node->is_Leaf && $id !~ /^artificial/) ? $id : '';
768 $node->id($id);
771 my %paired;
772 my @data = $self->_simplify_helper($tree->get_root_node, \%paired);
774 return join(',', @data);
778 # alias
779 sub _clone { shift->clone(@_) }
782 # safe node clone that doesn't seg fault, but deliberately loses ancestors and
783 # descendents
784 sub _clone_node {
785 my ($self, $node) = @_;
786 my $clone = $node->new;
788 while (my ($key, $val) = each %{$node}) {
789 if ($key eq '_desc' || $key eq '_ancestor') {
790 next;
792 ${$clone}{$key} = $val;
795 return $clone;
799 # tree string generator for simplify_to_leaves_string, based on
800 # Bio::TreeIO::newick::_write_tree_Helper
801 sub _simplify_helper {
802 my ($self, $node, $paired) = @_;
803 return () if (!defined $node);
805 my @data = ();
806 foreach my $node ($node->each_Descendent()) {
807 push(@data, $self->_simplify_helper($node, $paired));
810 my $id = $node->id_output || '';
811 if (@data) {
812 unless (exists ${$paired}{"@data"} || @data == 1) {
813 $data[0] = "(" . $data[0];
814 $data[-1] .= ")";
815 ${$paired}{"@data"} = 1;
818 elsif ($id) {
819 push(@data, $id);
822 return @data;
826 =head2 distance
828 Title : distance
829 Usage : distance(-nodes => \@nodes )
830 Function: returns the distance between two given nodes
831 Returns : numerical distance
832 Args : -nodes => arrayref of nodes to test
833 or ($node1, $node2)
835 =cut
837 sub distance {
838 my ($self,@args) = @_;
839 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
840 if( ! defined $nodes ) {
841 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
842 return;
844 elsif (ref($nodes) eq 'ARRAY') {
847 elsif ( @args == 2) { # assume these are nodes...
848 $nodes = \@args;
850 else {
851 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
852 return;
854 $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
856 my $lca = $self->get_lca(@{$nodes});
857 unless($lca) {
858 $self->warn("could not find the lca of supplied nodes; can't find distance either");
859 return;
862 my $cumul_dist = 0;
863 my $warned = 0;
864 foreach my $current_node (@{$nodes}) {
865 while (1) {
866 last if $current_node eq $lca;
867 if ($current_node->branch_length) {
868 $cumul_dist += $current_node->branch_length;
870 elsif (! $warned) {
871 $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
872 $warned = 1;
875 $current_node = $current_node->ancestor || last;
879 return $cumul_dist;
883 =head2 is_monophyletic
885 Title : is_monophyletic
886 Usage : if( $tree->is_monophyletic(-nodes => \@nodes,
887 -outgroup => $outgroup)
888 Function: Will do a test of monophyly for the nodes specified
889 in comparison to a chosen outgroup
890 Returns : boolean
891 Args : -nodes => arrayref of nodes to test
892 -outgroup => outgroup to serve as a reference
894 =cut
896 sub is_monophyletic{
897 my ($self,@args) = @_;
898 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
900 if( ! defined $nodes || ! defined $outgroup ) {
901 $self->warn("Must supply -nodes and -outgroup parameters to the method
902 is_monophyletic");
903 return;
905 if( ref($nodes) !~ /ARRAY/i ) {
906 $self->warn("Must provide a valid array reference for -nodes");
909 my $clade_root = $self->get_lca(@{$nodes});
910 unless( defined $clade_root ) {
911 $self->warn("could not find clade root via lca");
912 return;
915 my $og_ancestor = $outgroup->ancestor;
916 while( defined ($og_ancestor ) ) {
917 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
918 # monophyly is violated
919 return 0;
921 $og_ancestor = $og_ancestor->ancestor;
923 return 1;
927 =head2 is_paraphyletic
929 Title : is_paraphyletic
930 Usage : if( $tree->is_paraphyletic(-nodes =>\@nodes,
931 -outgroup => $node) ){ }
932 Function: Tests whether or not a given set of nodes are paraphyletic
933 (representing the full clade) given an outgroup
934 Returns : [-1,0,1] , -1 if the group is not monophyletic
935 0 if the group is not paraphyletic
936 1 if the group is paraphyletic
937 Args : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
938 -outgroup => a Bio::Tree::NodeI to compare the nodes to
941 =cut
943 sub is_paraphyletic{
944 my ($self,@args) = @_;
945 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
947 if( ! defined $nodes || ! defined $outgroup ) {
948 $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
949 return;
951 if( ref($nodes) !~ /ARRAY/i ) {
952 $self->warn("Must provide a valid array reference for -nodes");
953 return;
956 # Algorithm
957 # Find the lca
958 # Find all the nodes beneath the lca
959 # Test to see that none are missing from the nodes list
960 my %nodehash;
961 foreach my $n ( @$nodes ) {
962 $nodehash{$n->internal_id} = $n;
965 my $clade_root = $self->get_lca(-nodes => $nodes );
966 unless( defined $clade_root ) {
967 $self->warn("could not find clade root via lca");
968 return;
971 my $og_ancestor = $outgroup->ancestor;
973 # Is this necessary/correct for paraphyly test?
974 while( defined ($og_ancestor ) ) {
975 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
976 # monophyly is violated, could be paraphyletic
977 return -1;
979 $og_ancestor = $og_ancestor->ancestor;
981 my $tree = Bio::Tree::Tree->new(-root => $clade_root,
982 -nodelete => 1);
984 foreach my $n ( $tree->get_nodes() ) {
985 next unless $n->is_Leaf();
986 # if any leaf node is not in the list
987 # then it is part of the clade and so the list
988 # must be paraphyletic
989 return 1 unless ( $nodehash{$n->internal_id} );
991 return 0;
995 =head2 reroot
997 Title : reroot
998 Usage : $tree->reroot($node);
999 Function: Reroots a tree making a new node the root
1000 Returns : 1 on success, 0 on failure
1001 Args : Bio::Tree::NodeI that is in the tree, but is not the current root
1003 =cut
1005 sub reroot {
1006 my ($self,$new_root) = @_;
1007 unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
1008 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1009 return 0;
1012 my $old_root = $self->get_root_node;
1013 if( $new_root == $old_root ) {
1014 $self->warn("Node requested for reroot is already the root node!");
1015 return 0;
1017 my $anc = $new_root->ancestor;
1018 unless( $anc ) {
1019 # this is already the root
1020 $self->warn("Node requested for reroot is already the root node!");
1021 return 0;
1023 my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1);
1024 # reverse the ancestor & children pointers
1025 my $former_anc = $tmp_node->ancestor;
1026 my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node);
1027 for (my $i = 0; $i < $#path_from_oldroot; $i++) {
1028 my $current = $path_from_oldroot[$i];
1029 my $next = $path_from_oldroot[$i + 1];
1030 $current->remove_Descendent($next);
1031 $current->branch_length($next->branch_length);
1032 $current->bootstrap($next->bootstrap) if defined $next->bootstrap;
1033 $next->remove_tag('B');
1034 $next->add_Descendent($current);
1037 $new_root->add_Descendent($former_anc);
1038 $tmp_node->remove_Descendent($former_anc);
1040 $tmp_node = undef;
1041 $new_root->branch_length(undef);
1043 $old_root = undef;
1044 $self->set_root_node($new_root);
1046 return 1;
1050 =head2 reroot_at_midpoint
1052 Title : reroot_at_midpoint
1053 Usage : $tree->reroot_at_midpoint($node, $new_root_id);
1054 Function: Reroots a tree on a new node created halfway between the
1055 argument and its ancestor
1056 Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure
1057 Args : non-root Bio::Tree::NodeI currently in $tree
1058 scalar string, id for new node (optional)
1060 =cut
1062 sub reroot_at_midpoint {
1063 my $self = shift;
1064 my $node = shift;
1065 my $id = shift;
1067 unless (defined $node && $node->isa("Bio::Tree::NodeI")) {
1068 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1069 return 0;
1072 my $midpt = $node->create_node_on_branch(-FRACTION=>0.5);
1073 if (defined $id) {
1074 $self->warn("ID argument is not a scalar") if (ref $id);
1075 $midpt->id($id) if defined($id) && !ref($id);
1077 $self->reroot($midpt);
1078 return $midpt;
1082 =head2 findnode_by_id
1084 Title : findnode_by_id
1085 Usage : my $node = $tree->findnode_by_id($id);
1086 Function: Get a node by its id (which should be
1087 unique for the tree)
1088 Returns : L<Bio::Tree::NodeI>
1089 Args : node id
1092 =cut
1094 sub findnode_by_id {
1095 my $tree = shift;
1096 Carp::carp('use of findnode_by_id() is deprecated; use find_node()');
1097 my $id = shift;
1098 my $rootnode = $tree->get_root_node;
1099 if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
1100 return $rootnode;
1102 # process all the children
1103 foreach my $node ( $rootnode->get_Descendents ) {
1104 if ( ($node->id) and ($node->id eq $id ) ) {
1105 return $node;
1111 =head2 move_id_to_bootstrap
1113 Title : move_id_to_bootstrap
1114 Usage : $tree->move_id_to_bootstrap
1115 Function: Move internal IDs to bootstrap slot
1116 Returns : undef
1117 Args : undef
1119 =cut
1121 sub move_id_to_bootstrap{
1122 my ($tree) = shift;
1123 for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
1124 $node->bootstrap(defined $node->id ? $node->id : '');
1125 $node->id('');
1130 =head2 add_trait
1132 Title : add_trait
1133 Usage : my $key = $tree->add_trait($trait_file, 3);
1134 Function: Add traits to the leaf nodes of a Bio::Tree:Tree from a file.
1135 The trait file is a tab-delimited text file and needs to have a
1136 header line giving names to traits. The first column contains the
1137 leaf node ids. Subsequent columns contain different trait value sets.
1138 Single or double quotes are removed from the trait values. Traits
1139 are added to leaf nodes as a tag named $key using the add_tag_value()
1140 method. This means that you can retrieve the trait values using the
1141 get_tag_values() method (see the documentation for Bio::Tree::Node).
1142 Returns : Trait name (a scalar) on success, undef on failure (for example, if
1143 the column index requested was too large).
1144 Args : * Name of trait file (scalar string).
1145 * Index of trait file column (scalar int). Note that numbering starts
1146 at 0. Default: 1 (second column).
1147 * Ignore missing values. Typically, if a leaf node has no value in
1148 the trait file, an exception is thrown. If you set this option to
1149 1, then no trait will be given to the node (no exception thrown).
1151 =cut
1153 sub _read_trait_file {
1154 my ($self, $file, $column) = @_;
1155 $column ||= 1;
1157 my $trait_name;
1158 my $trait_values;
1159 open my $TRAIT, '<', $file or $self->throw("Could not read file '$file': $!");
1161 my $first_line = 1;
1162 while (<$TRAIT>) {
1163 chomp;
1164 s/['"]//g;
1165 my @line = split /\t/;
1166 if ($first_line) {
1167 $first_line = 0;
1168 $trait_name = $line[$column];
1169 next;
1172 my $id = $line[0];
1173 last if (not defined $id) or ($id eq '');
1175 # Skip empty trait values
1176 my $value = $line[$column];
1177 next if (not defined $value) or ($value eq '');
1179 $trait_values->{$id} = $value;
1182 close $TRAIT;
1183 return $trait_name, $trait_values;
1186 sub add_trait {
1187 my ($self, $file, $column, $ignore) = @_;
1188 $ignore = 0 if not defined $ignore;
1190 my ($trait_name, $trait_values) = $self->_read_trait_file($file, $column);
1192 if (defined $trait_name) {
1194 for my $node ($self->get_leaf_nodes) {
1196 # strip quotes from the node id
1197 $node->id($1) if $node->id =~ /^['"]+(.*)['"]+$/;
1199 if ( not exists $trait_values->{$node->id} ) {
1200 if ($ignore) {
1201 next;
1202 } else {
1203 $self->throw("No trait for node [".$node->id."/".$node->internal_id."]");
1207 $node->add_tag_value($trait_name, $trait_values->{ $node->id } );
1211 return $trait_name;