ide/bioperl-mode: into its own repository to be developed separately
[bioperl-live.git] / Bio / Tree / TreeFunctionsI.pm
blob51508f108177960423e0cec95666e4915a801e2f
2 # BioPerl module for Bio::Tree::TreeFunctionsI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::Tree::TreeFunctionsI - Decorated Interface implementing basic Tree exploration methods
19 =head1 SYNOPSIS
21 use Bio::TreeIO;
22 my $in = Bio::TreeIO->new(-format => 'newick', -file => 'tree.tre');
24 my $tree = $in->next_tree;
26 my @nodes = $tree->find_node('id1');
28 if( $tree->is_monophyletic(-nodes => \@nodes, -outgroup => $outnode) ){
29 #...
32 =head1 DESCRIPTION
34 This interface provides a set of implemented Tree functions which
35 only use the defined methods in the TreeI or NodeI interface.
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the evolution of this and other
42 Bioperl modules. Send your comments and suggestions preferably to
43 the Bioperl mailing list. Your participation is much appreciated.
45 bioperl-l@bioperl.org - General discussion
46 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 =head2 Support
50 Please direct usage questions or support issues to the mailing list:
52 I<bioperl-l@bioperl.org>
54 rather than to the module maintainer directly. Many experienced and
55 reponsive experts will be able look at the problem and quickly
56 address it. Please include a thorough description of the problem
57 with code and data examples if at all possible.
59 =head2 Reporting Bugs
61 Report bugs to the Bioperl bug tracking system to help us keep track
62 of the bugs and their resolution. Bug reports can be submitted via the
63 web:
65 https://github.com/bioperl/bioperl-live/issues
67 =head1 AUTHOR - Jason Stajich, Aaron Mackey, Justin Reese
69 Email jason-at-bioperl-dot-org
70 Email amackey-at-virginia.edu
71 Email jtr4v-at-virginia.edu
73 =head1 CONTRIBUTORS
75 Sendu Bala, bix@sendu.me.uk
77 Rerooting code was worked on by
79 Daniel Barker d.barker-at-reading.ac.uk
80 Ramiro Barrantes Ramiro.Barrantes-at-uvm.edu
82 =head1 APPENDIX
84 The rest of the documentation details each of the object methods.
85 Internal methods are usually preceded with a _
87 =cut
89 # Let the code begin...
92 package Bio::Tree::TreeFunctionsI;
94 use strict;
95 use base qw(Bio::Tree::TreeI);
98 =head2 find_node
100 Title : find_node
101 Usage : my @nodes = $self->find_node(-id => 'node1');
102 Function: returns all nodes that match a specific field, by default this
103 is id, but different branch_length,
104 Returns : List of nodes which matched search
105 Args : text string to search for
107 -fieldname => $textstring
109 =cut
111 sub find_node {
112 my ($self, $type, $field) = @_;
113 if( ! defined $type ) {
114 $self->warn("Must request a either a string or field and string when searching");
117 # all this work for a '-' named field
118 # is so that we could potentially
119 # expand to other constraints in
120 # different implementations
121 # like 'find all nodes with boostrap < XX'
123 if( ! defined $field ) {
124 # only 1 argument, default to searching by id
125 $field = $type;
126 $type = 'id';
127 } else {
128 $type =~ s/^-//;
131 # could actually do this by testing $rootnode->can($type) but
132 # it is possible that a tree is implemented with different node types
133 # - although it is unlikely that the root node would be richer than the
134 # leaf nodes. Can't handle NHX tags right now
136 my @nodes = grep { $_->can($type) && defined $_->$type() &&
137 $_->$type() eq $field } $self->get_nodes();
139 if ( wantarray) {
140 return @nodes;
141 } else {
142 if( @nodes > 1 ) {
143 $self->warn("More than 1 node found but caller requested scalar, only returning first node");
145 return shift @nodes;
150 =head2 remove_Node
152 Title : remove_Node
153 Usage : $tree->remove_Node($node)
154 Function: Removes a node from the tree
155 Returns : boolean represent status of success
156 Args : either Bio::Tree::NodeI or string of the node id
158 =cut
160 sub remove_Node {
161 my ($self,$input) = @_;
162 my $node = undef;
163 unless( ref($input) ) {
164 $node = $self->find_node($input);
165 } elsif( ! $input->isa('Bio::Tree::NodeI') ) {
166 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to remove_node");
167 return 0;
168 } else {
169 $node = $input;
171 if( ! $node->ancestor &&
172 $self->get_root_node->internal_id != $node->internal_id) {
173 $self->warn("Node (".$node->to_string . ") has no ancestor, can't remove!");
174 } else {
175 $node->ancestor->remove_Descendent($node);
180 =head2 get_lineage_nodes
182 Title : get_lineage_nodes
183 Usage : my @nodes = $tree->get_lineage_nodes($node);
184 Function: Given a node or its ID, get its full lineage, i.e. all its ancestors,
185 from the root to the most recent ancestor. Only use the node ID as
186 input if the nodes have been added to the tree.
187 Returns : list of nodes
188 Args : either Bio::Tree::NodeI (or string of the node id)
190 =cut
192 sub get_lineage_nodes {
193 my ($self, $input) = @_;
194 my $node;
196 # Sanity checks
197 if (ref $input) {
198 if (not $input->isa('Bio::Tree::NodeI')) {
199 $self->throw("Did not provide a valid Bio::Tree::NodeI object or ID string to get_lineage_nodes");
201 $node = $input;
202 } else {
203 $node = $self->find_node($input);
206 # When dealing with Bio::Taxon objects with databases, the root will always
207 # be the database's root, ignoring this Tree's set root node; prefer the
208 # Tree's idea of root.
209 my $root = $self->get_root_node || '';
211 my @lineage;
212 while ($node) {
213 $node = $node->ancestor || last;
214 unshift(@lineage, $node);
215 $node eq $root && last;
217 return @lineage;
221 =head2 get_lineage_string
223 Title : get_lineage_string
224 Usage : my $lineage = $tree->get_lineage_string($node);
225 Function: Get the string representation of the full lineage of a node, e.g.
226 for the Enterobacteriales node, return
227 Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacteriales.
228 This method uses get_lineage_nodes internally and therefore inherits
229 of all of its caveats.
230 Returns : string
231 Args : * either Bio::Tree::NodeI (or string of the node id)
232 * an optional separator (default: ';')
234 =cut
236 sub get_lineage_string {
237 my ($self, $input, $sep) = @_;
238 $sep ||= ';';
239 my $node;
240 unless (ref $input) {
241 $node = $self->find_node($input);
243 elsif (! $input->isa('Bio::Tree::NodeI')) {
244 $self->warn("Did not provide either a valid Bio::Tree::NodeI object or id to get_lineage_nodes");
245 return;
247 else {
248 $node = $input;
250 my @nodes = ($self->get_lineage_nodes($node), $node);
251 for my $i (0 .. scalar @nodes - 1) {
252 my $node_name = $nodes[$i]->node_name || '';
253 if ($node_name =~ m/$sep/) {
254 $self->warn("Separator '$sep' is not safe to use because the node ".
255 "called '$node_name' contains it. Consider using another separator".
256 " or sanitizing the node name.");
258 $nodes[$i] = $node_name;
260 return join $sep, @nodes;
264 =head2 splice
266 Title : splice
267 Usage : $tree->splice(-remove_id => \@ids);
268 Function: Remove all the nodes from a tree that correspond to the supplied
269 args, making all the descendents of a removed node the descendents
270 of the removed node's ancestor.
271 You can ask to explicitly remove certain nodes by using -remove_*,
272 remove them conditionally by using -remove_* in combination with
273 -keep_*, or remove everything except certain nodes by using only
274 -keep_*.
275 Returns : n/a
276 Args : just a list of Bio::Tree::NodeI objects to remove, OR
277 -key => value pairs, where -key has the prefix 'remove' or 'keep',
278 followed by an underscore, followed by a fieldname (like for the
279 method find_node). Value should be a scalar or an array ref of
280 scalars (again, like you might supply to find_node).
282 So (-remove_id => [1, 2]) will remove all nodes from the tree that
283 have an id() of '1' or '2', while
284 (-remove_id => [1, 2], -keep_id => [2]) will remove all nodes with
285 an id() of '1'.
286 (-keep_id => [2]) will remove all nodes unless they have an id() of
287 '2' (note, no -remove_*).
289 -preserve_lengths => 1 : setting this argument will splice out
290 intermediate nodes, preserving the original total length between
291 the ancestor and the descendants of the spliced node. Undef
292 by default.
294 =cut
296 sub splice {
297 my ($self, @args) = @_;
298 $self->throw("Must supply some arguments") unless @args > 0;
299 my $preserve_lengths = 0;
300 my @nodes_to_remove;
301 if (ref($args[0])) {
302 $self->throw("When supplying just a list of Nodes, they must be Bio::Tree::NodeI objects") unless $args[0]->isa('Bio::Tree::NodeI');
303 @nodes_to_remove = @args;
305 else {
306 $self->throw("When supplying -key => value pairs, must be an even number of args") unless @args % 2 == 0;
307 my %args = @args;
308 my @keep_nodes;
309 my @remove_nodes;
310 my $remove_all = 1;
311 while (my ($key, $value) = each %args) {
312 my @values = ref($value) ? @{$value} : ($value);
314 if ($key =~ s/remove_//) {
315 $remove_all = 0;
316 foreach my $value (@values) {
317 push(@remove_nodes, $self->find_node($key => $value));
320 elsif ($key =~ s/keep_//) {
321 foreach my $value (@values) {
322 push(@keep_nodes, $self->find_node($key => $value));
325 elsif ($key =~ /preserve/) {
326 $preserve_lengths = $value;
330 if ($remove_all) {
331 if (@keep_nodes == 0) {
332 $self->warn("Requested to remove everything except certain nodes, but those nodes were not found; doing nothing instead");
333 return;
336 @remove_nodes = $self->get_nodes;
338 if (@keep_nodes > 0) {
339 my %keep_iids = map { $_->internal_id => 1 } @keep_nodes;
340 foreach my $node (@remove_nodes) {
341 push(@nodes_to_remove, $node) unless exists $keep_iids{$node->internal_id};
344 else {
345 @nodes_to_remove = @remove_nodes;
348 # do the splicing
349 #*** the algorithm here hasn't really been thought through and tested much,
350 # will probably need revising
351 my %root_descs;
352 my $reroot = 0;
353 foreach my $node (@nodes_to_remove) {
354 my @descs = $node->each_Descendent;
355 my $ancestor = $node->ancestor;
356 if (! $ancestor && ! $reroot) {
357 # we're going to remove the tree root, so will have to re-root the
358 # tree later
359 $reroot = 1;
360 %root_descs = map { $_->internal_id => $_ } @descs;
361 $node->remove_all_Descendents;
362 next;
364 if (exists $root_descs{$node->internal_id}) {
365 # well, this one can't be the future root anymore
366 delete $root_descs{$node->internal_id};
367 # but maybe one of this one's descs will become the root
368 foreach my $desc (@descs) {
369 $root_descs{$desc->internal_id} = $desc;
372 # make the ancestor of our descendents our own ancestor, and give us
373 # no ancestor of our own to remove us from the tree
374 foreach my $desc (@descs) {
375 $desc->ancestor($ancestor);
376 $desc->branch_length($desc->branch_length + $node->branch_length) if $preserve_lengths;
378 $node->ancestor(undef);
380 if ($reroot) {
381 my @candidates = values %root_descs;
382 $self->throw("After splicing, there was no tree root!") unless @candidates > 0;
383 $self->throw("After splicing, the original root was removed but there are multiple candidates for the new root!") unless @candidates == 1;
384 $self->set_root_node($candidates[0]); # not sure its valid to use the reroot() method
389 =head2 get_lca
391 Title : get_lca
392 Usage : get_lca(-nodes => \@nodes ); OR
393 get_lca(@nodes);
394 Function: given two or more nodes, returns the lowest common ancestor (aka most
395 recent common ancestor)
396 Returns : node object or undef if there is no common ancestor
397 Args : -nodes => arrayref of nodes to test, OR
398 just a list of nodes
400 =cut
402 sub get_lca {
403 my ($self, @args) = @_;
404 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
405 my @nodes;
406 if (ref($nodes) eq 'ARRAY') {
407 @nodes = @{$nodes};
409 else {
410 @nodes = @args;
412 @nodes >= 2 or $self->throw("At least 2 nodes are required");
413 # We must go root->leaf to get the correct answer to lca (in a world where
414 # internal_id might not be uniquely assigned), but leaf->root is more
415 # forgiving (eg. lineages may not all have the same root, or they may have
416 # different numbers of 'minor' taxa inbeteen 'major' ones).
418 # I use root->leaf so that we can easily do multiple nodes at once - no
419 # matter what taxa are below the lca, the lca and all its ancestors ought to
420 # be identical.
421 my @paths;
422 foreach my $node (@nodes) {
423 unless(ref($node) && $node->isa('Bio::Tree::NodeI')) {
424 $self->throw("Cannot process get_lca() with a non-NodeI object ($node)\n");
426 my @path = ($self->get_lineage_nodes($node), $node);
427 push(@paths, \@path);
429 return unless @paths >= 2;
430 my $lca;
431 LEVEL: while ($paths[0] > 0) {
432 my %node_ids;
433 my $node;
434 foreach my $path (@paths) {
435 $node = shift(@{$path}) || last LEVEL;
436 my $node_id = $node->internal_id;
437 unless (defined $node_id) {
438 $self->warn("One of the lineages had a node with no internal_id, can't calculate the common ancestor");
439 return;
441 $node_ids{$node_id}++;
443 if (keys %node_ids == 1) {
444 $lca = $node;
446 else {
447 # at this point in the lineage the nodes are different; the previous
448 # loop had the lca
449 last LEVEL;
452 # If the tree that we are contains the lca (get_lca could have been called
453 # on an empty tree, since it works with plain Nodes), prefer to return the
454 # node object that belongs to us
455 if ($lca && $self->number_nodes > 0) {
456 my $own_lca = $self->find_node(-internal_id => $lca->internal_id);
457 $lca = $own_lca if $own_lca;
459 return $lca;
463 =head2 merge_lineage
465 Title : merge_lineage
466 Usage : merge_lineage($node)
467 Function: Merge a lineage of nodes with this tree.
468 Returns : true for success, false (and a warning) otherwise
469 Args : Bio::Tree::TreeI with only one leaf, OR
470 Bio::Tree::NodeI which has an ancestor
472 For example, if we are the tree $tree:
474 +---B
478 +---C
480 and we want to merge the lineage $other_tree:
482 A---C---D
484 After calling $tree->merge_lineage($other_tree), $tree looks like:
486 +---B
490 +---C---D
492 =cut
494 sub merge_lineage {
495 my ($self, $thing) = @_;
496 $self->throw("Must supply an object reference") unless ref($thing);
498 my $lineage_leaf;
499 if ($thing->isa('Bio::Tree::TreeI')) {
500 my @leaves = $thing->get_leaf_nodes;
501 $self->throw("The supplied Tree can only have one leaf") unless @leaves == 1;
502 $lineage_leaf = shift(@leaves);
504 elsif ($thing->isa('Bio::Tree::NodeI')) {
505 $self->throw("The supplied Node must have an ancestor") unless $thing->ancestor;
506 $lineage_leaf = $thing;
509 # Find the lowest node in the supplied lineage that is in the tree
510 # That will be our lca and we can merge at the node below
511 my @lineage = ($lineage_leaf, reverse($self->get_lineage_nodes($lineage_leaf)));
512 my $merged = 0;
513 my $node;
514 my $i = 0;
515 while ($i <= $#lineage) {
516 $node = $self->find_node(-internal_id => $lineage[$i]->internal_id);
517 if (defined $node) {
518 $merged = 1;
519 last;
521 $i++;
523 if (not $merged) {
524 $self->warn("Could not merge the lineage of ".$lineage_leaf->id." with the rest of the tree");
527 # Merge descendents, recursively
528 while ($i > 0) {
529 $node->add_Descendent($lineage[$i-1]);
530 $node = $self->find_node(-internal_id => $lineage[$i-1]->internal_id);
531 $i--;
534 return $merged;
538 =head2 contract_linear_paths
540 Title : contract_linear_paths
541 Usage : contract_linear_paths()
542 Function: Splices out all nodes in the tree that have an ancestor and only one
543 descendent.
544 Returns : n/a
545 Args : none for normal behaviour, true to dis-regard the ancestor requirement
546 and re-root the tree as necessary
548 For example, if we are the tree $tree:
550 +---E
552 A---B---C---D
554 +---F
556 After calling $tree->contract_linear_paths(), $tree looks like:
558 +---E
560 A---D
562 +---F
564 Instead, $tree->contract_linear_paths(1) would have given:
566 +---E
570 +---F
572 =cut
574 sub contract_linear_paths {
575 my $self = shift;
576 my $reroot = shift;
577 my @remove;
578 foreach my $node ($self->get_nodes) {
579 if ($node->ancestor && $node->each_Descendent == 1) {
580 push(@remove, $node);
583 $self->splice(@remove) if @remove;
584 if ($reroot) {
585 my $root = $self->get_root_node;
586 my @descs = $root->each_Descendent;
587 if (@descs == 1) {
588 my $new_root = shift(@descs);
589 $self->set_root_node($new_root);
590 $new_root->ancestor(undef);
596 =head2 is_binary
598 Example : is_binary(); is_binary($node);
599 Description: Finds if the tree or subtree defined by
600 the internal node is a true binary tree
601 without polytomies
602 Returns : boolean
603 Exceptions :
604 Args : Internal node Bio::Tree::NodeI, optional
607 =cut
609 sub is_binary {
610 my $self = shift;
611 my $node = shift || $self->get_root_node;
613 my $binary = 1;
614 my @descs = $node->each_Descendent;
615 $binary = 0 unless @descs == 2 or @descs == 0;
616 #print "$binary, ", scalar @descs, "\n";
618 # recurse
619 foreach my $desc (@descs) {
620 $binary += $self->is_binary($desc) -1;
622 $binary = 0 if $binary < 0;
623 return $binary;
627 =head2 force_binary
629 Title : force_binary
630 Usage : force_binary()
631 Function: Forces the tree into a binary tree, splitting branches arbitrarily
632 and creating extra nodes as necessary, such that all nodes have
633 exactly two or zero descendants.
634 Returns : n/a
635 Args : none
637 For example, if we are the tree $tree:
639 +---G
641 +---F
643 +---E
647 +---D
649 +---C
651 +---B
653 (A has 6 descendants B-G)
655 After calling $tree->force_binary(), $tree looks like:
657 +---X
659 +---X
661 | +---X
663 +---X
665 | | +---G
666 | | |
667 | +---X
669 | +---F
671 | +---E
673 | +---X
674 | | |
675 | | +---D
677 +---X
679 | +---C
681 +---X
683 +---B
685 (Where X are artificially created nodes with ids 'artificial_n', where n is
686 an integer making the id unique within the tree)
688 =cut
690 sub force_binary {
691 my $self = shift;
692 my $node = shift || $self->get_root_node;
694 my @descs = $node->each_Descendent;
695 if (@descs > 2) {
696 # Removed overly verbose warning - cjfields 3-12-11
698 # Many nodes have no identifying names, a simple warning is probably
699 # enough.
701 $self->warn("Node has more than two descendants\nWill do an arbitrary balanced split");
702 my @working = @descs;
703 # create an even set of artifical nodes on which to later hang the descs
704 my $half = @working / 2;
705 $half++ if $half > int($half);
706 $half = int($half);
707 my @artificials;
708 while ($half > 1) {
709 my @this_level;
710 foreach my $top_node (@artificials || $node) {
711 for (1..2) {
712 my $art = $top_node->new(-id => "artificial_".++$self->{_art_num});
713 $top_node->add_Descendent($art);
714 push(@this_level, $art);
717 @artificials = @this_level;
718 $half--;
720 # attach two descs to each artifical leaf
721 foreach my $art (@artificials) {
722 for (1..2) {
723 my $desc = shift(@working) || $node->new(-id => "artificial_".++$self->{_art_num});
724 $desc->ancestor($art);
728 elsif (@descs == 1) {
729 # ensure that all nodes have 2 descs
730 $node->add_Descendent($node->new(-id => "artificial_".++$self->{_art_num}));
732 # recurse
733 foreach my $desc (@descs) {
734 $self->force_binary($desc);
739 =head2 simplify_to_leaves_string
741 Title : simplify_to_leaves_string
742 Usage : my $leaves_string = $tree->simplify_to_leaves_string()
743 Function: Creates a simple textual representation of the relationship between
744 leaves in self. It forces the tree to be binary, so the result may
745 not strictly correspond to the tree (if the tree wasn't binary), but
746 will be as close as possible. The tree object is not altered. Only
747 leaf node ids are output, in a newick-like format.
748 Returns : string
749 Args : none
751 =cut
753 sub simplify_to_leaves_string {
754 my $self = shift;
756 # Before contracting and forcing binary we need to clone self, but Clone.pm
757 # clone() seg faults and fails to make the clone, whilst Storable dclone
758 # needs $self->{_root_cleanup_methods} deleted (code ref) and seg faults at
759 # end of script. Let's make our own clone...
760 my $tree = $self->_clone;
762 $tree->contract_linear_paths(1);
763 $tree->force_binary;
764 foreach my $node ($tree->get_nodes) {
765 my $id = $node->id;
766 $id = ($node->is_Leaf && $id !~ /^artificial/) ? $id : '';
767 $node->id($id);
770 my %paired;
771 my @data = $self->_simplify_helper($tree->get_root_node, \%paired);
773 return join(',', @data);
777 # alias
778 sub _clone { shift->clone(@_) }
781 # safe node clone that doesn't seg fault, but deliberately loses ancestors and
782 # descendents
783 sub _clone_node {
784 my ($self, $node) = @_;
785 my $clone = $node->new;
787 while (my ($key, $val) = each %{$node}) {
788 if ($key eq '_desc' || $key eq '_ancestor') {
789 next;
791 ${$clone}{$key} = $val;
794 return $clone;
798 # tree string generator for simplify_to_leaves_string, based on
799 # Bio::TreeIO::newick::_write_tree_Helper
800 sub _simplify_helper {
801 my ($self, $node, $paired) = @_;
802 return () if (!defined $node);
804 my @data = ();
805 foreach my $node ($node->each_Descendent()) {
806 push(@data, $self->_simplify_helper($node, $paired));
809 my $id = $node->id_output || '';
810 if (@data) {
811 unless (exists ${$paired}{"@data"} || @data == 1) {
812 $data[0] = "(" . $data[0];
813 $data[-1] .= ")";
814 ${$paired}{"@data"} = 1;
817 elsif ($id) {
818 push(@data, $id);
821 return @data;
825 =head2 distance
827 Title : distance
828 Usage : distance(-nodes => \@nodes )
829 Function: returns the distance between two given nodes
830 Returns : numerical distance
831 Args : -nodes => arrayref of nodes to test
832 or ($node1, $node2)
834 =cut
836 sub distance {
837 my ($self,@args) = @_;
838 my ($nodes) = $self->_rearrange([qw(NODES)],@args);
839 if( ! defined $nodes ) {
840 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
841 return;
843 elsif (ref($nodes) eq 'ARRAY') {
846 elsif ( @args == 2) { # assume these are nodes...
847 $nodes = \@args;
849 else {
850 $self->warn("Must supply two nodes or -nodes parameter to distance() method");
851 return;
853 $self->throw("Must provide 2 nodes") unless @{$nodes} == 2;
855 my $lca = $self->get_lca(@{$nodes});
856 unless($lca) {
857 $self->warn("could not find the lca of supplied nodes; can't find distance either");
858 return;
861 my $cumul_dist = 0;
862 my $warned = 0;
863 foreach my $current_node (@{$nodes}) {
864 while (1) {
865 last if $current_node eq $lca;
866 if ($current_node->branch_length) {
867 $cumul_dist += $current_node->branch_length;
869 elsif (! $warned) {
870 $self->warn("At least some nodes do not have a branch length, the distance returned could be wrong");
871 $warned = 1;
874 $current_node = $current_node->ancestor || last;
878 return $cumul_dist;
882 =head2 is_monophyletic
884 Title : is_monophyletic
885 Usage : if( $tree->is_monophyletic(-nodes => \@nodes,
886 -outgroup => $outgroup)
887 Function: Will do a test of monophyly for the nodes specified
888 in comparison to a chosen outgroup
889 Returns : boolean
890 Args : -nodes => arrayref of nodes to test
891 -outgroup => outgroup to serve as a reference
893 =cut
895 sub is_monophyletic{
896 my ($self,@args) = @_;
897 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
899 if( ! defined $nodes || ! defined $outgroup ) {
900 $self->warn("Must supply -nodes and -outgroup parameters to the method
901 is_monophyletic");
902 return;
904 if( ref($nodes) !~ /ARRAY/i ) {
905 $self->warn("Must provide a valid array reference for -nodes");
908 my $clade_root = $self->get_lca(@{$nodes});
909 unless( defined $clade_root ) {
910 $self->warn("could not find clade root via lca");
911 return;
914 my $og_ancestor = $outgroup->ancestor;
915 while( defined ($og_ancestor ) ) {
916 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
917 # monophyly is violated
918 return 0;
920 $og_ancestor = $og_ancestor->ancestor;
922 return 1;
926 =head2 is_paraphyletic
928 Title : is_paraphyletic
929 Usage : if( $tree->is_paraphyletic(-nodes =>\@nodes,
930 -outgroup => $node) ){ }
931 Function: Tests whether or not a given set of nodes are paraphyletic
932 (representing the full clade) given an outgroup
933 Returns : [-1,0,1] , -1 if the group is not monophyletic
934 0 if the group is not paraphyletic
935 1 if the group is paraphyletic
936 Args : -nodes => Array of Bio::Tree::NodeI objects which are in the tree
937 -outgroup => a Bio::Tree::NodeI to compare the nodes to
940 =cut
942 sub is_paraphyletic{
943 my ($self,@args) = @_;
944 my ($nodes,$outgroup) = $self->_rearrange([qw(NODES OUTGROUP)],@args);
946 if( ! defined $nodes || ! defined $outgroup ) {
947 $self->warn("Must suply -nodes and -outgroup parameters to the method is_paraphyletic");
948 return;
950 if( ref($nodes) !~ /ARRAY/i ) {
951 $self->warn("Must provide a valid array reference for -nodes");
952 return;
955 # Algorithm
956 # Find the lca
957 # Find all the nodes beneath the lca
958 # Test to see that none are missing from the nodes list
959 my %nodehash;
960 foreach my $n ( @$nodes ) {
961 $nodehash{$n->internal_id} = $n;
964 my $clade_root = $self->get_lca(-nodes => $nodes );
965 unless( defined $clade_root ) {
966 $self->warn("could not find clade root via lca");
967 return;
970 my $og_ancestor = $outgroup->ancestor;
972 # Is this necessary/correct for paraphyly test?
973 while( defined ($og_ancestor ) ) {
974 if( $og_ancestor->internal_id == $clade_root->internal_id ) {
975 # monophyly is violated, could be paraphyletic
976 return -1;
978 $og_ancestor = $og_ancestor->ancestor;
980 my $tree = Bio::Tree::Tree->new(-root => $clade_root,
981 -nodelete => 1);
983 foreach my $n ( $tree->get_nodes() ) {
984 next unless $n->is_Leaf();
985 # if any leaf node is not in the list
986 # then it is part of the clade and so the list
987 # must be paraphyletic
988 return 1 unless ( $nodehash{$n->internal_id} );
990 return 0;
994 =head2 reroot
996 Title : reroot
997 Usage : $tree->reroot($node);
998 Function: Reroots a tree making a new node the root
999 Returns : 1 on success, 0 on failure
1000 Args : Bio::Tree::NodeI that is in the tree, but is not the current root
1002 =cut
1004 sub reroot {
1005 my ($self,$new_root) = @_;
1006 unless (defined $new_root && $new_root->isa("Bio::Tree::NodeI")) {
1007 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1008 return 0;
1011 my $old_root = $self->get_root_node;
1012 if( $new_root == $old_root ) {
1013 $self->warn("Node requested for reroot is already the root node!");
1014 return 0;
1016 my $anc = $new_root->ancestor;
1017 unless( $anc ) {
1018 # this is already the root
1019 $self->warn("Node requested for reroot is already the root node!");
1020 return 0;
1022 my $tmp_node = $new_root->create_node_on_branch(-position=>0,-force=>1);
1023 # reverse the ancestor & children pointers
1024 my $former_anc = $tmp_node->ancestor;
1025 my @path_from_oldroot = ($self->get_lineage_nodes($tmp_node), $tmp_node);
1026 for (my $i = 0; $i < $#path_from_oldroot; $i++) {
1027 my $current = $path_from_oldroot[$i];
1028 my $next = $path_from_oldroot[$i + 1];
1029 $current->remove_Descendent($next);
1030 $current->branch_length($next->branch_length);
1031 $current->bootstrap($next->bootstrap) if defined $next->bootstrap;
1032 $next->remove_tag('B');
1033 $next->add_Descendent($current);
1036 $new_root->add_Descendent($former_anc);
1037 $tmp_node->remove_Descendent($former_anc);
1039 $tmp_node = undef;
1040 $new_root->branch_length(undef);
1042 $old_root = undef;
1043 $self->set_root_node($new_root);
1045 return 1;
1049 =head2 reroot_at_midpoint
1051 Title : reroot_at_midpoint
1052 Usage : $tree->reroot_at_midpoint($node, $new_root_id);
1053 Function: Reroots a tree on a new node created halfway between the
1054 argument and its ancestor
1055 Returns : the new midpoint Bio::Tree::NodeIon success, 0 on failure
1056 Args : non-root Bio::Tree::NodeI currently in $tree
1057 scalar string, id for new node (optional)
1059 =cut
1061 sub reroot_at_midpoint {
1062 my $self = shift;
1063 my $node = shift;
1064 my $id = shift;
1066 unless (defined $node && $node->isa("Bio::Tree::NodeI")) {
1067 $self->warn("Must provide a valid Bio::Tree::NodeI when rerooting");
1068 return 0;
1071 my $midpt = $node->create_node_on_branch(-FRACTION=>0.5);
1072 if (defined $id) {
1073 $self->warn("ID argument is not a scalar") if (ref $id);
1074 $midpt->id($id) if defined($id) && !ref($id);
1076 $self->reroot($midpt);
1077 return $midpt;
1081 =head2 findnode_by_id
1083 Title : findnode_by_id
1084 Usage : my $node = $tree->findnode_by_id($id);
1085 Function: Get a node by its id (which should be
1086 unique for the tree)
1087 Returns : L<Bio::Tree::NodeI>
1088 Args : node id
1091 =cut
1093 sub findnode_by_id {
1094 my $tree = shift;
1095 $tree->deprecated("use of findnode_by_id() is deprecated; ".
1096 "use find_node() instead");
1097 my $id = shift;
1098 my $rootnode = $tree->get_root_node;
1099 if ( ($rootnode->id) and ($rootnode->id eq $id) ) {
1100 return $rootnode;
1102 # process all the children
1103 foreach my $node ( $rootnode->get_Descendents ) {
1104 if ( ($node->id) and ($node->id eq $id ) ) {
1105 return $node;
1111 =head2 move_id_to_bootstrap
1113 Title : move_id_to_bootstrap
1114 Usage : $tree->move_id_to_bootstrap
1115 Function: Move internal IDs to bootstrap slot
1116 Returns : undef
1117 Args : undef
1119 =cut
1121 sub move_id_to_bootstrap{
1122 my ($tree) = shift;
1123 for my $node ( grep { ! $_->is_Leaf } $tree->get_nodes ) {
1124 $node->bootstrap(defined $node->id ? $node->id : '');
1125 $node->id('');
1130 =head2 add_trait
1132 Title : add_trait
1133 Usage : my $key = $tree->add_trait($trait_file, 3);
1134 Function: Add traits to the leaf nodes of a Bio::Tree:Tree from a file.
1135 The trait file is a tab-delimited text file and needs to have a
1136 header line giving names to traits. The first column contains the
1137 leaf node ids. Subsequent columns contain different trait value sets.
1138 Single or double quotes are removed from the trait values. Traits
1139 are added to leaf nodes as a tag named $key using the add_tag_value()
1140 method. This means that you can retrieve the trait values using the
1141 get_tag_values() method (see the documentation for Bio::Tree::Node).
1142 Returns : Trait name (a scalar) on success, undef on failure (for example, if
1143 the column index requested was too large).
1144 Args : * Name of trait file (scalar string).
1145 * Index of trait file column (scalar int). Note that numbering starts
1146 at 0. Default: 1 (second column).
1147 * Ignore missing values. Typically, if a leaf node has no value in
1148 the trait file, an exception is thrown. If you set this option to
1149 1, then no trait will be given to the node (no exception thrown).
1151 =cut
1153 sub _read_trait_file {
1154 my ($self, $file, $column) = @_;
1155 $column ||= 1;
1157 my $trait_name;
1158 my $trait_values;
1159 open my $TRAIT, '<', $file or $self->throw("Could not read file '$file': $!");
1161 my $first_line = 1;
1162 while (<$TRAIT>) {
1163 chomp;
1164 s/['"]//g;
1165 my @line = split /\t/;
1166 if ($first_line) {
1167 $first_line = 0;
1168 $trait_name = $line[$column];
1169 next;
1172 my $id = $line[0];
1173 last if (not defined $id) or ($id eq '');
1175 # Skip empty trait values
1176 my $value = $line[$column];
1177 next if (not defined $value) or ($value eq '');
1179 $trait_values->{$id} = $value;
1182 close $TRAIT;
1183 return $trait_name, $trait_values;
1186 sub add_trait {
1187 my ($self, $file, $column, $ignore) = @_;
1188 $ignore = 0 if not defined $ignore;
1190 my ($trait_name, $trait_values) = $self->_read_trait_file($file, $column);
1192 if (defined $trait_name) {
1194 for my $node ($self->get_leaf_nodes) {
1196 # strip quotes from the node id
1197 $node->id($1) if $node->id =~ /^['"]+(.*)['"]+$/;
1199 if ( not exists $trait_values->{$node->id} ) {
1200 if ($ignore) {
1201 next;
1202 } else {
1203 $self->throw("No trait for node [".$node->id."/".$node->internal_id."]");
1207 $node->add_tag_value($trait_name, $trait_values->{ $node->id } );
1211 return $trait_name;