Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / Tree / Node.pm
blob7dddd8fb5e4e4f652e64b62bf787d114d935d00b
2 # BioPerl module for Bio::Tree::Node
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::Node - A Simple Tree Node
18 =head1 SYNOPSIS
20 use Bio::Tree::Node;
21 my $nodeA = Bio::Tree::Node->new();
22 my $nodeL = Bio::Tree::Node->new();
23 my $nodeR = Bio::Tree::Node->new();
25 my $node = Bio::Tree::Node->new();
26 $node->add_Descendent($nodeL);
27 $node->add_Descendent($nodeR);
29 print "node is not a leaf \n" if( $node->is_leaf);
31 =head1 DESCRIPTION
33 Makes a Tree Node suitable for building a Tree.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to
41 the Bioperl mailing list. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 of the bugs and their resolution. Bug reports can be submitted via
61 the web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Jason Stajich
67 Email jason-at-bioperl-dot-org
69 =head1 CONTRIBUTORS
71 Aaron Mackey, amackey-at-virginia-dot-edu
72 Sendu Bala, bix@sendu.me.uk
74 =head1 APPENDIX
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
79 =cut
82 # Let the code begin...
84 package Bio::Tree::Node;
86 use vars qw($CREATIONORDER);
87 use strict;
89 use base qw(Bio::Root::Root Bio::Tree::NodeI);
91 BEGIN {
92 $CREATIONORDER = 1;
95 =head2 new
97 Title : new
98 Usage : my $obj = Bio::Tree::Node->new();
99 Function: Builds a new Bio::Tree::Node object
100 Returns : Bio::Tree::Node
101 Args : -descendents => arrayref of descendents (they will be
102 updated s.t. their ancestor point is this
103 node)
104 -branch_length => branch length [integer] (optional)
105 -bootstrap => value bootstrap value (string)
106 -description => description of node
107 -id => human readable id for node
109 =cut
111 sub new {
112 my($class,@args) = @_;
114 my $self = $class->SUPER::new(@args);
115 my ($children, $branchlen,$id,
116 $bootstrap, $desc,$d) = $self->_rearrange([qw(
117 DESCENDENTS
118 BRANCH_LENGTH
120 BOOTSTRAP
121 DESC
122 DESCRIPTION
124 @args);
125 $self->_register_for_cleanup(\&node_cleanup);
126 $self->{'_desc'} = {}; # for descendents
127 if( defined $d && defined $desc ) {
128 $self->warn("can only accept -desc or -description, not both, accepting -description");
129 $desc = $d;
130 } elsif( defined $d && ! defined $desc ) {
131 $desc = $d;
133 defined $desc && $self->description($desc);
134 defined $bootstrap && $self->bootstrap($bootstrap);
135 defined $id && $self->id($id);
136 defined $branchlen && $self->branch_length($branchlen);
137 if( defined $children ) {
138 if( ref($children) !~ /ARRAY/i ) {
139 $self->throw("Must specify a valid ARRAY reference to initialize a Node's Descendents");
141 foreach my $c ( @$children ) {
142 $self->add_Descendent($c);
145 $self->_creation_id($CREATIONORDER++);
146 return $self;
149 =head2 create_node_on_branch
151 Title : create_node_on_branch
152 Usage : $node->create_node_on_branch($at_length)
153 Function: Create a node on the ancestral branch of the calling
154 object.
155 Example :
156 Returns : the created node
157 Args : -POSITION=>$absolute_branch_length_from_caller (default)
158 -FRACTION=>$fraction_of_branch_length_from_caller
159 -ANNOT=>{ -id => "the id", -desc => "the description" }
160 -FORCE, set to allow nodes with zero branch lengths
162 =cut
164 sub create_node_on_branch{
165 my ($self,@args) = @_;
166 my ($pos, $frac, $annot, $force) = $self->_rearrange([qw(POSITION FRACTION ANNOT FORCE)], @args);
167 my ($newpos);
168 my $blen = $self->branch_length;
169 # arg checks
170 $force||=0;
171 $annot||={};
173 unless ($self->ancestor) {
174 $self->throw("Refusing to create nodes above the root--exiting");
176 unless ($blen) {
177 $self->throw("Calling node's branch length is zero") unless $force;
179 unless ((defined $pos && !defined $frac)||(defined $frac && !defined $pos)) {
180 $self->throw("Either position or fraction must be specified, but not both");
182 if (defined $frac) {
183 $self->throw("FRACTION arg must be in the range [0,1]") unless ( (0 <= $frac) && ($frac <= 1) );
184 $newpos = $frac*$blen;
186 elsif (defined $pos) {
187 $self->throw("POSITION arg must be in the range [0,$blen]") unless ( (0 <= $pos) && ($pos <= $blen) );
188 $newpos = $pos;
190 else {
191 $self->throw("How did I get here?");
193 $self->throw("Calling node's branch length will be zero (set -FORCE to force)--exiting") unless ($newpos > 0) || $force;
194 $self->throw("Created nodes branch length would be zero (set -FORCE to force)--exiting") unless ($newpos < $blen) || $force;
196 #guts
197 $annot->{'-branch_length'} = $blen-$newpos;
198 my $node = Bio::Tree::Node->new(%$annot);
199 my $anc = $self->ancestor;
200 # null anc check is above
201 $node->add_Descendent($self);
202 $anc->add_Descendent($node);
203 $anc->remove_Descendent($self);
204 $self->branch_length($newpos);
205 return $node;
208 =head2 add_Descendent
210 Title : add_Descendent
211 Usage : $node->add_Descendent($node);
212 Function: Adds a descendent to a node
213 Returns : number of current descendents for this node
214 Args : Bio::Node::NodeI
215 boolean flag, true if you want to ignore the fact that you are
216 adding a second node with the same unique id (typically memory
217 location reference in this implementation). default is false and
218 will throw an error if you try and overwrite an existing node.
220 =cut
222 sub add_Descendent{
223 my ($self,$node,$ignoreoverwrite) = @_;
224 return -1 if( ! defined $node );
226 if( ! ref($node) ||
227 ref($node) =~ /HASH/ ||
228 ! $node->isa('Bio::Tree::NodeI') ) {
229 $self->throw("Trying to add a Descendent who is not a Bio::Tree::NodeI");
230 return -1;
233 $self->{_adding_descendent} = 1;
234 # avoid infinite recurse
235 $node->ancestor($self) unless $node->{_setting_ancestor};
236 $self->{_adding_descendent} = 0;
238 if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) {
239 $self->throw("Going to overwrite a node which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future");
241 $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate??
243 $self->invalidate_height();
245 return scalar keys %{$self->{'_desc'}};
248 =head2 each_Descendent
250 Title : each_Descendent($sortby)
251 Usage : my @nodes = $node->each_Descendent;
252 Function: all the descendents for this Node (but not their descendents
253 i.e. not a recursive fetchall)
254 Returns : Array of Bio::Tree::NodeI objects
255 Args : $sortby [optional] "height", "creation", "alpha", "revalpha",
256 or coderef to be used to sort the order of children nodes.
258 =cut
260 sub each_Descendent{
261 my ($self, $sortby) = @_;
263 # order can be based on branch length (and sub branchlength)
264 $sortby ||= 'none';
265 if (ref $sortby eq 'CODE') {
266 my @values = sort { $sortby->($a,$b) } values %{$self->{'_desc'}};
267 return @values;
268 } elsif ($sortby eq 'height') {
269 return map { $_->[0] }
270 sort { $a->[1] <=> $b->[1] ||
271 $a->[2] <=> $b->[2] }
272 map { [$_, $_->height, $_->internal_id ] }
273 values %{$self->{'_desc'}};
274 } elsif( $sortby eq 'alpha' ) {
275 my @set;
276 for my $v ( values %{$self->{'_desc'}} ) {
277 unless( $v->is_Leaf ) {
278 my @lst = ( sort { $a cmp $b } map { $_->id }
279 grep { $_->is_Leaf }
280 $v->get_all_Descendents($sortby));
281 push @set, [$v, $lst[0], $v->internal_id];
282 } else {
283 push @set, [$v, $v->id, $v->internal_id];
286 return map { $_->[0] }
287 sort {$a->[1] cmp $b->[1] || $a->[2] <=> $b->[2] } @set;
288 } elsif( $sortby eq 'revalpha' ) {
289 my @set;
290 for my $v ( values %{$self->{'_desc'}} ) {
291 if( ! defined $v->id &&
292 ! $v->is_Leaf ) {
293 my ($l) = ( sort { $b cmp $a } map { $_->id }
294 grep { $_->is_Leaf }
295 $v->get_all_Descendents($sortby));
296 push @set, [$v, $l, $v->internal_id];
297 } else {
298 push @set, [$v, $v->id, $v->internal_id];
301 return map { $_->[0] }
302 sort {$b->[1] cmp $a->[1] || $b->[2] <=> $a->[2] } @set;
303 } else { # creation
304 return map { $_->[0] }
305 sort { $a->[1] <=> $b->[1] }
306 map { [$_, $_->internal_id ] }
307 grep {defined $_}
308 values %{$self->{'_desc'}};
312 =head2 remove_Descendent
314 Title : remove_Descendent
315 Usage : $node->remove_Descendent($node_foo);
316 Function: Removes a specific node from being a Descendent of this node
317 Returns : nothing
318 Args : An array of Bio::Node::NodeI objects which have been previously
319 passed to the add_Descendent call of this object.
321 =cut
323 sub remove_Descendent{
324 my ($self,@nodes) = @_;
325 my $c= 0;
326 foreach my $n ( @nodes ) {
327 if( $self->{'_desc'}->{$n->internal_id} ) {
328 $self->{_removing_descendent} = 1;
329 $n->ancestor(undef);
330 $self->{_removing_descendent} = 0;
331 # should be redundant
332 $self->{'_desc'}->{$n->internal_id}->ancestor(undef);
333 delete $self->{'_desc'}->{$n->internal_id};
334 $c++;
335 } else {
336 if( $self->verbose ) {
337 $self->debug(sprintf("no node %s (%s) listed as a descendent in this node %s (%s)\n",$n->id, $n,$self->id,$self));
338 $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n");
345 =head2 remove_all_Descendents
347 Title : remove_all_Descendents
348 Usage : $node->remove_All_Descendents()
349 Function: Cleanup the node's reference to descendents and reset
350 their ancestor pointers to undef, if you don't have a reference
351 to these objects after this call they will be cleaned up - so
352 a get_nodes from the Tree object would be a safe thing to do first
353 Returns : nothing
354 Args : none
356 =cut
358 sub remove_all_Descendents{
359 my ($self) = @_;
360 # This won't cleanup the nodes themselves if you also have
361 # a copy/pointer of them (I think)...
363 # That's true. But that's not a bug; if we retain a reference to them it's
364 # very possible we want to keep them. The only way to truly destroy them is
365 # to call DESTROY on the instance.
367 while( my ($node,$val) = each %{ $self->{'_desc'} } ) {
368 delete $self->{'_desc'}->{$node}
370 $self->{'_desc'} = {};
374 =head2 get_all_Descendents
376 Title : get_all_Descendents
377 Usage : my @nodes = $node->get_all_Descendents;
378 Function: Recursively fetch all the nodes and their descendents
379 *NOTE* This is different from each_Descendent
380 Returns : Array or Bio::Tree::NodeI objects
381 Args : none
383 =cut
385 # get_all_Descendents implemented in the interface
387 =head2 ancestor
389 Title : ancestor
390 Usage : $obj->ancestor($newval)
391 Function: Set the Ancestor
392 Returns : ancestral node
393 Args : newvalue (optional)
395 =cut
397 sub ancestor {
398 my $self = shift;
399 if (@_) {
400 my $new_ancestor = shift;
402 # we can set ancestor to undef
403 if ($new_ancestor) {
404 $self->throw("This is [$new_ancestor], not a Bio::Tree::NodeI")
405 unless $new_ancestor->isa('Bio::Tree::NodeI');
408 my $old_ancestor = $self->{'_ancestor'} || '';
409 if (!$old_ancestor ||
410 ($old_ancestor && ( !$new_ancestor ||
411 $new_ancestor ne $old_ancestor)) ) {
412 if( $old_ancestor && ! $old_ancestor->{_removing_descendent}) {
413 $old_ancestor->remove_Descendent($self);
415 if ($new_ancestor &&
416 ! $new_ancestor->{_adding_descendent} ) { # avoid infinite recurse
417 $self->{_setting_ancestor} = 1;
418 $new_ancestor->add_Descendent($self, 1);
419 $self->{_setting_ancestor} = 0;
422 $self->{'_ancestor'} = $new_ancestor;
425 return $self->{'_ancestor'};
428 =head2 branch_length
430 Title : branch_length
431 Usage : $obj->branch_length()
432 Function: Get/Set the branch length
433 Returns : value of branch_length
434 Args : newvalue (optional)
436 =cut
438 sub branch_length{
439 my $self = shift;
440 if( @_ ) {
441 my $bl = shift;
442 if( defined $bl &&
443 $bl =~ s/\[(\d+)\]// ) {
444 $self->bootstrap($1);
446 $self->{'_branch_length'} = $bl;
447 $self->invalidate_height();
449 return $self->{'_branch_length'};
452 =head2 bootstrap
454 Title : bootstrap
455 Usage : $obj->bootstrap($newval)
456 Function: Get/Set the bootstrap value
457 Returns : value of bootstrap
458 Args : newvalue (optional)
460 =cut
462 sub bootstrap {
463 my $self = shift;
464 if( @_ ) {
465 if( $self->has_tag('B') ) {
466 $self->remove_tag('B');
468 $self->add_tag_value('B',shift);
470 return ($self->get_tag_values('B'))[0];
473 =head2 description
475 Title : description
476 Usage : $obj->description($newval)
477 Function: Get/Set the description string
478 Returns : value of description
479 Args : newvalue (optional)
481 =cut
483 sub description {
484 my $self = shift;
485 $self->{'_description'} = shift @_ if @_;
486 return $self->{'_description'};
489 =head2 id
491 Title : id
492 Usage : $obj->id($newval)
493 Function: The human readable identifier for the node
494 Returns : value of human readable id
495 Args : newvalue (optional)
497 "A name can be any string of printable characters except blanks,
498 colons, semicolons, parentheses, and square brackets. Because you may
499 want to include a blank in a name, it is assumed that an underscore
500 character ("_") stands for a blank; any of these in a name will be
501 converted to a blank when it is read in."
503 from L<http://evolution.genetics.washington.edu/phylip/newicktree.html>
505 Also note that these objects now support spaces, ();: because we can
506 automatically quote the strings if they contain these characters. The
507 L<id_output> method does this for you so use the id() method to get
508 the raw string while L<id_output> to get the pre-escaped string.
510 =cut
512 sub id {
513 my ($self, $value) = @_;
514 if (defined $value) {
515 #$self->warn("Illegal characters ();: and space in the id [$value], converting to _ ")
516 # if $value =~ /\(\);:/ and $self->verbose >= 0;
517 #$value =~ s/[\(\);:\s]/_/g;
518 $self->{'_id'} = $value;
520 return $self->{'_id'};
523 =head2 Helper Functions
525 =cut
527 =head2 id_output
529 Title : id_output
530 Usage : my $id = $node->id_output;
531 Function: Return an id suitable for output in format like newick
532 so that if it contains spaces or ():; characters it is properly
533 quoted
534 Returns : $id string if $node->id has a value
535 Args : none
537 =cut
539 # implemented in NodeI interface
541 =head2 internal_id
543 Title : internal_id
544 Usage : my $internalid = $node->internal_id
545 Function: Returns the internal unique id for this Node
546 (a monotonically increasing number for this in-memory implementation
547 but could be a database determined unique id in other
548 implementations)
549 Returns : unique id
550 Args : none
552 =cut
554 sub internal_id {
555 return $_[0]->_creation_id;
558 =head2 _creation_id
560 Title : _creation_id
561 Usage : $obj->_creation_id($newval)
562 Function: a private method signifying the internal creation order
563 Returns : value of _creation_id
564 Args : newvalue (optional)
566 =cut
568 sub _creation_id {
569 my $self = shift @_;
570 $self->{'_creation_id'} = shift @_ if( @_);
571 return $self->{'_creation_id'} || 0;
574 =head2 Bio::Node::NodeI decorated interface implemented
576 The following methods are implemented by L<Bio::Node::NodeI> decorated
577 interface.
579 =head2 is_Leaf
581 Title : is_Leaf
582 Usage : if( $node->is_Leaf )
583 Function: Get Leaf status
584 Returns : boolean
585 Args : none
587 =cut
589 sub is_Leaf {
590 my ($self) = @_;
591 my $isleaf = ! (defined $self->{'_desc'} &&
592 (keys %{$self->{'_desc'}} > 0) );
593 return $isleaf;
596 =head2 height
598 Title : height
599 Usage : my $len = $node->height
600 Function: Returns the height of the tree starting at this
601 node. Height is the maximum branchlength to get to the tip.
602 Returns : The longest length (weighting branches with branch_length) to a leaf
603 Args : none
605 =cut
607 sub height {
608 my ($self) = @_;
609 return $self->{'_height'} if( defined $self->{'_height'} );
611 return 0 if( $self->is_Leaf );
612 my $max = 0;
613 foreach my $subnode ( $self->each_Descendent ) {
614 my $bl = $subnode->branch_length;
615 $bl = 1 unless (defined $bl && $bl =~ /^\-?\d+(\.\d+)?$/);
616 my $s = $subnode->height + $bl;
617 if( $s > $max ) { $max = $s; }
619 return ($self->{'_height'} = $max);
622 =head2 invalidate_height
624 Title : invalidate_height
625 Usage : private helper method
626 Function: Invalidate our cached value of the node height in the tree
627 Returns : nothing
628 Args : none
630 =cut
632 sub invalidate_height {
633 my ($self) = @_;
635 $self->{'_height'} = undef;
636 if( defined $self->ancestor ) {
637 $self->ancestor->invalidate_height;
641 =head2 set_tag_value
643 Title : set_tag_value
644 Usage : $node->set_tag_value($tag,$value)
645 $node->set_tag_value($tag,@values)
646 Function: Sets a tag value(s) to a node. Replaces old values.
647 Returns : number of values stored for this tag
648 Args : $tag - tag name
649 $value - value to store for the tag
651 =cut
653 sub set_tag_value{
654 my ($self,$tag,@values) = @_;
655 if( ! defined $tag || ! scalar @values ) {
656 $self->warn("cannot call set_tag_value with an undefined value");
658 $self->remove_tag ($tag);
659 map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
660 return scalar @{$self->{'_tags'}->{$tag}};
664 =head2 add_tag_value
666 Title : add_tag_value
667 Usage : $node->add_tag_value($tag,$value)
668 Function: Adds a tag value to a node
669 Returns : number of values stored for this tag
670 Args : $tag - tag name
671 $value - value to store for the tag
673 =cut
675 sub add_tag_value{
676 my ($self,$tag,$value) = @_;
677 if( ! defined $tag || ! defined $value ) {
678 $self->warn("cannot call add_tag_value with an undefined value".($tag ? " ($tag)" : ''));
679 $self->warn($self->stack_trace_dump,"\n");
681 push @{$self->{'_tags'}->{$tag}}, $value;
682 return scalar @{$self->{'_tags'}->{$tag}};
685 =head2 remove_tag
687 Title : remove_tag
688 Usage : $node->remove_tag($tag)
689 Function: Remove the tag and all values for this tag
690 Returns : boolean representing success (0 if tag does not exist)
691 Args : $tag - tagname to remove
694 =cut
696 sub remove_tag {
697 my ($self,$tag) = @_;
698 if( exists $self->{'_tags'}->{$tag} ) {
699 $self->{'_tags'}->{$tag} = undef;
700 delete $self->{'_tags'}->{$tag};
701 return 1;
703 return 0;
706 =head2 remove_all_tags
708 Title : remove_all_tags
709 Usage : $node->remove_all_tags()
710 Function: Removes all tags
711 Returns : None
712 Args : None
714 =cut
716 sub remove_all_tags{
717 my ($self) = @_;
718 $self->{'_tags'} = {};
719 return;
722 =head2 get_all_tags
724 Title : get_all_tags
725 Usage : my @tags = $node->get_all_tags()
726 Function: Gets all the tag names for this Node
727 Returns : Array of tagnames
728 Args : None
730 =cut
732 sub get_all_tags{
733 my ($self) = @_;
734 my @tags = sort keys %{$self->{'_tags'} || {}};
735 return @tags;
738 =head2 get_tag_values
740 Title : get_tag_values
741 Usage : my @values = $node->get_tag_values($tag)
742 Function: Gets the values for given tag ($tag)
743 Returns : In array context returns an array of values
744 or an empty list if tag does not exist.
745 In scalar context returns the first value or undef.
746 Args : $tag - tag name
748 =cut
750 sub get_tag_values{
751 my ($self,$tag) = @_;
752 return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
753 (@{$self->{'_tags'}->{$tag} || []})[0];
756 =head2 has_tag
758 Title : has_tag
759 Usage : $node->has_tag($tag)
760 Function: Boolean test if tag exists in the Node
761 Returns : Boolean
762 Args : $tag - tagname
764 =cut
766 sub has_tag {
767 my ($self,$tag) = @_;
768 return exists $self->{'_tags'}->{$tag};
771 sub node_cleanup {
772 my $self = shift;
773 return unless defined $self;
775 #*** below is wrong, cleanup doesn't actually occur. Will replace with:
776 # $self->remove_all_Descendents; once further fixes in place..
777 #if( defined $self->{'_desc'} &&
778 # ref($self->{'_desc'}) =~ /HASH/i ) {
779 # while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) {
780 # $node->ancestor(undef); # insure no circular references
781 # $node = undef;
784 $self->remove_all_Descendents;
786 #$self->{'_desc'} = {};
790 =head2 reverse_edge
792 Title : reverse_edge
793 Usage : $node->reverse_edge(child);
794 Function: makes child be a parent of node
795 Requires: child must be a direct descendent of node
796 Returns : 1 on success, 0 on failure
797 Args : Bio::Tree::NodeI that is in the tree
799 =cut
801 sub reverse_edge {
802 my ($self,$node) = @_;
803 if( $self->delete_edge($node) ) {
804 $node->add_Descendent($self);
805 return 1;
807 return 0;