Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / lib / CXGN / Phylo / Tree.pm
blob124a6354e8feda2ebac5350eed9acfc341d3fa99
1 package CXGN::Phylo::Tree;
3 =head1 NAME
5 CXGN::Phylo::Tree - an object to handle trees
7 =head1 USAGE
9 my $tree = CXGN::Phylo::Tree->new();
10 my $root = $tree->get_root();
11 my $node = $root->add_child();
12 $node->set_name("I'm a child node");
13 $node->set_link("http://solgenomics.net/");
14 my $child_node = $node->add_child();
15 $child_node->set_name("I'm a grand-child node");
16 print $tree->generate_newick();
18 =head1 DESCRIPTION
20 The tree object provides metadata for tree data structures. The tree data structure itself is defined with node objjects (L<CXGN::Phylo::Node>), for which the tree object stores the root node, which gives access to the entire tree structure using appropriate node functions such as get_children(). The tree object also provides convenience functions, which usually map to node functions on the root node.
22 For faster access of individual nodes, the tree object keeps a hash of nodes keyed by a unique id for each node. The tree object also provides a function to obtain new unique node ids.
24 The tree object also provides the layout and rendering functions. The both layout and rendering are defined by L<CXGN::Phylo::Layout> and L<CXGN::Phylo::Renderer> objects, of which several versions exist that provide different tree layouts and renderings.
27 =head1 AUTHORS
29 Lukas Mueller (lam87@cornell.edu)
30 Tom York (tly2@cornell.edu)
32 =cut
34 use strict;
35 use warnings;
37 use CXGN::Phylo::Node;
38 use CXGN::Phylo::Species_name_map;
39 use CXGN::Phylo::Layout;
40 use CXGN::Phylo::Renderer;
41 use CXGN::Phylo::Parser;
43 use base qw | CXGN::DB::Object |;
45 my $do_parse_set_error = 0;
47 =head2 function new()
49 Synopsis: my $t = CXGN::Phylo::Tree->new()
50 Arguments: none
51 Returns: an instance of a Tree object.
52 Side effects: creates the object and initializes some parameters.
53 Description:
55 =cut
57 sub new {
58 my $class = shift;
59 my $self = bless {}, $class;
60 #You can feed constructor with a newick string, which will create
61 #a parser object that creates a tree object *without* passing a
62 #string, which would lead to an infinite loop. Watch out!
63 my $arg = shift;
64 my $newick_string = "";
65 unless (ref($arg)) {
66 # print STDERR "Tree::new. [$newick_string] \n";
67 $newick_string = $arg;
68 # print STDERR "Tree::new. [$newick_string] \n";
69 } else {
70 my $newick_file = '';
71 if ($arg->{from_files}) {
72 $newick_file = $arg->{from_files}->{newick};
73 die "Need a newick file if 'from_files' is used\n" unless -f $newick_file;
75 $self = _tree_from_file($newick_file);
76 my $alignment_file = $arg->{from_files}->{alignment};
77 if ($alignment_file) {
78 die "Alignment file: $alignment_file not found" unless -f $alignment_file;
79 my $alignment = CXGN::Phylo::Alignment->new( from_file=>$alignment_file);
80 $self->set_alignment($alignment);
81 $self->standard_alignment_leaf_association();
83 return $self;
84 } elsif ($arg->{from_file}) {
85 $newick_file = $arg->{from_file};
86 $self = _tree_from_file($newick_file);
87 return $self;
90 if ($newick_string) {
91 $newick_string =~ s/\s//g;
92 $newick_string =~ s/\n|\r//sg;
93 if ($newick_string =~ /^\(.*\)|;$/) { # start with (, end with ) or ;
94 # print STDERR "in Tree::new, about to parse the newick_string \n";
95 my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error);
96 print "calling parse in Tree constructor\n";
97 my $self = $parser->parse();
98 return $self;
99 } elsif ($newick_string) {
100 print STDERR "String passed not recognized as newick\n";
104 ##############################################################
105 #$self is a new tree, not predefined by newick; instead it will be
106 #constructed by methods on this object and Phylo::Node's
108 #print STDERR "constructing Tree not predefined by a newick\n";
110 $self->set_unique_node_key(0);
112 # initialize the root node
114 my $root = CXGN::Phylo::Node->new();
115 $root->set_name(".");
116 $root->set_tree($self);
117 $root->set_node_key($self->get_unique_node_key());
118 $self->add_node_hash($root, $root->get_node_key());
119 $self->set_root($root);
121 # initialize some imaging parameters
123 $self->set_show_labels(1);
124 $self->set_hilite_color(255, 0 ,0);
125 $self->set_line_color(100, 100, 100);
126 $self->set_bgcolor(0, 0, 0);
127 $self->set_show_species_in_label(0);
128 $self->set_show_standard_species(0);
129 $self->set_species_standardizer(CXGN::Phylo::Species_name_map->new());
131 #Attribute names to show in newick extended format
132 $self->{newick_shown_attributes} = {};
133 $self->{shown_branch_length_transformation} = "branch_length"; # other possibilities: "proportion_different", equal
134 $self->{min_shown_branch_length} = 0.001; # when showing branches graphically, this is added to the displayed length
135 $self->set_min_branch_length(0.0001);
136 # initialize a default layout and renderer
138 $self->set_layout( CXGN::Phylo::Layout->new($self) );
139 $self->set_renderer( CXGN::Phylo::PNG_tree_renderer->new($self) );
141 return $self;
144 # copy some of the tree's fields. Other fields will just have default values as set in constructor
145 # e.g. layout and renderer aren't copied because there is no copy method for these objects
146 sub copy_tree_fields{
147 my $self = shift; # source
148 my $new = shift; # copy
150 $new->set_name($self->get_name());
152 # initialize some imaging parameters
154 $new->set_show_labels($self->get_show_labels());
155 $new->set_hilite_color($self->get_hilite_color());
156 $new->set_line_color($self->get_line_color());
157 $new->set_bgcolor($self->set_bgcolor());
159 $new->set_species_standardizer($self->get_species_standardizer()->copy()) if (defined $self->get_species_standardizer()) ;
160 $new->set_show_species_in_label($self->get_show_species_in_label());
161 $new->set_show_standard_species($self->get_show_standard_species());
163 #Attribute names to show in newick extended format
164 # $new->{newick_shown_attributes} = $self->{newick_shown_attributes};
165 # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}};
166 %{$new->{newick_shown_attributes}} = %{$self->{newick_shown_attributes}};
170 =head2 function copy()
172 Synopsis: my $t_copy = $a_tree->copy()
173 Arguments: none
174 Returns: A copy of $a_tree
175 Side effects: creates the object, and makes it be a copy.
176 Description:
178 =cut
180 sub copy {
181 my $self = shift;
182 my $new = $self->get_root()->copy_subtree();
183 $new->update_label_names();
184 return $new;
188 sub _tree_from_file {
189 my $file = shift;
190 my $tree = _tree_from_newick(_newick_from_file($file));
191 $tree->standard_layout();
192 return $tree;
195 sub _tree_from_newick {
196 my $newick_string = shift;
197 $newick_string =~ s/\s//g; # remove whitespace from newick string
198 $newick_string =~ s/\n|\r//g;
199 if($newick_string =~ /^\(.*\)|;$/){
200 my $parser = CXGN::Phylo::Parse_newick->new($newick_string, $do_parse_set_error);
201 print "parsing tree in Tree::_tree_from_newick\n";
202 my $tree = $parser->parse();
203 return $tree;
205 elsif($newick_string) {
206 print STDERR "String passed not recognized as newick\n";
207 return undef;
211 sub _newick_from_file {
212 my $file = shift;
213 open(FH, $file) or die "Can't open file: $file\n";
214 my $newick = "";
215 $newick .= $_ while (<FH>);
216 close FH;
217 $newick =~ s/\n//sg;
218 $newick =~ s/\r//sg;
219 $newick =~ s/\s//g;
220 return $newick;
223 sub get_alignment {
224 my $self = shift;
225 return $self->{alignment};
228 sub set_alignment {
229 my $self = shift;
230 $self->{alignment} = shift;
231 unless(@{$self->{alignment}->{members}}){
232 warn "The alignment set to the tree has no members. You must construct the alignment before setting it here";
233 return -1;
237 =head2 function standard_alignment_leaf_association()
239 Associate alignment members to leaf nodes based
240 on id/name equality
242 =cut
244 sub standard_alignment_leaf_association {
245 my $self = shift;
246 my $alignment = $self->get_alignment();
247 return unless $alignment;
248 my %id2mem = ();
249 foreach my $m ($alignment->get_members()) {
250 $id2mem{$m->get_id()} = $m;
252 foreach my $l ($self->get_leaves()) {
253 my $m = $id2mem{$l->get_name()};
254 next unless $m;
255 $l->set_alignment_member($m);
259 =head2 function get_root()
261 Synopsis: my $node = $t->get_root();
262 Arguments: none
263 Returns: a Node object, which is the root of the tree.
264 Side effects:
265 Description:
266 See also: $node->is_root()
268 =cut
270 sub get_root {
271 my $self=shift;
272 return $self->{root};
275 =head2 function set_root()
277 Synopsis: $t->set_root($node);
278 Arguments: a Node object
279 Returns: nothing
280 Side effects: the node $node will be defined as the root of the tree.
281 Note that $node->is_root() must evaluate to true,
282 set_root() will therefore set the parent of the root
283 to undef.
284 See also: prune_to_subtree() - takes a node as a parameter and
285 will create a sub-branch of the tree. It throws away all
286 other nodes that are not part of the sub-branch.
287 reset_root() - resets the root to the specified node and
288 inverts the parent child relationships from the
289 specified node upwards to the root.
291 =cut
293 sub set_root {
294 my $self=shift;
295 my $new_root=shift;
296 $new_root->set_parent(undef); #is_root must be true
297 $new_root->set_branch_length(undef);
298 $self->{root}=$new_root;
301 =head2 function delete_node() and del_node()
303 Synopsis: $tree->delete_node($node->get_node_key());
304 $tree->del_node($node);
305 Arguments: delete_node: a unique node key, del_node: a node object.
306 Returns: nothing if operation is successful, 1 if operation
307 not succesful (because it was attempted to delete
308 the root node).
309 Side effects: Adds the branch length to each of its children,
310 Recalculates the leaf list and node hash.
311 Note: The root node cannot be deleted.
312 Description:
314 =cut
316 sub delete_node {
317 my $self = shift;
318 my $node_key = shift;
320 # get the node object from the key
322 my $node=$self->get_node($node_key);
323 return $self->del_node($node);
326 # delete node by passing node object as argument
327 # rather than node key as with delete_node
328 sub del_node{
329 my $self = shift;
330 my $node = shift;
331 if (!$node) {
332 warn 'The node you want to delete does not exist!'; return;
334 my $retval = $node->delete_self();
335 $self->recalculate_tree_data();
336 return $retval;
339 =head2 function recalculate_tree_data()
341 Synopsis:
342 Arguments: none
343 Returns: nothing
344 Side effects: recalculates the leaf list, the node hash, and all the
345 subtree distances. It does not affect the node keys.
346 Description:
348 =cut
350 sub recalculate_tree_data {
351 my $self = shift;
352 $self->calculate_leaf_list();
353 $self->clear_node_hash();
354 $self->regenerate_node_hash($self->get_root());
355 $self->get_root()->calculate_distances_from_root();
356 $self->get_root()->recursive_clear_properties();
359 =head2 function prune_to_subtree()
361 Synopsis: $a_tree->prune_to_subtree($node);
362 Arguments: a node object, the root of the subtree to be kept.
363 Returns:
364 Side effects: Prunes the tree.
365 Description: Prune the tree so that only $node and its subtree
366 is left, with $node as the new root.
367 (sub_branch is synonymous)
369 =cut
371 sub prune_to_subtree {
372 my $self = shift;
373 my $new_root_node = shift;
375 $self->set_root($new_root_node);
376 $self->recalculate_tree_data();
379 #=head2 function sub_branch()
381 # Synopsis: deprecated, synonym for prune_to_subtree
383 #=cut
385 #sub sub_branch {
386 # if (0) {
387 # my $self = shift;
388 # my $new_root_node = shift;
390 # $self->set_root($new_root_node);
391 # $self->recalculate_tree_data();
392 # } else {
393 # prune_to_subtree(@_);
398 =head2 function reset_root()
400 Synopsis: $tree->reset_root($node);
401 Arguments: a node object that will be the new root node
402 Returns: nothing
403 Side effects: recalculates the tree parameters using the new
404 root node
405 Description: reverses all the parent-child relationships
406 between the node $node and the old root node,
407 then sets the tree root node to $node.
408 Authors: Lukas Mueller, Tom York.
410 =cut
412 sub reset_root {
413 my $self = shift; # tree object
414 my $new_root_node = shift; # node object
416 if (0) { #either of these branches should work.
417 my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root
418 $new_root_node->set_parent(undef); # because it is to be the root
419 my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp
420 my $cp=$new_root_node;
421 foreach my $pc (@parents) {
422 my $former_p_blen = $pc->get_branch_length();
423 $pc->remove_child($cp); # removes $cp from $pc's child list
424 $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp
425 $pc->set_branch_length($pc_blen);
426 $cp = $pc;
427 $pc_blen = $former_p_blen;
429 } else {
430 my @parents_root_down = reverse $new_root_node->get_all_parents();
431 push @parents_root_down, $new_root_node; # need to include the new root in the array
432 my $pc = shift @parents_root_down; # pc means goes from being parent to being child
434 for (my $cp = shift @parents_root_down; defined $cp; $cp = shift @parents_root_down) {
435 my $blen = $cp->get_branch_length();
436 $pc->remove_child($cp); # remove $cp from children list of $pc
437 $cp->set_parent(undef);
438 $cp->add_child_node($pc); # now $cp is parent, $pc the child
439 $pc->set_branch_length($blen);
440 $pc = $cp;
441 # at this point we still have a consistent tree, but with the root moved another step along the
442 # path from original root to new root.
445 $self->set_root($new_root_node);
446 $new_root_node->set_branch_length(0);
447 $self->recalculate_tree_data();
450 =head2 function get_leaf_count()
452 Synopsis: $tree->get_leaf_count()
453 Arguments: none
454 Returns: the number of leaves in the tree
455 Side effects:
456 Description:
458 =cut
460 sub get_leaf_count {
461 my $self = shift;
462 # $self->get_root()->count_leaves();
463 return scalar $self->get_leaf_list();
466 =head2 function get_unhidden_leaf_count()
468 Get the number of visible leaves in the tree
470 =cut
472 sub get_unhidden_leaf_count {
473 my $self = shift;
474 return scalar grep { !$_->is_hidden } $self->get_leaf_list;
477 =head2 function set_unique_node_key()
479 Synopsis: $tree->set_unique_node_key(345);
480 Arguments: an integer value to set the unique node key
481 property to.
482 Returns: nothing
483 Side effects: this value will then be used by get_unique_node_key().
484 The getter function increases the unique key by one
485 every time it is called.
486 Description:
487 Note: this function is used internally and it should not be
488 necessary to ever use it.
490 =cut
492 sub set_unique_node_key {
493 my $self = shift;
494 $self->{unique_node_key}=shift;
497 =head2 function get_unique_node_key()
499 Synopsis: $node->set_node_key(
500 $node->get_tree()->get_unique_node_key() );
501 Arguments: none
502 Returns: a unique node key
503 Side effects:
504 Description:
505 Note: it should not be necessary to call this method, because
506 new nodes should always be added using the
507 $node->add_child() function, which assures that the
508 node_key property is filled in correctly.
510 =cut
512 sub get_unique_node_key {
513 my $self = shift;
514 $self->{unique_node_key}++; # increment the unique node key
515 while (exists $self->{node_hash}->{$self->{unique_node_key}}) { # if key already in node_hash, increment again...
516 $self->{unique_node_key}++;
518 return $self->{unique_node_key};
521 =head2 function clear_node_hash()
523 Synopsis: $t -> clear_node_hash()
524 Arguments: none
525 Returns: clears the node hash
526 Side effects:
527 Description:
529 =cut
531 sub clear_node_hash {
532 my $self = shift;
533 %{$self->{node_hash}}=();
536 =head2 function regenerate_node_hash()
538 Synopsis: $tree->regenerate_node_hash()
539 Arguments: a node, most conveniently the root node.
540 Returns: nothing
541 Side effects: regenerates the node hash from the current root.
542 Description: it uses the predefined ...? Recursive. Adds node
543 to hash and then calls itself on each child
545 =cut
547 sub regenerate_node_hash {
548 my $self = shift;
549 my $node = shift;
550 $node ||= $self->get_root();
551 #print("in regenerate_node_hash. \n");
552 #$node->print_node();
553 #print("node key: ", $node->get_node_key());
554 $self->add_node_hash($node, $node->get_node_key());
555 foreach my $c ($node->get_children()) {
556 $self->regenerate_node_hash($c);
558 $self->set_unique_node_key( scalar $self->get_all_nodes() );
561 =head2 function add_node_hash()
563 Synopsis: $tree->add_node_hash($node, $unique_key);
564 Arguments: an instance of a Node object; a unique node key.
565 Returns: nothing
566 Side effects: the $node is added to the node hash.
567 the node hash uses the node\'s node_key property
568 as a hash key, and the node object itself as a
569 hash value. Note that it should not be necessary
570 to call this function. All new nodes should be
571 added using the add_child() method which automatically
572 inserts the new node in the node_hash.
573 Description:
575 =cut
577 sub add_node_hash {
578 my $self = shift;
579 my $node = shift;
580 my $unique_key = shift;
582 ${$self->{node_hash}}{$unique_key}=$node;
585 =head2 function get_all_nodes()
587 Synopsis: returns a list of all nodes, in no particular order.
588 Arguments: none
589 Returns: a list of nodes
590 Side effects: none
591 Description:
593 =cut
595 sub get_all_nodes {
596 my $self = shift;
597 return (values %{$self->{node_hash}});
601 sub get_all_node_keys {
602 my $self = shift;
603 return (keys %{$self->{node_hash}});
607 sub get_node_count {
608 my $self = shift;
609 return scalar($self->get_all_nodes());
612 =head2 function get_node()
614 Synopsis: my $node->get_node($unique_node_key);
615 Arguments: a unique node key of a node
616 Returns: the $node object associated with the node key.
617 Side effects:
618 Description: this function uses the node hash and should therefore
619 be fast. The node key values can be embedded in things
620 like HTML imagemaps, and the corresponding nodes can
621 be quickly retrieved for further manipulation using
622 this function.
624 =cut
626 sub get_node {
627 my $self = shift;
628 my $key = shift;
629 return ${$self->{node_hash}}{$key};
632 sub print_node_keys{
633 my $self = shift;
634 my $hashref = $self->{node_hash};
635 foreach my $k (keys (%$hashref)) {
636 my $n = $self->get_node($k);
637 if (defined $n) {
638 print("key, node: ", $k); $n->print_node();
639 } else {
640 print("key: ", $k, " has undefined node (returned by get_node($k) ). \n");
643 print("present value of unique_node_key: ", $self->{unique_node_key}, "\n");
648 =head2 function incorporate_nodes()
650 Given a list of nodes, add them to this tree's membership
651 by setting their 'tree' attributes and giving them new node
652 keys from this tree's pool, setting the hash appropriately
654 Arg: List of node objects
655 Ret: Nothing
657 =cut
659 sub incorporate_nodes {
660 my $self = shift;
661 my @nodes = @_;
662 foreach my $n (@nodes) {
663 my $new_key = $self->get_unique_node_key();
664 $n->set_tree($self);
665 $n->set_node_key($new_key);
666 $self->add_node_hash($n, $new_key);
670 =head2 function incorporate_tree()
672 Given a tree, incorporate that tree's nodes into this tree. This does not affect parent/child relationships; you have to set those yourself
674 =cut
676 sub incorporate_tree {
677 my $self = shift;
678 my $sub_tree = shift;
679 my @nodes = $sub_tree->get_root()->get_descendents();
680 $self->incorporate_nodes(@nodes);
683 =head2 function make_binary()
685 Inserts joint nodes at polyphetic points so that the tree is biphetic or monophetic. The joint nodes have branch-length 0, so this should not affect analysis, but it allows the tree to conform to certain standards in external programs.
687 =cut
689 sub make_binary {
690 my $self = shift;
691 my $node = shift;
692 $node ||= $self->get_root();
693 my $new_bl = shift;
694 $new_bl ||= $self->get_min_branch_length();
695 # warn "new bl in make_binary: $new_bl \n";
696 $node->binarify_children($new_bl);
697 foreach($node->get_children()){
698 $self->make_binary($_, $new_bl);
702 =head2 function traverse()
704 Synopsis: $tree->traverse( sub{ my $node = shift;
705 $node->set_hidden() } );
706 Arguments: a function to be performed on each node, taking
707 that node as its only argument
708 Returns: nothing
709 Side effects: the function will be executed on each node object.
710 Description: not yet implemented... UPDATE: C. Carpita attempts
712 =cut
714 sub traverse {
715 my $self = shift;
716 my $function = shift;
717 my $node = shift;
718 die "You did not pass a subroutine reference" unless (ref($function) eq "CODE");
719 $node ||= $self->get_root();
721 &$function($node);
723 foreach( $node->get_children() ){
724 $self->traverse($function, $_);
728 sub newick_shown_attributes { # just return the keys (attributes), so everything should work the same.
729 my $self = shift;
730 return keys %{$self->{newick_shown_attributes}};
733 sub show_newick_attribute {
734 my $self = shift;
735 my $attr = shift;
736 # push(@{$self->{newick_shown_attributes}}, $attr);
737 $self->{newick_shown_attributes}->{$attr}++;
740 sub unshow_newick_attribute {
741 my $self = shift;
742 my $attr = shift;
744 delete $self->{newick_shown_attributes}->{$attr};
746 # my $size = scalar @{$self->{newick_shown_attributes}};
747 # foreach my $index (0..$size-1) {
748 # if ( ($self->{newick_shown_attributes})->[$index] eq $attr) {
749 # delete $self->{newick_shown_attributes}->[$index];
750 # last;
755 sub get_min_branch_length{
756 my $self = shift;
757 return $self->{min_branch_length};
760 sub set_min_branch_length{
761 my $self = shift;
762 $self->{min_branch_length} = shift;
765 sub get_shown_branch_length_transformation{
766 my $self = shift;
767 return $self->{shown_branch_length_transformation};
770 sub set_shown_branch_length_transformation{
771 my $self = shift;
772 $self->{shown_branch_length_transformation} = shift;
775 sub set_min_shown_branch_length{
776 my $self = shift;
777 $self->{min_shown_branch_length} = shift;
780 sub get_min_shown_branch_length{
781 my $self = shift;
782 return $self->{min_shown_branch_length};
785 sub shown_branch_length_transformation_reset{
786 my $self = shift;
787 $self->set_shown_branch_length_transformation(shift);
788 $self->{longest_branch_length} = undef;
789 $self->get_root()->calculate_distances_from_root();
792 sub subtree_newick{
793 my $self = shift;
794 my $subtree_root = shift || $self->get_root();
795 #print "number of children: ", scalar $subtree_root->get_children(), "\n";
796 my $newick_string = $subtree_root->recursive_subtree_newick();
797 return $newick_string;
800 =head2 function generate_newick()
802 Args: (optional) node, defaults to root node
803 (optional) $show_root - boolean, will show the root node in the newick string
804 Returns: Newick expression from the given node, or for the whole
805 tree if no argument is provided
807 =cut
809 sub generate_newick {
810 my $self = shift;
811 my $node = shift;
812 my $show_root = shift;
814 $node ||= $self->get_root();
815 return $node->recursive_generate_newick('');
819 =head2 function get_orthologs()
821 Synopsis: my $ortho_trees_ref = $tree->get_orthologs();
822 Arguments: none.
823 Returns: a reference to a list of trees in which the leaves are all
824 orthologs.
825 Side effects: Sets some node attributes, but deletes at end.
826 Description: This version uses the number of leaves and the number of
827 leaf species in a subtree to decide if that subtree's
828 leaves are all orthologs. (The topology is not used,
829 subroutine orthologs compares the topology to a
830 species_tree.)
831 Author: Tom York
833 =cut
835 sub get_orthologs {
836 my $self=shift;
837 my $root_node = $self->get_root();
839 $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes
840 $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes
841 my $trees_ref = $root_node->collect_orthologs();
843 # can delete the leaf_count and leaf_species_count attributes here
844 my @node_list = $self->get_all_nodes();
845 map($_->delete_attribute("leaf_count"), @node_list);
846 map($_->delete_attribute("leaf_species_count"), @node_list);
848 return $trees_ref;
851 #This should recursively get all the subtree leaf species counts, and then run over everything again,
852 # comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the
853 # complement of each subtree.
854 sub set_all_subtree_and_complement_leaf_species_counts{
855 my $self = shift;
856 my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count();
857 print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n"; readline();
858 $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash);
861 sub get_complement_ortho_group_candidates{
862 my $self = shift;
863 my @node_list = $self->get_root()->recursive_subtree_node_list();
864 foreach my $n (@node_list) {
865 my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count");
866 my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count");
867 if ($comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count >1) {
868 print "complement to subtree : ", $n->get_name(), " is a og candidate \n";
869 print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n";
875 sub get_leaf_parents_list {
876 my $self = shift;
877 foreach my $leaf ($self->get_leaf_list()) {
878 my $parent = $leaf->get_parent();
879 ${$self->{leaf_parent_hash}}{$parent->get_node_key()}=$parent;
881 # return the parents as a neat list
882 return map (${$self->{leaf_parent_hash}}{$_}, keys(%{$self->{leaf_parent_hash}}));
885 # helper functions that deal with the leaf list. It contains a list of nodes
886 # that form leaves, in the order they will be rendered. The leaf list is stored
887 # in the Tree datastructure.
890 =head2 function get_leaf_list()
892 Synopsis: my @leaves = $tree->get_leaf_list();
893 Arguments: none
894 Returns: a list of Nodes that represent the leaves of the tree
895 Side effects:
896 Description:
898 =cut
900 sub get_leaf_list {
901 my $self=shift;
902 if (!exists($self->{leaf_list}) || !@{$self->{leaf_list}}) { $self->calculate_leaf_list(); }
903 return @{$self->{leaf_list}};
906 =head2 get_leaves
908 Alias for get_leaf_list()
910 =cut
912 sub get_leaves {
913 my $self = shift;
914 return $self->get_leaf_list();
917 sub add_leaf_list {
918 my $self = shift;
919 my $leaf_node = shift;
920 push @{$self->{leaf_list}}, $leaf_node;
923 sub clear_leaf_list {
924 my $self = shift;
925 @{$self->{leaf_list}}=();
928 sub calculate_leaf_list {
929 my $self = shift;
930 $self->clear_leaf_list();
931 my @leaf_list = $self->get_root()->recursive_leaf_list();
932 foreach my $leaf (@leaf_list) {
933 $self->add_leaf_list($leaf);
939 # the tree_topology_changed member variable contains the status of the
940 # topology of the tree. If the tree has been changed, it should be 1,
941 # otherwise it should be 0.
943 sub get_tree_topology_changed {
944 my $self = shift;
945 return $self->{tree_topology_changed};
948 sub _set_tree_topology_changed {
949 my $self = shift;
950 $self->{tree_topology_changed}=shift;
953 =head2 function get_name()
955 Synopsis: my $tree_name = $tree->get_name();
956 Arguments: none
957 Returns: the name of the tree.
958 Side effects: none
959 Description:
961 =cut
963 sub get_name {
964 my $self=shift;
965 return $self->{name};
968 =head2 function set_name()
970 Synopsis: $tree->set_name("A tree of the cytochrome P450 family in the Solanaceae");
971 Arguments: a string representing a name
972 Returns: nothing
973 Side effects: this name will be used somehow in the future, such as when
974 the tree is rendered as an image.
975 Description:
977 =cut
979 sub set_name {
980 my $self=shift;
981 $self->{name}=shift;
984 =head2 function get_longest_root_leaf_length()
986 Synopsis: my $longest = $tree->get_longest_root_leaf_length()
987 Arguments: none
988 Returns: the longest distance from the root to any leaf [real]
989 Side effects:
990 Description:
992 =cut
994 sub get_longest_root_leaf_length {
995 my $self=shift;
996 if (!$self->{longest_branch_length}) {
997 $self->set_longest_root_leaf_length($self->calculate_longest_root_leaf_length());
999 # print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n";
1000 return $self->{longest_branch_length};
1003 =head2 function set_longest_root_leaf_length()
1005 Synopsis: $tree->set_longest_root_leaf_length($distance)
1006 Arguments: the distance from root to the furthest leaf.
1007 Returns: nothing
1008 Side effects: This value is used for the scaling of the tree in the
1009 horizontal dimension. Normally it should be calculated
1010 using get_longest_root_leaf_length().
1011 Description:
1013 =cut
1015 sub set_longest_root_leaf_length {
1016 my $self=shift;
1017 $self->{longest_branch_length}=shift;
1020 sub calculate_longest_root_leaf_length {
1021 my $self=shift;
1022 my $largest = 0;
1023 foreach my $leaf ($self->get_leaf_list()) {
1024 my $dist = $leaf->get_dist_from_root();
1025 if ($dist > $largest) {
1026 $largest=$dist;
1029 return $largest;
1032 =head2 function retrieve_longest_branch_node()
1034 Synopsis:
1035 Arguments:
1036 Returns:
1037 Side effects:
1038 Description:
1040 =cut
1042 sub retrieve_longest_branch_node {
1043 my $self=shift;
1044 my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node(CXGN::Phylo::Node->new());
1045 return $longest_branch_node;
1051 =head2 APPEARANCE OF THE TREE
1053 =head2 function get_show_labels()
1055 Synopsis: my $flag = $tree->get_show_lables();
1056 Arguments: none
1057 Returns: a boolean if the labels are currently visible.
1058 Side effects:
1059 Description:
1061 =cut
1063 sub get_show_labels {
1064 my $self=shift;
1065 return $self->{show_labels};
1068 =head2 function set_show_labels()
1070 Synopsis: $tree->set_show_lables(1);
1071 Arguments: a boolean value representing the visibility
1072 of the labels.
1073 Returns: nothing
1074 Side effects:
1075 Description:
1077 =cut
1079 sub set_show_labels {
1080 my $self=shift;
1081 $self->{show_labels}=shift;
1085 sub get_show_species_in_label{
1086 my $self = shift;
1087 return $self->{show_species_in_labels};
1090 sub set_show_species_in_label{
1091 my $self = shift;
1092 $self->{show_species_in_labels} = shift;
1096 =head2 accessors get_line_color(), set_line_color()
1098 Synopsis: my ($r, $g, $b) = $tree->get_line_color();
1100 Property: a list of (red, gree, blue) components of the
1101 color used to draw the tree lines.
1102 Side effects:
1103 Description:
1105 =cut
1107 sub get_line_color {
1108 my $self=shift;
1109 return @{$self->{line_color}};
1112 sub set_line_color {
1113 my $self=shift;
1114 @{$self->{line_color}}=@_;
1118 =head2 accessors get_bgcolor(), set_bgcolor()
1120 Synopsis: $tree->set_bgcolor(255, 255, 255);
1121 Property: a list of (red, green, blue) components for the
1122 tree background color.
1123 Side effects:
1124 Description:
1126 =cut
1128 sub get_bgcolor {
1129 my $self=shift;
1130 return @{$self->{bgcolor}};
1133 sub set_bgcolor {
1134 my $self=shift;
1135 @{$self->{bgcolor}}=@_;
1138 =head2 accessors get_hilite_color(), set_hilite_color()
1140 Synopsis: $tree->set_hilite_color(0, 255, 255);
1141 Property: a list of color components for the hilite color
1142 Side effects: this color is used to hilite labels of nodes that
1143 have the hilite propery set to a true value.
1144 Description:
1146 =cut
1148 sub get_hilite_color {
1149 my $self=shift;
1150 return @{$self->{hilite_color}};
1153 sub set_hilite_color {
1154 my $self=shift;
1155 @{$self->{hilite_color}}=@_;
1158 =head2 function get_node_by_name()
1160 Synopsis:
1161 Arguments: a search term
1162 Returns: a node object that has a matching node name
1163 Side effects:
1164 Description: get_node_by_name() calls search_node_name(), appending
1165 ^ and $ to the regular expression. It assumes that all
1166 nodes have distinct names. If several nodes have the same
1167 name, only the first node it finds is returned. If it
1168 does not find the node, it returns undef.
1170 =cut
1172 sub get_node_by_name {
1173 my $self = shift;
1174 my $name = shift;
1175 foreach my $n ($self->get_all_nodes()){
1176 return $n if ($n->get_name() eq $name);
1178 return undef;
1181 #returns a list of nodes matching a certain reg expression depending on the argument
1182 sub search_node_name {
1183 my $self = shift;
1184 my $term = shift;
1185 my @nodes = ();
1186 foreach my $n ($self->get_all_nodes()) {
1187 my $node_name = $n->get_name();
1188 if ($node_name =~ /\Q$term\E/i) {
1189 push @nodes, $n;
1192 return @nodes;
1195 #returns a list of nodes matching a certain reg expression depending on the argument
1196 sub search_label_name {
1197 my $self = shift;
1198 my $term = shift;
1199 my @nodes = ();
1200 foreach my $n ($self->get_all_nodes()) {
1201 my $label_name = $n->get_label()->get_name();
1202 if ($term =~ m/m\/(.*)\//) { # if enter m/stuff/ then treat stuff as perl regex
1203 my $match = $1;
1204 if ($match && $label_name =~ /$match/) {
1205 push @nodes, $n;
1207 } else {
1208 if ($term && $label_name =~ /\Q$term\E/i) {
1209 push @nodes, $n;
1213 return @nodes;
1216 =head2 function compare()
1218 Synopsis: $this_tree->compare($another_tree);
1219 Arguments: a tree object
1220 Returns: 1 if the tree is identical in topology to
1221 $another_tree,
1222 0 if the trees have a different topology.
1223 Side effects:
1224 Description: compare() works by comparing the node names and
1225 the topology of the tree. Because not all nodes
1226 usually have explicit names, it derives implicit
1227 names for each node (it assumes the leaf nodes have
1228 unique names). The implicit names are defined by an
1229 array containing all the names of the subnodes. The
1230 names are sorted by alphabetical order and then compared.
1232 Note: This is a synonym for compare_rooted. There is also a
1233 compare_unrooted routine to test whether trees
1234 are the same aside from being rooted in different places.
1236 =cut
1238 #sub compare {
1239 # my $self = shift;
1240 # my $other_tree = shift;
1241 #my $compare_field = shift;
1243 ## print STDOUT "in compare. compare_field: $compare_field \n";
1245 # return $self->compare_rooted($other_tree, $compare_field);
1248 =head2 function compare_rooted
1250 Synopsis: $tree1->compare_rooted($tree2);
1251 Arguments: A tree object.
1252 Returns: 1 if $tree1 and $tree2 are topologically the same
1253 when regarded as rooted trees, 0 otherwise.
1254 Side effects: None.
1255 Description: Works with copies of trees; collapses them, gets
1256 implicit names, then recursively compares trees
1257 using implicit names.
1258 Note: Now synonymous with compare. Can compare subtrees
1259 with Node->compare_subtrees
1260 Author: Tom York
1262 =cut
1264 sub compare_rooted{
1265 my $self = shift;
1266 my $other_tree = shift;
1267 my $compare_field = shift;
1268 # print STDOUT "in compare_rooted. compare_field: $compare_field \n";
1269 return $self->get_root()->compare_subtrees($other_tree->get_root(), $compare_field);
1272 =head2 function compare_unrooted
1274 Synopsis: $tree1->compare_unrooted($tree2);
1275 Arguments: A tree object.
1276 Returns: 1 if $tree1 and $tree2 are topologically the same
1277 when regarded as unrooted trees, 0 otherwise.
1278 Side effects: None.
1279 Description: Copies the 2 trees, finds a leaf common to both
1280 (if one exists) and resets roots of both trees to those
1281 leaves. Then recursively compares trees using implicit
1282 names in same way as compare_rooted().
1283 Note: In its present form, assumes uniqueness of leaf names.
1284 Otherwise, if may return 0 when it should return 1.
1285 Author: Tom York.
1287 =cut
1289 sub compare_unrooted {
1290 my $self = shift;
1291 my $other_tree = shift;
1292 my $compare_field = shift; # to control comparison of names (default) or species ("species")
1293 # copy the trees into temporary trees, so that the trees can
1294 # be manipulated (rerooted, collapsed) without changing the original trees.
1296 # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
1297 my $tree1 = $self->copy();
1298 my $tree2 = $other_tree->copy();
1300 # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
1302 my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
1303 my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
1305 if (!$corresponding_leaf) {
1306 print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
1307 return 0;
1310 # reset roots of trees to the two corresponding leaves:
1311 $tree1->reset_root($leaf1);
1312 $tree2->reset_root($corresponding_leaf);
1314 return $tree1->get_root()->compare_subtrees($tree2->get_root(), $compare_field);
1318 =head2 function get_layout(), set_layout()
1320 Synopsis: $tree->set_layout($layout)
1321 Arguments: a CXGN::Phylo::Layout object or subclass
1322 Returns: nothing
1323 Side effects: the layout object will be used to lay out the
1324 tree in the rendering process.
1325 Description:
1327 =cut
1329 sub get_layout {
1330 my $self=shift;
1331 return $self->{layout};
1334 sub set_layout {
1335 my $self=shift;
1336 $self->{layout}=shift;
1340 =head2 function layout()
1342 Synopsis: $tree->layout()
1343 Arguments:
1344 Returns:
1345 Side effects:
1346 Description: a convenience function that calls the layout function of the
1347 trees layout object.
1349 =cut
1352 sub layout {
1353 my $self = shift;
1354 $self->get_layout()->layout();
1358 =head2 accessors get_renderer(), set_renderer()
1360 Synopsis: $tree->set_renderer($renderer)
1361 Arguments: a CXGN::Phylo::Renderer object or subclass
1362 Returns: nothing
1363 Side effects: the $renderer is used for rendering the tree
1364 Description:
1366 =cut
1368 sub get_renderer {
1369 my $self=shift;
1370 return $self->{renderer};
1373 sub set_renderer {
1374 my $self=shift;
1375 $self->{renderer}=shift;
1378 =head2 function render()
1380 Synopsis: $tree->render();
1381 Arguments: (optional) a boolean for printing all node names, and not only the leaf labels
1382 Returns:
1383 Side effects:
1384 Description: a convenience function that calls the render()
1385 function on the tree\'s renderer. Does not perform
1386 the layout of the tree. Call layout() on the tree
1387 object before render().
1389 =cut
1391 sub render {
1392 my $self = shift;
1393 my $print_all_labels=shift;
1394 $self->get_renderer()->render($print_all_labels);
1397 sub standard_layout {
1398 my $self = shift;
1399 my $layout = CXGN::Phylo::Layout->new($self);
1400 $layout->set_top_margin(20);
1401 $layout->set_bottom_margin(20);
1402 $layout->set_image_height(400);
1403 $layout->set_image_width(700);
1404 $self->set_layout($layout);
1405 $self->layout();
1408 =head2 function render_png()
1410 Synopsis: $r->render_png($filename, $print_all_labels);
1411 Arguments: a filename, (optional) a boolean for printing the labels for all nodes in the tree.
1412 Returns: nothing
1413 Side effects: creates (or overwrites) file $filename
1414 which contains the png graphics representing
1415 the tree.
1416 Description:
1418 =cut
1420 sub render_png {
1421 my $self = shift;
1422 my $file = shift;
1423 my $print_all_labels= shift; ## Boolean for printing non-leaf node labels
1424 $self->layout();
1425 my $png_string = $self->render($print_all_labels);
1426 if(defined $file){
1427 open (my $T, ">$file") || die "PNG_tree_renderer: render_png(): Can't open file $file.";
1428 print $T $png_string;
1429 close $T ;
1431 else {
1432 return $png_string;
1436 =head2 function collapse_tree()
1438 Synopsis:
1439 Arguments:
1440 Returns:
1441 Side effects:
1442 Description:
1444 =cut
1446 sub collapse_tree {
1447 my $self = shift;
1448 # first, collapse all nodes that have only one child onto the
1449 # parent node
1451 #print STDERR "before rec..coll...single_nodes\n";
1452 $self->get_root()->recursive_collapse_single_nodes();
1453 #print STDERR "after rec..coll...single_nodes\n";
1454 $self->recalculate_tree_data();
1456 # then, collapse all nodes that have branch lengths of zero
1457 # with their parent node
1459 #print STDERR "before rec..coll...zero_branches\n";
1460 $self->get_root()->recursive_collapse_zero_branches();
1461 #print STDERR "after rec..coll...zero_branches\n";
1463 # let's re-calculate the tree's properties
1465 $self->recalculate_tree_data();
1468 sub collapse_unique_species_subtrees {
1469 my $self = shift;
1470 # calculate, for each node, how many nodes are beneath it.
1471 # This information can then be accessed using the
1472 # $node-> get_subtree_node_count() function.
1474 # $self->get_root()->calculate_subtree_node_count();
1476 # calculate, for each node, how many different species are in the leaves of the subtree beneath it.
1478 $self->get_root()->recursive_set_leaf_species_count();
1480 # recursively go through the tree
1482 $self->get_root()->recursive_collapse_unique_species_subtrees();
1485 =head2 function find_point_furthest_from_leaves()
1487 Synopsis: $t->find_point_furthest_from_leaves()
1488 Arguments: None.
1489 Returns: A list containing a node object, and the distance
1490 above that node of the point furthest from the leaves.
1491 Side effects: Calls recursive_find_point_furthest_from_leaves,
1492 which sets some attributes.
1493 Description: For each point there is a nearest leaf at distance
1494 dnear. This returns the point which maximizes dnear.
1496 =cut
1498 sub find_point_furthest_from_leaves{
1499 my $self = shift;
1500 $self->set_min_dist_to_leaf();
1501 my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves();
1502 $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf");
1503 return @furthest_point;
1506 =head2 function find_point_closest_to_furthest_leaf()
1508 Synopsis: $t->find_point_closest_to_furthest_leaf();
1509 Arguments: None.
1510 Returns: A list containing a node object, and the distance
1511 above that node of the point closest to furthest leaf.
1512 Side effects: Calls recursive_set_max_dist_to_leaf_in_subtree,
1513 which sets some attributes.
1514 Description: For each point there is a furthest leaf at distance
1515 dfar. This returns the point which minimizes dfar.
1517 =cut
1519 # returns a list containing a node object, and the distance of the point above that node
1520 sub find_point_closest_to_furthest_leaf{
1521 my $self = shift;
1522 $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree();
1524 my @nodes = $self->get_root()->recursive_subtree_node_list();
1525 push @nodes, $self->get_root(); # we want the root in our list
1527 my @sorted_nodes = sort
1528 { $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node()
1529 <=>
1530 $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node() }
1531 @nodes;
1533 # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf,
1534 # until you reach the midpoint of the longest leaf to leaf path
1535 my $current_node = pop @sorted_nodes;
1536 my $distance_to_go = 0.5*($current_node->get_attribute("dist_to_leaf_longest") - $current_node->get_attribute("dist_to_leaf_next_longest"));
1537 for (;;) {
1538 my $next_node = $current_node->get_attribute("lptl_child");
1539 my $branch_length = $next_node->get_branch_length();
1540 if ($branch_length >= $distance_to_go) {
1541 return ($next_node, $branch_length - $distance_to_go);
1542 } else {
1543 $distance_to_go -= $branch_length;
1544 $current_node = $next_node;
1551 =head2 function reset_root_to_point_on_branch()
1553 Synopsis: $t->reset_root_to_point_on_branch($anode, $distance)
1554 Arguments: First arg is a node, the second a distance above that
1555 node. Together they define a point which will be new
1556 root.
1557 Returns: Nothing.
1558 Side effects: Resets root to a point specified by arguments, and
1559 deletes old root node.
1560 Description: Use this to reset the root to a point along a branch.
1561 Author: Tom York.
1563 =cut
1564 sub reset_root_to_point_on_branch{
1565 my $self = shift;
1566 my ($child_of_new_node, $dist_above) = @_;
1568 my $new_node = $child_of_new_node->add_parent($dist_above); # goes
1569 my $former_root = $self->get_root();
1571 $self->reset_root($new_node);
1573 $self->collapse_tree();
1576 =head2 function set_min_dist_to_leaf()
1578 Synopsis: $t->set_min_dist_to_leaf()
1579 Arguments: None.
1580 Returns: Nothing.
1581 Side effects: Sets the following attributes for every node
1582 in tree: min_dist_to_leaf, near_leaf_path_direction,
1583 near_leaf_path_next_node
1584 Description:
1585 Author: Tom York.
1587 =cut
1589 sub set_min_dist_to_leaf{
1590 my $self = shift;
1591 $self->get_root()->recursive_set_min_dist_to_leaf();
1592 $self->get_root()->recursive_propagate_mdtl();
1596 =head2 function min_leaf_dist_variance_point()
1598 Synopsis: $t->min_leaf_dist_variance_point()
1599 Arguments: None.
1600 Returns: List ($n, $d) specifying the desired point as
1601 lying at a distance $d above the node $n.
1602 Side effects: Calls recursive_set_dl_dlsqr_sums_down(), and
1603 recursive_set_dl_dlsqr_sums_up(), which set
1604 several node attributes
1605 Description: Returns the point in the tree such that the
1606 variance of the distances from the point to the
1607 leaves is minimized.
1608 Author: Tom York.
1610 =cut
1612 sub min_leaf_dist_variance_point{
1613 my $self = shift;
1615 $self->get_root()->recursive_set_dl_dlsqr_sums_down();
1616 $self->get_root()->recursive_set_dl_dlsqr_sums_up();
1618 my @node_list = $self->get_root()->recursive_subtree_node_list();
1619 my $opt_node = shift @node_list;
1620 my ($opt_dist_above, $opt_var) = $opt_node->min_leaf_dist_variance_point();
1622 foreach my $n (@node_list) {
1623 my ($da, $var) = $n->min_leaf_dist_variance_point();
1624 if ($var < $opt_var) {
1625 $opt_node = $n;
1626 $opt_dist_above = $da;
1627 $opt_var = $var;
1630 $self->get_root()->recursive_delete_dl_dlsqr_attributes();
1631 return ($opt_node, $opt_dist_above, $opt_var);
1635 =head2 function test_tree_node_hash()
1637 Synopsis: $t->test_tree_node_hash()
1638 Arguments: None.
1639 Returns: 1 if test is passed, 0 otherwise.
1640 Side effects: None.
1641 Description: Tests that the nodes in the tree as found by
1642 recursive_subtree_node_list() agree
1643 with the node hash. Specifically tests that
1644 1) the key of each node (found by
1645 recursive_subtree_node_list()) is found in
1646 the node hash,
1647 2) no two nodes have the same key,
1648 3) each key in the node hash is the key of some node.
1650 It is possible for parts of the tree to become
1651 disconnected, so that it would not be possible to
1652 get from one to the other by at each step going from
1653 a node to a parent or child node, although all nodes
1654 would be in the node hash.
1655 Author: Tom York.
1657 =cut
1659 sub test_tree_node_hash{
1660 my $ok1 = 1; my $ok2 = 1; my $ok3 = 1;
1661 my $self = shift;
1662 my $node_hashref = $self->{node_hash};
1663 my $root = $self->get_root();
1664 my @node_list = $root->recursive_subtree_node_list();
1665 push @node_list, $root;
1666 my %nodekeys;
1668 foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash
1669 my $node_key = $n->get_node_key();
1670 $nodekeys{$node_key}++;
1671 if (!defined $node_hashref->{$node_key}) { # a node in node_list is not in the hash.
1672 $ok1 = 0;
1676 if (scalar keys %nodekeys != scalar @node_list) { # test that each node in node_list has a distinct key
1677 $ok2 = 0;
1680 my @node_keys = keys (%$node_hashref); # test that each key in node hash is
1681 if (scalar @node_keys != scalar @node_list) {
1682 $ok3 = 0;
1684 return $ok1*$ok2*$ok3;
1688 =head2 function test_tree_parents_and_children()
1690 Synopsis: $t->test_tree_parents_and_children()
1691 Arguments: None.
1692 Returns: 1 if test is passed, 0 otherwise.
1693 Side effects: None.
1694 Description: Tests that node $a is a child of $b,
1695 if and only iff $b is the parent of $a.
1696 Author: Tom York.
1698 =cut
1700 sub test_tree_parents_and_children{
1701 my $self = shift;
1702 my $ok1 = $self->test_tree_nodes_are_parents_of_their_children();
1703 my $ok2 = $self->test_tree_nodes_are_children_of_their_parents();
1704 return ($ok1 && $ok2);
1707 sub test_tree{
1708 my $self = shift;
1709 return $self->test_tree_node_hash() && $self->test_tree_parents_and_children();
1712 # tests that for all nodes n, each child of n has n as its parent.
1713 sub test_tree_nodes_are_parents_of_their_children{
1714 my $self = shift;
1715 my $root = $self->get_root();
1716 my @node_list = $root->recursive_subtree_node_list();
1717 push @node_list, $root;
1718 my $ok = 1;
1720 foreach my $n (@node_list) {
1721 my @children = $n->get_children();
1722 my $node_key = $n->get_node_key();
1723 foreach my $c (@children) {
1724 if(! defined $c->get_parent()){
1725 print("child node has undefined parent. \n"); $n->print_node(); $c->print_node();
1726 $ok = 0;
1727 } elsif ($c->get_parent()->get_node_key() != $node_key) {
1728 print("child node has wrong parent. \n"); $n->print_node(); $c->print_node();
1729 $ok = 0;
1733 return $ok;
1737 # tests that for all nodes n, that if n has a parent, then n is among the children of that parent.
1738 sub test_tree_nodes_are_children_of_their_parents{
1739 my $self = shift;
1740 my $root = $self->get_root();
1741 my @node_list = $root->recursive_subtree_node_list();
1742 push @node_list, $root;
1743 my $ok = 1;
1745 foreach my $n (@node_list) { # test that $n is among the children of its parent
1746 my $p = $n->get_parent();
1747 if (defined $p) { # if not defined, do no test for this node
1748 my @children = $p->get_children();
1749 my $this_n_ok = 0;
1750 foreach my $c (@children) {
1751 if ($c->get_node_key() == $n->get_node_key()) {
1752 $this_n_ok = 1;
1753 last;
1756 if(! $this_n_ok){ print("This node not among the children of its parent: \n"), $n->print_node(); }
1757 $ok &&= $this_n_ok;
1760 return $ok;
1763 =head2 function orthologs()
1765 Synopsis: $ortho_grp = $ortho_tree->
1766 orthologs($species_tree, $cssst)
1767 Arguments: a tree object, and an argument which, if non-zero,
1768 causes single-species trees to be collapsed to a
1769 single node.
1770 Returns: An list of ortholog groups.
1771 Side effects:
1772 Description: Calls get_orthologs to get the ortholog_trees
1773 defined without using a species tree, i.e. maximal
1774 subtrees in which all leaves are of distinct species.
1775 Then for each ortholog group compare its tree to
1776 the species tree (if present), to see if topologies
1777 are the same, and if not, get a "distance " from
1778 ortholog tree to species tree topology.
1779 See also:
1781 =cut
1783 sub orthologs{
1784 my $self = shift; # tree
1785 my $species_t = shift; # species tree; if undefined,
1786 my $cssst = shift; # switch to collapse single-species subtrees to a single node
1787 my $qRFd_max = shift;
1788 if (!defined $qRFd_max) {
1789 $qRFd_max = 0;
1791 if ($cssst) {
1792 $self->collapse_unique_species_subtrees();
1794 # should we collapse the tree here?
1795 my $ortho_trees_ref = $self->get_orthologs();
1796 my @ortho_groups=(); # a list of Ortholog_group object that will contain the results
1798 # go through all the ortho_trees and compare to the species tree
1800 foreach my $ortho_t (@$ortho_trees_ref) {
1801 my $ortho_group = CXGN::Phylo::Ortholog_group->new($ortho_t, $species_t, $qRFd_max);
1802 if ($ortho_group->get_ortholog_tree()->get_leaf_count()>1) {
1803 push @ortho_groups, $ortho_group;
1805 } # end of foreach my $ortho_t (@$ortho_trees_ref) {
1806 return @ortho_groups;
1807 } # end of sub orthologs
1809 =head2 function set_missing_species_from_names()
1811 Synopsis: $atree->set_missing_species_from_names()
1812 Arguments: none
1813 Returns: nothing
1814 Side effects: For any leaf nodes with species undefined,
1815 sets the species to something derived from
1816 node name
1817 Description: Try to come up with a species for each leaf node
1818 if not already defined. So will not overwrite
1819 species names coming from, e.g., the [species='tomato']
1820 type specification in a newick file.
1821 See also:
1823 =cut
1825 sub set_missing_species_from_names{
1826 my $self = shift;
1827 foreach my $n ($self->get_leaf_list()) {
1828 # print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n");
1829 if (!$n->get_species()) {
1830 $n->set_species($n->determine_species_from_name());
1835 =head2 function impose_branch_length_minimum()
1837 Synopsis: $atree->impose_branch_length_minimum($bl_min)
1838 Arguments: The minimum branch length.
1839 Returns: nothing
1840 Side effects: Set branch lengths < $bl_min to $bl_min.
1841 (Root branch length remains 0)
1842 Description: Zero branch lengths may possibly cause problems
1843 in some cases; use this to establish a small
1844 non-zero minimum branch length;
1846 =cut
1848 sub impose_branch_length_minimum{
1849 my $self = shift;
1850 my $minimum_bl = shift;
1851 $minimum_bl ||= $self->get_min_branch_length();
1852 foreach my $n ($self->get_all_nodes()) {
1853 unless (defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl) {
1854 $n->set_branch_length($minimum_bl);
1857 $self->get_root()->set_branch_length(0.0); # leave this at 0
1861 sub set_show_standard_species{
1862 my $self = shift;
1863 $self->{show_standard_species} = shift;
1865 sub get_show_standard_species{
1866 my $self = shift;
1867 return $self->{show_standard_species};
1870 sub set_species_standardizer{
1871 my $self = shift;
1872 $self->{species_standardizer} = shift;
1875 sub get_species_standardizer{
1876 my $self = shift;
1877 return $self->{species_standardizer};
1881 =head2 function update_label_names()
1883 Synopsis: $atree->update_label_names()
1884 Arguments: none
1885 Returns: nothing
1886 Side effects: Sets all the node labels to the node name
1887 with or without the species appended,
1888 as specified by $self->get_show_species_in_labels()
1890 =cut
1892 sub update_label_names{
1893 my $self = shift;
1894 my $show_spec = $self->get_show_species_in_label();
1895 foreach my $n ($self->get_all_nodes()) {
1896 # print "node: ", $n->get_name(), " impl names: ", join("\t", @{$n->get_implicit_names()}), "\n";
1897 my $n_leaves = scalar @{$n->get_implicit_names()};
1898 my $label_text = $n->get_name();
1899 # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n";
1900 if ($show_spec) {
1901 my $species_text = $n->get_shown_species();
1902 # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n";
1903 $label_text .= " [".$species_text."]" if(defined $species_text);
1905 $n->get_label()->set_name($label_text);
1909 =head2 function prune_nameless_leaves()
1911 Synopsis: $atree->prune_nameless_leaves()
1912 Arguments: none
1913 Returns: nothing
1914 Side effects: Deletes from the tree all leaves whose
1915 names are empty or undefined.
1917 =cut
1919 sub prune_nameless_leaves{
1921 my $self = shift;
1922 my @leaf_list = $self->get_root()->recursive_leaf_list();
1923 my $count_leaves_deleted = 0;
1924 $self->get_root()->recursive_implicit_names(); # is this needed?
1925 foreach my $l (@leaf_list) {
1926 if ($l->get_name()) { # non-empty string. OK.
1927 } else {
1928 # print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n";
1929 $self->del_node($l);
1930 $self->collapse_tree();
1931 $count_leaves_deleted++;
1934 return $count_leaves_deleted;
1937 # return key, node pair corresponding to the implicit name given as argument.
1938 sub node_from_implicit_name_string{
1939 #searches tree until the node with the specified implicit name string (tab separated) is found
1940 my $self = shift;
1941 my $in_string = shift;
1942 if (! scalar $self->get_root()->get_implicit_names() > 0) {
1943 $self->get_root()->recursive_implicit_names();
1946 foreach my $k ($self->get_all_node_keys()) {
1947 my $n = $self->get_node($k);
1948 my $node_impl_name = join("\t", @{$n->get_implicit_names()});
1949 # print STDERR "node impl name: $node_impl_name \n";
1950 if ($node_impl_name eq $in_string) {
1951 return ($k, $n);
1955 # print STDERR "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n";
1956 # print STDERR $self->generate_newick(), "\n";
1957 return (undef, undef);
1961 sub leaf_species_string{
1962 my $self = shift;
1963 my $str = "species, standard species \n";
1964 foreach my $l ($self->get_leaf_list()) {
1965 my $std_species = ($l->get_standard_species())? $l->get_standard_species(): 'std_species_undefined';
1966 $str .= $l->get_species() . " " . $std_species . "\n";
1968 return $str;
1971 =head2 function quasiRF_distance
1973 Synopsis: $tree1->quasiRF_distance($tree2), or
1974 $node1->quasiRF_distance($tree2, "species");
1975 Arguments: A tree object; and optionally a string specifying
1976 whether to compare node name or species.
1977 (Default is name)
1978 Returns: Compares tree1 and tree2. If they are topologically
1979 the same, 0 is returned. Otherwise returns a "distance"
1980 describing how different the two trees are.
1981 Side effects: Sets "subtree_leaves_match" field for each node, and
1982 (by calling recursive_quasiRF_distance) sets
1983 "qRF_distance" field for each node.
1984 Description: Tree1, tree2 should be collapsed before calling this
1985 function. For each node in tree1 add branch length to
1986 distance if a node with the same implicit name
1987 (or implicit species, depending on value of second
1988 argument) is not present in tree2.
1989 This is somewhat like the Robinson-Foulds distance, but
1990 is not symmetric (hence not a true distance),
1991 as the topologies of both subtrees are used, but only
1992 the tree1 branch lengths are used. Think of it as a
1993 measure of how much tree1 much be changed to reach the
1994 topology of tree2.
1995 We are typically going to use it to compare an ortholog
1996 tree with a species tree, in which case the implicit
1997 species should be used in the comparison, i.e. it
1998 should be called with the optional 2nd arg having value
1999 "species"
2001 =cut
2003 sub quasiRF_distance{
2004 my $self = shift;
2005 my $tree1 = $self;
2006 my $tree2 = shift;
2007 my $compare_field = shift;
2009 my $root1 = $tree1->get_root();
2010 my $root2 = $tree2->get_root();
2012 my $distance = 0.0;
2014 # get the implicit names or species for each node in both trees
2016 if (lc $compare_field eq "species") {
2017 #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
2018 $root1->recursive_implicit_species();
2019 $root2->recursive_implicit_species();
2021 my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused)
2022 my $nhr2 = $tree2->{node_hash};
2023 foreach my $n2 (values ( %$nhr2)) {
2024 my $implicit_species = join("\t", @{$n2->get_implicit_species()});
2025 # print STDOUT "Y stree implicit species: $implicit_species <br>\n";
2026 $n_bl_2{$implicit_species}++; # values are not used, just count occurrences
2029 my $nhr1 = $tree1->{node_hash};
2030 foreach my $n1 (values ( %$nhr1)) {
2031 my $implicit_species = join("\t", @{$n1->get_implicit_species()});
2032 # print STDOUT "otree implicit species: $implicit_species <br>\n";
2033 if (exists $n_bl_2{$implicit_species}) { # there are subtrees with this set of leaves in both trees, do nothing
2034 $n1->set_attribute("subtree_leaves_match", "true");
2035 # print STDOUT "true <br>\n";
2036 } else { # no node with this implicit name in tree2, so add branch length to total
2037 $distance += $n1->get_branch_length();
2038 $n1->set_attribute("subtree_leaves_match", "false");
2041 } else {
2042 $root1->recursive_implicit_names();
2043 $root2->recursive_implicit_names();
2045 # set up the hash for tree2 nodes, with name as key (value unused)
2046 my %n_bl_2 = ();
2047 my $nhr2 = $tree2->{node_hash};
2048 foreach my $n2 (values ( %$nhr2)) {
2049 $n_bl_2{$n2->get_name()}++; # values are not used, just count occurrences of the name
2052 my $nhr1 = $tree1->{node_hash};
2053 foreach my $n1 (values ( %$nhr1)) {
2054 if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees, do nothing
2055 $n1->set_attribute("subtree_leaves_match", "true");
2056 } else { # no node with this implicit name in tree2, so add branch length to total
2057 $distance += $n1->get_branch_length();
2058 $n1->set_attribute("subtree_leaves_match", "false");
2062 my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here.
2063 return $distance; # $tree1 has qRFd info at every node.
2066 sub RF_distance {
2067 my $self = shift;
2068 my $other_tree = shift;
2069 my $compare_field = shift; # to control comparison of names (default) or species ("species")
2070 # copy the trees into temporary trees, so that the trees can
2071 # be manipulated (rerooted, collapsed) without changing the original trees.
2073 # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
2074 my $tree1 = $self->copy();
2075 my $tree2 = $other_tree->copy();
2077 # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
2079 my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
2080 my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
2082 if (!$corresponding_leaf) {
2083 print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
2084 return 0;
2087 # reset roots of trees to the two corresponding leaves:
2088 $tree1->reset_root($leaf1);
2089 $tree2->reset_root($corresponding_leaf);
2091 return $tree1->RF_distance_inner($tree2, $compare_field);
2094 =head2 function RF_distance_inner
2096 Synopsis: $tree1->RF_distance($tree2),
2097 or $node1->RF_distance($tree2, "species");
2098 Arguments: A tree object; and optionally a string specifying
2099 whether to compare node name or species.
2100 (Default is name)
2101 Returns: Compares tree1 and tree2. If they are topologically
2102 the same, 0 is returned. Otherwise returns a "distance"
2103 describing how different the two trees are.
2104 Side effects: Sets "subtree_leaves_match" field for each node
2105 Description: Tree1, tree2 should be collapsed before calling this
2106 function. For each node in tree1 add branch length to
2107 distance if a node with the same implicit name
2108 (or implicit species, depending on value of second
2109 argument) is not present in tree2.
2110 This computes the Robinson-Foulds distance. Topologies
2111 and branch lengths of both trees are used
2112 Think of it as a measure of how much tree1 much be
2113 changed to become tree2.
2116 =cut
2118 sub RF_distance_inner{
2119 my $self = shift;
2120 my $tree1 = $self;
2121 my $tree2 = shift;
2122 my $compare_field = shift;
2124 my $root1 = $tree1->get_root();
2125 my $root2 = $tree2->get_root();
2127 my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree
2128 my $distance = 0.0;
2129 my $in_both_sum = 0.0;
2130 my $in_one_only_sum = 0.0;
2131 my $branch_score = 0.0;
2133 # get the implicit names or species for each node in both trees
2135 if (lc $compare_field eq "species") {
2136 # die "RF_distance with compare_field set to species is not implemented. \n";
2137 #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
2138 $root1->recursive_implicit_species();
2139 $root2->recursive_implicit_species();
2140 unless(join("\t", $root1->get_implicit_species()) eq join("\t", $root1->get_implicit_species())){
2141 print STDERR "In RFdistance; trees do not have same set of leaves (by species).\n";
2142 return undef;
2144 # set up the hash for tree nodes, with species as key, node obj as value
2145 my %n_bl_1 = ();
2146 my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash};
2148 foreach my $n1 (@nhr1) { #all tree1 nodes except root1
2149 $n_bl_1{$n1->get_species()} = $n1;
2151 my %n_bl_2 = ();
2152 my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash};
2153 foreach my $n2 (@nhr2) { #all tree2 nodes except root2
2154 $n_bl_2{$n2->get_species()} = $n2;
2157 # my $in_both_sum = 0.0;
2158 # my $in_one_only_sum = 0.0;
2159 foreach my $n1 (@nhr1) {
2160 if (exists $n_bl_2{$n1->get_species()}) { # there are subtrees with this set of leaves in both trees
2161 my $n2 = $n_bl_2{$n1->get_species()};
2162 $in_both_sum += abs($n1->get_branch_length() - $n2->get_branch_length());
2163 } else { # no node with this implicit species in tree2, so add branch length to total
2164 $in_one_only_sum += $n1->get_branch_length();
2165 $sym_diff++;
2168 # my $in_both_sum2 = 0.0;
2169 foreach my $n2 (@nhr2) {
2170 if (exists $n_bl_1{$n2->get_species()}) { # there are subtrees with this set of leaves in both trees
2171 # my $n1 = $n_bl_1{$n2->get_species()};
2172 # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
2173 } else { # no node with this implicit species in tree2, so add branch length to total
2174 $in_one_only_sum += $n2->get_branch_length();
2175 $sym_diff++;
2178 # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n");
2179 # $distance = $in_both_sum + $in_one_only_sum;
2180 # print "distance: ", $distance, "\n";
2183 } else { # compare field is "name"
2184 # print "comparing trees by name fields \n";
2185 $root1->recursive_implicit_names();
2186 $root2->recursive_implicit_names();
2187 unless(join("\t", $root1->get_name()) eq join("\t", $root1->get_name())){
2188 print STDERR "In RFdistance; trees do not have same set of leaves (by name).\n";
2189 return undef;
2191 # set up the hash for tree nodes, with name as key, node obj as value
2192 my %n_bl_1 = ();
2193 my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash};
2195 foreach my $n1 (@nhr1) { #all tree1 nodes except root1
2196 # print "n1 name: ", $n1->get_name(), "\n";
2197 $n_bl_1{$n1->get_name()} = $n1;
2199 my %n_bl_2 = ();
2200 my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash};
2201 foreach my $n2 (@nhr2) { #all tree2 nodes except root2
2202 # print "n2 name: ", $n2->get_name(), "\n";
2203 $n_bl_2{$n2->get_name()} = $n2;
2206 # my $in_both_sum = 0.0;
2207 # my $in_one_only_sum = 0.0;
2208 my $diff = 0.0;
2209 # foreach my $n1 (@nhr1) {
2210 foreach my $name1 (keys %n_bl_1){
2211 my $n1 = $n_bl_1{$name1};
2212 if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees
2213 my $n2 = $n_bl_2{$n1->get_name()};
2214 $diff = $n1->get_branch_length() - $n2->get_branch_length();
2215 $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length());
2216 # $branch_score += $diff*$diff;
2217 } else { # no node with this implicit name in tree2, so add branch length to total
2218 $diff = $n1->get_branch_length();
2219 $in_one_only_sum += $diff; # $n1->get_branch_length();
2220 # $branch_score += $diff*$diff;
2221 $sym_diff++;
2222 # print "name not present in hash 2: ", $n1->get_name(), "\n";
2224 $branch_score += $diff*$diff;
2226 # my $in_both_sum2 = 0.0;
2227 # foreach my $n2 (@nhr2) {
2228 foreach my $name2 (keys %n_bl_2){
2229 my $n2 = $n_bl_2{$name2};
2230 if (exists $n_bl_1{$n2->get_name()}) { # there are subtrees with this set of leaves in both trees
2231 # my $n1 = $n_bl_1{$n2->get_name()};
2232 # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
2233 } else { # no node with this implicit name in tree2, so add branch length to total
2234 $in_one_only_sum += $n2->get_branch_length();
2235 $sym_diff++;
2236 # print "name not present in hash 1: ", $n2->get_name(), "\n";
2241 $distance = $in_both_sum + $in_one_only_sum;
2242 # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, " RFdistance: ", $distance, "\n");
2243 return ($distance, $sym_diff, $branch_score);
2246 sub get_branch_length_sum{
2247 my $self = shift;
2248 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2249 my $bl_sum = 0.0;
2250 foreach (@nodelist) {
2251 $bl_sum += $_->get_branch_length();
2253 return $bl_sum;
2256 sub get_branch_length_sum_noterm{ # sum of all non-terminal branch lengths
2257 my $self = shift;
2258 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2259 my $bl_sum = 0.0;
2260 foreach (@nodelist) {
2261 next if($->is_leaf());
2262 $bl_sum += $_->get_branch_length();
2264 return $bl_sum;
2267 sub multiply_branch_lengths_by{
2268 my $self = shift;
2269 my $factor = shift;
2270 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2271 foreach (@nodelist) {
2272 $_->set_branch_length($_->get_branch_length()*$factor);
2276 #scale branch lengths s.t. their sum is #desired_bl_sum (1.0 by default)
2277 # returns original bl sum
2278 sub normalize_branch_length_sum{
2279 my $self = shift;
2280 my $desired_bl_sum = shift;
2281 $desired_bl_sum ||= 1.0;
2282 my $bl_sum = $self->get_branch_length_sum();
2283 if ($bl_sum <= 0.0) {
2284 print STDERR "Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n";
2285 } else {
2286 $self->multiply_branch_lengths_by($desired_bl_sum/$bl_sum);
2288 return $bl_sum;
2291 sub RFdist_over_totbl{ # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all
2292 # branch lengths in both trees, so it will lie in range [0,1]
2293 my $self = shift;
2294 my $tree1 = $self;
2295 my $tree2 = shift;
2296 my $compare_field = shift;
2297 my $normalize_bl_sums = shift;
2298 $normalize_bl_sums = 0 unless(defined $normalize_bl_sums);
2300 if ($normalize_bl_sums) {
2301 $tree1->normalize_branch_length_sum();
2302 $tree2->normalize_branch_length_sum();
2304 my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum();
2305 #print "bl_sum: $bl_sum \n";
2306 my ($rfd, $symdiff, $branch_score) = $tree1->RF_distance($tree2, $compare_field);
2307 # print "bl_sum: $bl_sum . rfd: $rfd \n";
2308 return $rfd/$bl_sum;
2311 # divide into trees no bigger than $max_leaves
2312 sub divide_into_small_trees{
2313 my $self = shift;
2314 my $max_leaves = shift;
2315 $max_leaves ||= 100;
2316 # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n";
2317 my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves);
2318 return $small_trees_array;
2321 # get list of subtrees containing ortholog group candidate subtrees
2322 # (trees with > 1 leaf, and distinct species in all leaves)
2323 # the argument allows one to specify to go up some number of parent
2324 # nodes above the nodes with the ortholog group candidate subtrees.
2325 sub get_ortholog_group_candidate_subtrees{
2326 my $self = shift;
2327 my $desired_levels_above = shift;
2328 $desired_levels_above = 0 unless($desired_levels_above > 0);
2329 # print "tree. levels_above: ", $desired_levels_above, "\n";
2330 my $ortholog_group_candidate_subtrees_array = [];
2331 $self->get_root()->recursive_set_levels_above_distinct_species_subtree();
2332 $self->get_root()->recursive_find_ortholog_group_candidate_subtrees($ortholog_group_candidate_subtrees_array, $desired_levels_above);
2333 return $ortholog_group_candidate_subtrees_array;
2337 sub make_names_urec_ok{
2338 my $self = shift;
2339 my @nodes = $self->get_all_nodes();
2340 foreach my $a_node (@nodes){
2341 my $name = $a_node->get_name();
2342 if($name =~ /^[a-zA-Z]/){ # ok - leave as is.
2343 }else{ # add X_ in front to make urec happy
2344 $a_node->set_name("X_$name");
2349 # using urec, find the node s.t. rooting on its branch gives minimal duplications and losses
2350 # w.r.t. a species tree
2351 sub find_mindl_node{
2352 my $gene_tree = shift; # a rooted gene tree
2353 my $species_t = shift; # a species tree
2354 #print "in find_mindl_node. species_tree newick: \n", $species_t->subtree_newick(), " \n";
2356 # print STDERR "##################### Top of find_mindl_node. #############\n";
2357 # urec requires binary tree - make sure the tree is binary
2358 # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy)
2359 my @new_root_point;
2361 my @root_children = $gene_tree->get_root()->get_children();
2362 if (scalar @root_children != 2) {
2363 @new_root_point = ($root_children[0], 0.9*$root_children[0]->get_branch_length());
2364 $gene_tree->reset_root_to_point_on_branch(@new_root_point);
2367 # binarify every non-binary node. At present doesn't attempt to choose in a smart way
2368 # among the various possible resolutions
2369 $gene_tree->make_binary($gene_tree->get_root()); # urec requires binary tree.
2371 my $store_show_std_species = $gene_tree->get_show_standard_species();
2372 # put the trees into form of newick strings with no whitespace, so urec will be happy
2373 $gene_tree->show_newick_attribute("species");
2374 $gene_tree->set_show_standard_species(1);
2375 # make sure node names start with alphabetic char to make urec happy.
2376 $gene_tree->make_names_urec_ok();
2377 # need to redo node implicit names here.
2378 my $gene_newick_string = $gene_tree->generate_newick();
2379 # print "binarified gene tree (urec input): ", $gene_newick_string, "\n";
2380 $gene_newick_string =~ s/\s//g;
2382 $species_t->show_newick_attribute("species");
2383 $species_t->set_show_standard_species(1);
2384 my @thenodes = $species_t->get_leaves();
2385 foreach my $a_node (@thenodes){
2386 # print "species,stdspecies: ", $a_node->get_species(), " ", $a_node->get_standard_species(), "\n";
2388 #print "species tree (from subtree_newick): ", $species_t->subtree_newick(), "\n";
2389 my $species_newick_string = $species_t->generate_newick();
2390 $species_newick_string =~ s/\s//g; # remove whitespace
2391 #print "species tree newick string: $species_newick_string\n";
2392 #print "gene newick string: $gene_newick_string \n\n";
2393 #print "about to call urec. \n";
2394 # print STDERR "about to call urec, with gene tree newick:\n$gene_newick_string \n$species_newick_string \n";
2395 # my $rerooted_newick = `/home/tomfy/cxgn/cxgn-corelibs/lib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2396 # my $rerooted_newick = `/data/local/cxgn/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2398 my $rerooted_newick;
2399 # my $urec_dir = `which urec`;
2400 # if($urec_dir =~ /\S/){
2401 if(`which urec` =~ /\S/){
2402 $rerooted_newick = `urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2403 }else{
2404 $rerooted_newick = `/data/local/cxgn-old/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2407 # print STDERR "gene_newick_string: \n $gene_newick_string \n\nspecies_newick_string: \n $species_newick_string.\n\n";
2408 # print STDERR "Rerooted newick string: [$rerooted_newick].\n";
2410 # print "parsing mindl rerooted gene tree in Tree\n";
2411 my $minDL_rerooted_gene_tree = (CXGN::Phylo::Parse_newick->new($rerooted_newick, $do_parse_set_error))->parse(); # this is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree,
2412 # but branch lengthswill be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents).
2413 $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names();
2415 # $minDL_rerooted_gene_tree should have 2 children and (at least) one should have it's subtree also present in the pre-rerooting tree.
2416 # identify the node at the root of this subtree (using implicit names) and reroot there.
2417 # Have to do this because some branch length info was lost in urec step.
2418 # warn "minDL_rerooted_gene_tree newick: \n", $minDL_rerooted_gene_tree->generate_newick(), "\n";
2419 my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children();
2420 my ($node_key, $rr_node);
2421 foreach (@root_children) {
2422 my $implicit_name_string = join("\t", @{$_->get_implicit_names()});
2423 # warn "implicit name string: $implicit_name_string \n";
2424 ($node_key, $rr_node) = $gene_tree->node_from_implicit_name_string($implicit_name_string);
2425 if (defined $rr_node) {
2426 $minDL_rerooted_gene_tree->decircularize();
2427 # debug ("Reroot above this node: $implicit_name_string \n");
2428 # warn "branch length along which to reroot: ", $rr_node->get_branch_length(), "\n";
2429 return @new_root_point = ($rr_node, 0.5*($rr_node->get_branch_length()));
2432 $| = 1;
2434 warn "problem with mindl rerooting. minDL_rerooted_gene_tree newick: \n",
2435 $minDL_rerooted_gene_tree->generate_newick(), "\n";
2436 die "find_mindl_node failed. \n";
2438 # $gene_tree->set_shown_standard_species($store_show_standard_species);
2439 #$gene_tree->update_label_names();
2440 return (undef, undef);
2443 sub get_species_bithash{ #get a hash giving a bit pattern for each species in both $gene_tree and $spec_tree
2444 my $gene_tree = shift;
2445 my $spec_tree = shift;
2446 my $bithash = {};
2447 my %genehash;
2448 my %spechash;
2449 $spec_tree->show_newick_attribute("species");
2450 my $stree_newick = $spec_tree->generate_newick();
2451 # print STDERR "SPECIES TREE: $stree_newick \n";
2452 my @leaf_list = $gene_tree->get_leaf_list();
2453 foreach (@leaf_list) {
2454 my $lspecies = $_->get_standard_species();
2455 print STDERR "In Tree::get_species_bithash; gtree species: $lspecies \n";
2456 $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species
2458 @leaf_list = $spec_tree->get_leaf_list();
2459 foreach (@leaf_list) {
2460 my $lspecies = $_->get_standard_species();
2461 # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n";
2462 if ($genehash{$lspecies}) {
2463 $spechash{$lspecies}++; # keys are species in both trees.
2466 my @species_list = sort (keys %spechash);
2467 # print join(" ", @species_list), "\n";
2468 my $bits = 1;
2469 foreach (@species_list) {
2470 $bithash->{$_} = $bits;
2471 $bits = $bits << 1; # mult by two
2472 # print "$_, $bits \n";
2475 return $bithash;
2478 sub prune_non{ # prune leaves from tree 1 if their species does not occur in tree2
2481 # return a hash whose keys are leaf node names (hidden nodes excluded)
2482 # and whose values are refs to arrays of 1's and 0's, the 1's indicating orthology.
2483 sub ortho_matrix_hash{
2484 my $self = shift;
2485 my @leaf_names = ();
2486 for ($self->get_leaves()) {
2487 next if($_->get_hide_label()); # do not include hidden labels
2488 push @leaf_names, $_->get_name();
2490 @leaf_names = sort @leaf_names;
2491 # print STDERR join(" ", @leaf_names), "\n";
2492 my $n_leaves = scalar @leaf_names;
2493 my %name_hash;
2494 my %ortho_hash;
2496 my $i = 0;
2497 foreach (@leaf_names) {
2498 $name_hash{$_} = $i;
2499 my @zeroes = (0)x$n_leaves;
2500 $ortho_hash{$_} = \@zeroes;
2501 $i++;
2503 my @leaves = $self->get_leaves();
2504 foreach (@leaves) {
2505 my $name = $_->get_name();
2506 my $o_array = $ortho_hash{$name};
2507 # print STDERR join(" ", @$o_array), "\n";
2508 my @orthologs = $_->collect_orthologs_of_leaf();
2509 foreach (@orthologs) {
2510 my $o_name = $_; #->get_name();
2511 # print STDERR $o_name, " ", $name_hash{$o_name}, "\n";
2512 $o_array->[$name_hash{$o_name}] = 1; # in the array for $name set the right element to 1
2515 # foreach (@leaf_names) {
2516 # my $ortho_array_ref = $ortho_hash{$_};
2517 # printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n";
2519 return \%ortho_hash;
2522 sub set_branch_lengths_equal{
2523 my $self = shift;
2524 my $bl = shift || 1.0;
2525 $self->get_root()->recursive_set_branch_length($bl);
2528 sub decircularize{
2529 my $self = shift;
2530 $self->get_root()->recursive_decircularize();
2531 $self->set_renderer(undef);
2532 $self->set_layout(undef);
2535 sub DESTROY {
2537 my $self = shift;
2538 # $self->decircularize();
2539 # check for an overridden destructor...
2540 $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
2541 # now do your own thing before or after