1 package CXGN
::Phylo
::Tree
;
5 CXGN::Phylo::Tree - an object to handle trees
9 my $tree = CXGN::Phylo::Tree->new();
10 my $root = $tree->get_root();
11 my $node = $root->add_child();
12 $node->set_name("I'm a child node");
13 $node->set_link("http://solgenomics.net/");
14 my $child_node = $node->add_child();
15 $child_node->set_name("I'm a grand-child node");
16 print $tree->generate_newick();
20 The tree object provides metadata for tree data structures. The tree data structure itself is defined with node objjects (L<CXGN::Phylo::Node>), for which the tree object stores the root node, which gives access to the entire tree structure using appropriate node functions such as get_children(). The tree object also provides convenience functions, which usually map to node functions on the root node.
22 For faster access of individual nodes, the tree object keeps a hash of nodes keyed by a unique id for each node. The tree object also provides a function to obtain new unique node ids.
24 The tree object also provides the layout and rendering functions. The both layout and rendering are defined by L<CXGN::Phylo::Layout> and L<CXGN::Phylo::Renderer> objects, of which several versions exist that provide different tree layouts and renderings.
29 Lukas Mueller (lam87@cornell.edu)
30 Tom York (tly2@cornell.edu)
37 use CXGN
::Phylo
::Node
;
38 use CXGN
::Phylo
::Species_name_map
;
39 use CXGN
::Phylo
::Layout
;
40 use CXGN
::Phylo
::Renderer
;
41 use CXGN
::Phylo
::Parser
;
42 use CXGN
::Phylo
::Alignment
;
43 use CXGN
::Phylo
::Ortholog_group
;
45 use base qw
/ CXGN::DB::Object / ;
49 Synopsis: my $t = CXGN::Phylo::Tree->new()
51 Returns: an instance of a Tree object.
52 Side effects: creates the object and initializes some parameters.
59 my $self = bless {}, $class;
60 #You can feed constructor with a newick string, which will create
61 #a parser object that creates a tree object *without* passing a
62 #string, which would lead to an infinite loop. Watch out!
64 my $newick_string = "";
66 # print STDERR "Tree::new. [$newick_string] \n";
67 $newick_string = $arg;
68 # print STDERR "Tree::new. [$newick_string] \n";
71 if ($arg->{from_files
}) {
72 $newick_file = $arg->{from_files
}->{newick
};
73 die "Need a newick file if 'from_files' is used\n" unless -f
$newick_file;
75 $self = _tree_from_file
($newick_file);
76 my $alignment_file = $arg->{from_files
}->{alignment
};
77 if ($alignment_file) {
78 die "Alignment file: $alignment_file not found" unless -f
$alignment_file;
79 my $alignment = CXGN
::Phylo
::Alignment
->new( from_file
=>$alignment_file);
80 $self->set_alignment($alignment);
81 $self->standard_alignment_leaf_association();
84 } elsif ($arg->{from_file
}) {
85 $newick_file = $arg->{from_file
};
86 $self = _tree_from_file
($newick_file);
91 $newick_string =~ s/\s//g;
92 $newick_string =~ s/\n|\r//sg;
93 if ($newick_string =~ /^\(.*\)|;$/) { # start with (, end with ) or ;
94 # print STDERR "in Tree::new, about to parse the newick_string \n";
95 my $parser = CXGN
::Phylo
::Parse_newick
->new($newick_string);
96 my $self = $parser->parse();
98 } elsif ($newick_string) {
99 print STDERR
"String passed not recognized as newick\n";
103 ##############################################################
104 #$self is a new tree, not predefined by newick; instead it will be
105 #constructed by methods on this object and Phylo::Node's
107 #print STDERR "constructing Tree not predefined by a newick\n";
109 $self->set_unique_node_key(0);
111 # initialize the root node
113 my $root = CXGN
::Phylo
::Node
->new();
114 $root->set_name(".");
115 $root->set_tree($self);
116 $root->set_node_key($self->get_unique_node_key());
117 $self->add_node_hash($root, $root->get_node_key());
118 $self->set_root($root);
120 # initialize some imaging parameters
122 $self->set_show_labels(1);
123 $self->set_hilite_color(255, 0 ,0);
124 $self->set_line_color(100, 100, 100);
125 $self->set_bgcolor(0, 0, 0);
126 $self->set_show_species_in_label(0);
127 $self->set_show_standard_species(0);
128 $self->set_species_standardizer(CXGN
::Phylo
::Species_name_map
->new());
130 #Attribute names to show in newick extended format
131 $self->{newick_shown_attributes
} = {};
132 $self->{shown_branch_length_transformation
} = "branch_length"; # other possibilities: "proportion_different", equal
133 $self->{min_shown_branch_length
} = 0.001; # when showing branches graphically, this is added to the displayed length
134 $self->{min_branch_length
} = 0.0001;
135 # initialize a default layout and renderer
137 $self->set_layout( CXGN
::Phylo
::Layout
->new($self) );
138 $self->set_renderer( CXGN
::Phylo
::PNG_tree_renderer
->new($self) );
143 # copy some of the tree's fields. Other fields will just have default values as set in constructor
144 # e.g. layout and renderer aren't copied because there is no copy method for these objects
145 sub copy_tree_fields
{
146 my $self = shift; # source
147 my $new = shift; # copy
149 $new->set_name($self->get_name());
151 # initialize some imaging parameters
153 $new->set_show_labels($self->get_show_labels());
154 $new->set_hilite_color($self->get_hilite_color());
155 $new->set_line_color($self->get_line_color());
156 $new->set_bgcolor($self->set_bgcolor());
158 $new->set_species_standardizer($self->get_species_standardizer()->copy()) if (defined $self->get_species_standardizer()) ;
159 $new->set_show_species_in_label($self->get_show_species_in_label());
160 $new->set_show_standard_species($self->get_show_standard_species());
162 #Attribute names to show in newick extended format
163 # $new->{newick_shown_attributes} = $self->{newick_shown_attributes};
164 # @{$new->{newick_shown_attributes}} = @{$self->{newick_shown_attributes}};
165 %{$new->{newick_shown_attributes
}} = %{$self->{newick_shown_attributes
}};
169 =head2 function copy()
171 Synopsis: my $t_copy = $a_tree->copy()
173 Returns: A copy of $a_tree
174 Side effects: creates the object, and makes it be a copy.
181 my $new = $self->get_root()->copy_subtree();
182 $new->update_label_names();
187 sub _tree_from_file
{
189 my $tree = _tree_from_newick
(_newick_from_file
($file));
190 $tree->standard_layout();
194 sub _tree_from_newick
{
195 my $newick_string = shift;
196 $newick_string =~ s/\s//g;
197 $newick_string =~ s/\n|\r//g;
198 if($newick_string =~ /^\(.*\)|;$/){
199 my $parser = CXGN
::Phylo
::Parse_newick
->new($newick_string);
200 my $tree = $parser->parse();
203 elsif($newick_string) {
204 print STDERR
"String passed not recognized as newick\n";
209 sub _newick_from_file
{
211 open(FH
, $file) or die "Can't open file: $file\n";
213 $newick .= $_ while (<FH
>);
223 return $self->{alignment
};
228 $self->{alignment
} = shift;
229 unless(@
{$self->{alignment
}->{members
}}){
230 warn "The alignment set to the tree has no members. You must construct the alignment before setting it here";
235 =head2 function standard_alignment_leaf_association()
237 Associate alignment members to leaf nodes based
242 sub standard_alignment_leaf_association
{
244 my $alignment = $self->get_alignment();
245 return unless $alignment;
247 foreach my $m ($alignment->get_members()) {
248 $id2mem{$m->get_id()} = $m;
250 foreach my $l ($self->get_leaves()) {
251 my $m = $id2mem{$l->get_name()};
253 $l->set_alignment_member($m);
257 =head2 function get_root()
259 Synopsis: my $node = $t->get_root();
261 Returns: a Node object, which is the root of the tree.
264 See also: $node->is_root()
270 return $self->{root
};
273 =head2 function set_root()
275 Synopsis: $t->set_root($node);
276 Arguments: a Node object
278 Side effects: the node $node will be defined as the root of the tree.
279 Note that $node->is_root() must evaluate to true,
280 set_root() will therefore set the parent of the root
282 See also: prune_to_subtree() - takes a node as a parameter and
283 will create a sub-branch of the tree. It throws away all
284 other nodes that are not part of the sub-branch.
285 reset_root() - resets the root to the specified node and
286 inverts the parent child relationships from the
287 specified node upwards to the root.
294 $new_root->set_parent(undef); #is_root must be true
295 $new_root->set_branch_length(undef);
296 $self->{root
}=$new_root;
299 =head2 function delete_node() and del_node()
301 Synopsis: $tree->delete_node($node->get_node_key());
302 $tree->del_node($node);
303 Arguments: delete_node: a unique node key, del_node: a node object.
304 Returns: nothing if operation is successful, 1 if operation
305 not succesful (because it was attempted to delete
307 Side effects: Adds the branch length to each of its children,
308 Recalculates the leaf list and node hash.
309 Note: The root node cannot be deleted.
316 my $node_key = shift;
318 # get the node object from the key
320 my $node=$self->get_node($node_key);
321 return $self->del_node($node);
324 # delete node by passing node object as argument
325 # rather than node key as with delete_node
330 warn 'The node you want to delete does not exist!'; return;
332 my $retval = $node->delete_self();
333 $self->recalculate_tree_data();
337 =head2 function recalculate_tree_data()
342 Side effects: recalculates the leaf list, the node hash, and all the
343 subtree distances. It does not affect the node keys.
348 sub recalculate_tree_data
{
350 $self->calculate_leaf_list();
351 $self->clear_node_hash();
352 $self->regenerate_node_hash($self->get_root());
353 $self->get_root()->calculate_distances_from_root();
354 $self->get_root()->recursive_clear_properties();
357 =head2 function prune_to_subtree()
359 Synopsis: $a_tree->prune_to_subtree($node);
360 Arguments: a node object, the root of the subtree to be kept.
362 Side effects: Prunes the tree.
363 Description: Prune the tree so that only $node and its subtree
364 is left, with $node as the new root.
365 (sub_branch is synonymous)
369 sub prune_to_subtree
{
371 my $new_root_node = shift;
373 $self->set_root($new_root_node);
374 $self->recalculate_tree_data();
377 #=head2 function sub_branch()
379 # Synopsis: deprecated, synonym for prune_to_subtree
386 # my $new_root_node = shift;
388 # $self->set_root($new_root_node);
389 # $self->recalculate_tree_data();
391 # prune_to_subtree(@_);
396 =head2 function reset_root()
398 Synopsis: $tree->reset_root($node);
399 Arguments: a node object that will be the new root node
401 Side effects: recalculates the tree parameters using the new
403 Description: reverses all the parent-child relationships
404 between the node $node and the old root node,
405 then sets the tree root node to $node.
406 Authors: Lukas Mueller, Tom York.
411 my $self = shift; # tree object
412 my $new_root_node = shift; # node object
414 if (0) { #either of these branches should work.
415 my @parents = $new_root_node->get_all_parents(); # parent, grandparent, etc. up to & including root
416 $new_root_node->set_parent(undef); # because it is to be the root
417 my $pc_blen = $new_root_node->get_branch_length(); # branch length between $pc and $cp
418 my $cp=$new_root_node;
419 foreach my $pc (@parents) {
420 my $former_p_blen = $pc->get_branch_length();
421 $pc->remove_child($cp); # removes $cp from $pc's child list
422 $cp->add_child_node($pc); # adds $pc as child of $cp, and set $pc's parent to $cp
423 $pc->set_branch_length($pc_blen);
425 $pc_blen = $former_p_blen;
428 my @parents_root_down = reverse $new_root_node->get_all_parents();
429 push @parents_root_down, $new_root_node; # need to include the new root in the array
430 my $pc = shift @parents_root_down; # pc means goes from being parent to being child
432 for (my $cp = shift @parents_root_down; defined $cp; $cp = shift @parents_root_down) {
433 my $blen = $cp->get_branch_length();
434 $pc->remove_child($cp); # remove $cp from children list of $pc
435 $cp->set_parent(undef);
436 $cp->add_child_node($pc); # now $cp is parent, $pc the child
437 $pc->set_branch_length($blen);
439 # at this point we still have a consistent tree, but with the root moved another step along the
440 # path from original root to new root.
443 $self->set_root($new_root_node);
444 $new_root_node->set_branch_length(0);
445 $self->recalculate_tree_data();
448 =head2 function get_leaf_count()
450 Synopsis: $tree->get_leaf_count()
452 Returns: the number of leaves in the tree
460 # $self->get_root()->count_leaves();
461 return scalar $self->get_leaf_list();
464 =head2 function get_unhidden_leaf_count()
466 Get the number of visible leaves in the tree
470 sub get_unhidden_leaf_count
{
472 return scalar grep { !$_->is_hidden } $self->get_leaf_list;
475 =head2 function set_unique_node_key()
477 Synopsis: $tree->set_unique_node_key(345);
478 Arguments: an integer value to set the unique node key
481 Side effects: this value will then be used by get_unique_node_key().
482 The getter function increases the unique key by one
483 every time it is called.
485 Note: this function is used internally and it should not be
486 necessary to ever use it.
490 sub set_unique_node_key
{
492 $self->{unique_node_key
}=shift;
495 =head2 function get_unique_node_key()
497 Synopsis: $node->set_node_key(
498 $node->get_tree()->get_unique_node_key() );
500 Returns: a unique node key
503 Note: it should not be necessary to call this method, because
504 new nodes should always be added using the
505 $node->add_child() function, which assures that the
506 node_key property is filled in correctly.
510 sub get_unique_node_key
{
512 $self->{unique_node_key
}++; # increment the unique node key
513 while (exists $self->{node_hash
}->{$self->{unique_node_key
}}) { # if key already in node_hash, increment again...
514 $self->{unique_node_key
}++;
516 return $self->{unique_node_key
};
519 =head2 function clear_node_hash()
521 Synopsis: $t -> clear_node_hash()
523 Returns: clears the node hash
529 sub clear_node_hash
{
531 %{$self->{node_hash
}}=();
534 =head2 function regenerate_node_hash()
536 Synopsis: $tree->regenerate_node_hash()
537 Arguments: a node, most conveniently the root node.
539 Side effects: regenerates the node hash from the current root.
540 Description: it uses the predefined ...? Recursive. Adds node
541 to hash and then calls itself on each child
545 sub regenerate_node_hash
{
548 $node ||= $self->get_root();
549 #print("in regenerate_node_hash. \n");
550 #$node->print_node();
551 #print("node key: ", $node->get_node_key());
552 $self->add_node_hash($node, $node->get_node_key());
553 foreach my $c ($node->get_children()) {
554 $self->regenerate_node_hash($c);
556 $self->set_unique_node_key( scalar $self->get_all_nodes() );
559 =head2 function add_node_hash()
561 Synopsis: $tree->add_node_hash($node, $unique_key);
562 Arguments: an instance of a Node object; a unique node key.
564 Side effects: the $node is added to the node hash.
565 the node hash uses the node\'s node_key property
566 as a hash key, and the node object itself as a
567 hash value. Note that it should not be necessary
568 to call this function. All new nodes should be
569 added using the add_child() method which automatically
570 inserts the new node in the node_hash.
578 my $unique_key = shift;
580 ${$self->{node_hash
}}{$unique_key}=$node;
583 =head2 function get_all_nodes()
585 Synopsis: returns a list of all nodes, in no particular order.
587 Returns: a list of nodes
595 return (values %{$self->{node_hash
}});
599 sub get_all_node_keys
{
601 return (keys %{$self->{node_hash
}});
607 return scalar($self->get_all_nodes());
610 =head2 function get_node()
612 Synopsis: my $node->get_node($unique_node_key);
613 Arguments: a unique node key of a node
614 Returns: the $node object associated with the node key.
616 Description: this function uses the node hash and should therefore
617 be fast. The node key values can be embedded in things
618 like HTML imagemaps, and the corresponding nodes can
619 be quickly retrieved for further manipulation using
627 return ${$self->{node_hash
}}{$key};
632 my $hashref = $self->{node_hash
};
633 foreach my $k (keys (%$hashref)) {
634 my $n = $self->get_node($k);
636 print("key, node: ", $k); $n->print_node();
638 print("key: ", $k, " has undefined node (returned by get_node($k) ). \n");
641 print("present value of unique_node_key: ", $self->{unique_node_key
}, "\n");
646 =head2 function incorporate_nodes()
648 Given a list of nodes, add them to this tree's membership
649 by setting their 'tree' attributes and giving them new node
650 keys from this tree's pool, setting the hash appropriately
652 Arg: List of node objects
657 sub incorporate_nodes
{
660 foreach my $n (@nodes) {
661 my $new_key = $self->get_unique_node_key();
663 $n->set_node_key($new_key);
664 $self->add_node_hash($n, $new_key);
668 =head2 function incorporate_tree()
670 Given a tree, incorporate that tree's nodes into this tree. This does not affect parent/child relationships; you have to set those yourself
674 sub incorporate_tree
{
676 my $sub_tree = shift;
677 my @nodes = $sub_tree->get_root()->get_descendents();
678 $self->incorporate_nodes(@nodes);
681 =head2 function make_binary()
683 Inserts joint nodes at polyphetic points so that the tree is biphetic or monophetic. The joint nodes have branch-length 0, so this should not affect analysis, but it allows the tree to conform to certain standards in external programs.
690 $node ||= $self->get_root();
692 $new_bl ||= $self->get_min_branch_length();
693 $node->binarify_children($new_bl);
694 foreach($node->get_children()){
695 $self->make_binary($_, $new_bl);
699 =head2 function traverse()
701 Synopsis: $tree->traverse( sub{ my $node = shift;
702 $node->set_hidden() } );
703 Arguments: a function to be performed on each node, taking
704 that node as its only argument
706 Side effects: the function will be executed on each node object.
707 Description: not yet implemented... UPDATE: C. Carpita attempts
713 my $function = shift;
715 die "You did not pass a subroutine reference" unless (ref($function) eq "CODE");
716 $node ||= $self->get_root();
720 foreach( $node->get_children() ){
721 $self->traverse($function, $_);
725 sub newick_shown_attributes
{ # just return the keys (attributes), so everything should work the same.
727 return keys %{$self->{newick_shown_attributes
}};
730 sub show_newick_attribute
{
733 # push(@{$self->{newick_shown_attributes}}, $attr);
734 $self->{newick_shown_attributes
}->{$attr}++;
737 sub unshow_newick_attribute
{
741 delete $self->{newick_shown_attributes
}->{$attr};
743 # my $size = scalar @{$self->{newick_shown_attributes}};
744 # foreach my $index (0..$size-1) {
745 # if ( ($self->{newick_shown_attributes})->[$index] eq $attr) {
746 # delete $self->{newick_shown_attributes}->[$index];
752 sub get_min_branch_length
{
754 return $self->{min_branch_length
};
757 sub set_min_branch_length
{
759 $self->{min_branch_length
} = shift;
762 sub get_shown_branch_length_transformation
{
764 return $self->{shown_branch_length_transformation
};
767 sub set_shown_branch_length_transformation
{
769 $self->{shown_branch_length_transformation
} = shift;
772 sub set_min_shown_branch_length
{
774 $self->{min_shown_branch_length
} = shift;
777 sub get_min_shown_branch_length
{
779 return $self->{min_shown_branch_length
};
782 sub shown_branch_length_transformation_reset
{
784 $self->set_shown_branch_length_transformation(shift);
785 $self->{longest_branch_length
} = undef;
786 $self->get_root()->calculate_distances_from_root();
789 =head2 function generate_newick()
791 Args: (optional) node, defaults to root node
792 (optional) $show_root - boolean, will show the root node in the newick string
793 Returns: Newick expression from the given node, or for the whole
794 tree if no argument is provided
798 sub generate_newick
{
801 my $show_root = shift;
803 $node ||= $self->get_root();
804 return $node->recursive_generate_newick("", 1, $show_root);
808 =head2 function get_orthologs()
810 Synopsis: my $ortho_trees_ref = $tree->get_orthologs();
812 Returns: a reference to a list of trees in which the leaves are all
814 Side effects: Sets some node attributes, but deletes at end.
815 Description: This version uses the number of leaves and the number of
816 leaf species in a subtree to decide if that subtree's
817 leaves are all orthologs. (The topology is not used,
818 subroutine orthologs compares the topology to a
826 my $root_node = $self->get_root();
828 $root_node->recursive_set_leaf_count(); # set leaf_count attribute for all nodes
829 $root_node->recursive_set_leaf_species_count(); # set leaf_species_count attribute for all nodes
830 my $trees_ref = $root_node->collect_orthologs();
832 # can delete the leaf_count and leaf_species_count attributes here
833 my @node_list = $self->get_all_nodes();
834 map($_->delete_attribute("leaf_count"), @node_list);
835 map($_->delete_attribute("leaf_species_count"), @node_list);
840 #This should recursively get all the subtree leaf species counts, and then run over everything again,
841 # comparing to the leaf counts for each species in the whole tree, to get the leaf species counts for the
842 # complement of each subtree.
843 sub set_all_subtree_and_complement_leaf_species_counts
{
845 my $leaf_species_count_hash = $self->get_root()->recursive_set_leaf_species_count();
846 print "in set_all_subtree... ; number of species: ", scalar keys %$leaf_species_count_hash, "\n"; readline();
847 $self->get_root()->recursive_set_leaf_species_count($leaf_species_count_hash);
850 sub get_complement_ortho_group_candidates
{
852 my @node_list = $self->get_root()->recursive_subtree_node_list();
853 foreach my $n (@node_list) {
854 my $comp_leaf_count = $self->get_root()->get_attribute("leaf_count") - $n->get_attribute("leaf_count");
855 my $comp_leaf_species_count = $n->get_attribute("comp_leaf_species_count");
856 if ($comp_leaf_count == $comp_leaf_species_count && $comp_leaf_count >1) {
857 print "complement to subtree : ", $n->get_name(), " is a og candidate \n";
858 print "with $comp_leaf_count leaves and $comp_leaf_species_count leaf species \n";
864 sub get_leaf_parents_list
{
866 foreach my $leaf ($self->get_leaf_list()) {
867 my $parent = $leaf->get_parent();
868 ${$self->{leaf_parent_hash
}}{$parent->get_node_key()}=$parent;
870 # return the parents as a neat list
871 return map (${$self->{leaf_parent_hash
}}{$_}, keys(%{$self->{leaf_parent_hash
}}));
874 # helper functions that deal with the leaf list. It contains a list of nodes
875 # that form leaves, in the order they will be rendered. The leaf list is stored
876 # in the Tree datastructure.
879 =head2 function get_leaf_list()
881 Synopsis: my @leaves = $tree->get_leaf_list();
883 Returns: a list of Nodes that represent the leaves of the tree
891 if (!exists($self->{leaf_list
}) || !@
{$self->{leaf_list
}}) { $self->calculate_leaf_list(); }
892 return @
{$self->{leaf_list
}};
897 Alias for get_leaf_list()
903 return $self->get_leaf_list();
908 my $leaf_node = shift;
909 push @
{$self->{leaf_list
}}, $leaf_node;
912 sub clear_leaf_list
{
914 @
{$self->{leaf_list
}}=();
917 sub calculate_leaf_list
{
919 $self->clear_leaf_list();
920 my @leaf_list = $self->get_root()->recursive_leaf_list();
921 foreach my $leaf (@leaf_list) {
922 $self->add_leaf_list($leaf);
928 # the tree_topology_changed member variable contains the status of the
929 # topology of the tree. If the tree has been changed, it should be 1,
930 # otherwise it should be 0.
932 sub get_tree_topology_changed
{
934 return $self->{tree_topology_changed
};
937 sub _set_tree_topology_changed
{
939 $self->{tree_topology_changed
}=shift;
942 =head2 function get_name()
944 Synopsis: my $tree_name = $tree->get_name();
946 Returns: the name of the tree.
954 return $self->{name
};
957 =head2 function set_name()
959 Synopsis: $tree->set_name("A tree of the cytochrome P450 family in the Solanaceae");
960 Arguments: a string representing a name
962 Side effects: this name will be used somehow in the future, such as when
963 the tree is rendered as an image.
973 =head2 function get_longest_root_leaf_length()
975 Synopsis: my $longest = $tree->get_longest_root_leaf_length()
977 Returns: the longest distance from the root to any leaf [real]
983 sub get_longest_root_leaf_length
{
985 if (!$self->{longest_branch_length
}) {
986 $self->set_longest_root_leaf_length($self->calculate_longest_root_leaf_length());
988 # print "in get_longest_root_leaf_length: ", $self->{longest_branch_length}, "\n";
989 return $self->{longest_branch_length
};
992 =head2 function set_longest_root_leaf_length()
994 Synopsis: $tree->set_longest_root_leaf_length($distance)
995 Arguments: the distance from root to the furthest leaf.
997 Side effects: This value is used for the scaling of the tree in the
998 horizontal dimension. Normally it should be calculated
999 using get_longest_root_leaf_length().
1004 sub set_longest_root_leaf_length
{
1006 $self->{longest_branch_length
}=shift;
1009 sub calculate_longest_root_leaf_length
{
1012 foreach my $leaf ($self->get_leaf_list()) {
1013 my $dist = $leaf->get_dist_from_root();
1014 if ($dist > $largest) {
1021 =head2 function retrieve_longest_branch_node()
1031 sub retrieve_longest_branch_node
{
1033 my $longest_branch_node = $self->get_root()->_recursive_longest_branch_node(CXGN
::Phylo
::Node
->new());
1034 return $longest_branch_node;
1040 =head2 APPEARANCE OF THE TREE
1042 =head2 function get_show_labels()
1044 Synopsis: my $flag = $tree->get_show_lables();
1046 Returns: a boolean if the labels are currently visible.
1052 sub get_show_labels
{
1054 return $self->{show_labels
};
1057 =head2 function set_show_labels()
1059 Synopsis: $tree->set_show_lables(1);
1060 Arguments: a boolean value representing the visibility
1068 sub set_show_labels
{
1070 $self->{show_labels
}=shift;
1074 sub get_show_species_in_label
{
1076 return $self->{show_species_in_labels
};
1079 sub set_show_species_in_label
{
1081 $self->{show_species_in_labels
} = shift;
1085 =head2 accessors get_line_color(), set_line_color()
1087 Synopsis: my ($r, $g, $b) = $tree->get_line_color();
1089 Property: a list of (red, gree, blue) components of the
1090 color used to draw the tree lines.
1096 sub get_line_color
{
1098 return @
{$self->{line_color
}};
1101 sub set_line_color
{
1103 @
{$self->{line_color
}}=@_;
1107 =head2 accessors get_bgcolor(), set_bgcolor()
1109 Synopsis: $tree->set_bgcolor(255, 255, 255);
1110 Property: a list of (red, green, blue) components for the
1111 tree background color.
1119 return @
{$self->{bgcolor
}};
1124 @
{$self->{bgcolor
}}=@_;
1127 =head2 accessors get_hilite_color(), set_hilite_color()
1129 Synopsis: $tree->set_hilite_color(0, 255, 255);
1130 Property: a list of color components for the hilite color
1131 Side effects: this color is used to hilite labels of nodes that
1132 have the hilite propery set to a true value.
1137 sub get_hilite_color
{
1139 return @
{$self->{hilite_color
}};
1142 sub set_hilite_color
{
1144 @
{$self->{hilite_color
}}=@_;
1147 =head2 function get_node_by_name()
1150 Arguments: a search term
1151 Returns: a node object that has a matching node name
1153 Description: get_node_by_name() calls search_node_name(), appending
1154 ^ and $ to the regular expression. It assumes that all
1155 nodes have distinct names. If several nodes have the same
1156 name, only the first node it finds is returned. If it
1157 does not find the node, it returns undef.
1161 sub get_node_by_name
{
1164 foreach my $n ($self->get_all_nodes()){
1165 return $n if ($n->get_name() eq $name);
1170 #returns a list of nodes matching a certain reg expression depending on the argument
1171 sub search_node_name
{
1175 foreach my $n ($self->get_all_nodes()) {
1176 my $node_name = $n->get_name();
1177 if ($node_name =~ /\Q$term\E/i) {
1184 #returns a list of nodes matching a certain reg expression depending on the argument
1185 sub search_label_name
{
1189 foreach my $n ($self->get_all_nodes()) {
1190 my $label_name = $n->get_label()->get_name();
1191 if ($term =~ m/m\/(.*)\
//) { # if enter m/stuff/ then treat stuff as perl regex
1193 if ($match && $label_name =~ /$match/) {
1197 if ($term && $label_name =~ /\Q$term\E/i) {
1205 =head2 function compare()
1207 Synopsis: $this_tree->compare($another_tree);
1208 Arguments: a tree object
1209 Returns: 1 if the tree is identical in topology to
1211 0 if the trees have a different topology.
1213 Description: compare() works by comparing the node names and
1214 the topology of the tree. Because not all nodes
1215 usually have explicit names, it derives implicit
1216 names for each node (it assumes the leaf nodes have
1217 unique names). The implicit names are defined by an
1218 array containing all the names of the subnodes. The
1219 names are sorted by alphabetical order and then compared.
1221 Note: This is a synonym for compare_rooted. There is also a
1222 compare_unrooted routine to test whether trees
1223 are the same aside from being rooted in different places.
1229 # my $other_tree = shift;
1230 #my $compare_field = shift;
1232 ## print STDOUT "in compare. compare_field: $compare_field \n";
1234 # return $self->compare_rooted($other_tree, $compare_field);
1237 =head2 function compare_rooted
1239 Synopsis: $tree1->compare_rooted($tree2);
1240 Arguments: A tree object.
1241 Returns: 1 if $tree1 and $tree2 are topologically the same
1242 when regarded as rooted trees, 0 otherwise.
1244 Description: Works with copies of trees; collapses them, gets
1245 implicit names, then recursively compares trees
1246 using implicit names.
1247 Note: Now synonymous with compare. Can compare subtrees
1248 with Node->compare_subtrees
1255 my $other_tree = shift;
1256 my $compare_field = shift;
1257 # print STDOUT "in compare_rooted. compare_field: $compare_field \n";
1258 return $self->get_root()->compare_subtrees($other_tree->get_root(), $compare_field);
1261 =head2 function compare_unrooted
1263 Synopsis: $tree1->compare_unrooted($tree2);
1264 Arguments: A tree object.
1265 Returns: 1 if $tree1 and $tree2 are topologically the same
1266 when regarded as unrooted trees, 0 otherwise.
1268 Description: Copies the 2 trees, finds a leaf common to both
1269 (if one exists) and resets roots of both trees to those
1270 leaves. Then recursively compares trees using implicit
1271 names in same way as compare_rooted().
1272 Note: In its present form, assumes uniqueness of leaf names.
1273 Otherwise, if may return 0 when it should return 1.
1278 sub compare_unrooted
{
1280 my $other_tree = shift;
1281 my $compare_field = shift; # to control comparison of names (default) or species ("species")
1282 # copy the trees into temporary trees, so that the trees can
1283 # be manipulated (rerooted, collapsed) without changing the original trees.
1285 # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
1286 my $tree1 = $self->copy();
1287 my $tree2 = $other_tree->copy();
1289 # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
1291 my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
1292 my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
1294 if (!$corresponding_leaf) {
1295 print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
1299 # reset roots of trees to the two corresponding leaves:
1300 $tree1->reset_root($leaf1);
1301 $tree2->reset_root($corresponding_leaf);
1303 return $tree1->get_root()->compare_subtrees($tree2->get_root(), $compare_field);
1307 =head2 function get_layout(), set_layout()
1309 Synopsis: $tree->set_layout($layout)
1310 Arguments: a CXGN::Phylo::Layout object or subclass
1312 Side effects: the layout object will be used to lay out the
1313 tree in the rendering process.
1320 return $self->{layout
};
1325 $self->{layout
}=shift;
1329 =head2 function layout()
1331 Synopsis: $tree->layout()
1335 Description: a convenience function that calls the layout function of the
1336 trees layout object.
1343 $self->get_layout()->layout();
1347 =head2 accessors get_renderer(), set_renderer()
1349 Synopsis: $tree->set_renderer($renderer)
1350 Arguments: a CXGN::Phylo::Renderer object or subclass
1352 Side effects: the $renderer is used for rendering the tree
1359 return $self->{renderer
};
1364 $self->{renderer
}=shift;
1367 =head2 function render()
1369 Synopsis: $tree->render();
1370 Arguments: (optional) a boolean for printing all node names, and not only the leaf labels
1373 Description: a convenience function that calls the render()
1374 function on the tree\'s renderer. Does not perform
1375 the layout of the tree. Call layout() on the tree
1376 object before render().
1382 my $print_all_labels=shift;
1383 $self->get_renderer()->render($print_all_labels);
1386 sub standard_layout
{
1388 my $layout = CXGN
::Phylo
::Layout
->new($self);
1389 $layout->set_top_margin(20);
1390 $layout->set_bottom_margin(20);
1391 $layout->set_image_height(400);
1392 $layout->set_image_width(700);
1393 $self->set_layout($layout);
1397 =head2 function render_png()
1399 Synopsis: $r->render_png($filename, $print_all_labels);
1400 Arguments: a filename, (optional) a boolean for printing the labels for all nodes in the tree.
1402 Side effects: creates (or overwrites) file $filename
1403 which contains the png graphics representing
1412 my $print_all_labels= shift; ## Boolean for printing non-leaf node labels
1414 my $png_string = $self->render($print_all_labels);
1416 open (my $T, ">$file") || die "PNG_tree_renderer: render_png(): Can't open file $file.";
1417 print $T $png_string;
1425 =head2 function collapse_tree()
1437 # first, collapse all nodes that have only one child onto the
1440 #print STDERR "before rec..coll...single_nodes\n";
1441 $self->get_root()->recursive_collapse_single_nodes();
1442 #print STDERR "after rec..coll...single_nodes\n";
1443 $self->recalculate_tree_data();
1445 # then, collapse all nodes that have branch lengths of zero
1446 # with their parent node
1448 #print STDERR "before rec..coll...zero_branches\n";
1449 $self->get_root()->recursive_collapse_zero_branches();
1450 #print STDERR "after rec..coll...zero_branches\n";
1452 # let's re-calculate the tree's properties
1454 $self->recalculate_tree_data();
1457 sub collapse_unique_species_subtrees
{
1459 # calculate, for each node, how many nodes are beneath it.
1460 # This information can then be accessed using the
1461 # $node-> get_subtree_node_count() function.
1463 # $self->get_root()->calculate_subtree_node_count();
1465 # calculate, for each node, how many different species are in the leaves of the subtree beneath it.
1467 $self->get_root()->recursive_set_leaf_species_count();
1469 # recursively go through the tree
1471 $self->get_root()->recursive_collapse_unique_species_subtrees();
1474 =head2 function find_point_furthest_from_leaves()
1476 Synopsis: $t->find_point_furthest_from_leaves()
1478 Returns: A list containing a node object, and the distance
1479 above that node of the point furthest from the leaves.
1480 Side effects: Calls recursive_find_point_furthest_from_leaves,
1481 which sets some attributes.
1482 Description: For each point there is a nearest leaf at distance
1483 dnear. This returns the point which maximizes dnear.
1487 sub find_point_furthest_from_leaves
{
1489 $self->set_min_dist_to_leaf();
1490 my @furthest_point = $self->get_root()->recursive_find_point_furthest_from_leaves();
1491 $furthest_point[1] -= $furthest_point[0]->get_attribute("min_dist_to_leaf");
1492 return @furthest_point;
1495 =head2 function find_point_closest_to_furthest_leaf()
1497 Synopsis: $t->find_point_closest_to_furthest_leaf();
1499 Returns: A list containing a node object, and the distance
1500 above that node of the point closest to furthest leaf.
1501 Side effects: Calls recursive_set_max_dist_to_leaf_in_subtree,
1502 which sets some attributes.
1503 Description: For each point there is a furthest leaf at distance
1504 dfar. This returns the point which minimizes dfar.
1508 # returns a list containing a node object, and the distance of the point above that node
1509 sub find_point_closest_to_furthest_leaf
{
1511 $self->get_root()->recursive_set_max_dist_to_leaf_in_subtree();
1513 my @nodes = $self->get_root()->recursive_subtree_node_list();
1514 push @nodes, $self->get_root(); # we want the root in our list
1516 my @sorted_nodes = sort
1517 { $a->get_max_leaf_leaf_pathlength_in_subtree_thru_node()
1519 $b->get_max_leaf_leaf_pathlength_in_subtree_thru_node() }
1522 # using attribute "lptl_child" (longest path to leaf child) follow the longest path to leaf,
1523 # until you reach the midpoint of the longest leaf to leaf path
1524 my $current_node = pop @sorted_nodes;
1525 my $distance_to_go = 0.5*($current_node->get_attribute("dist_to_leaf_longest") - $current_node->get_attribute("dist_to_leaf_next_longest"));
1527 my $next_node = $current_node->get_attribute("lptl_child");
1528 my $branch_length = $next_node->get_branch_length();
1529 if ($branch_length >= $distance_to_go) {
1530 return ($next_node, $branch_length - $distance_to_go);
1532 $distance_to_go -= $branch_length;
1533 $current_node = $next_node;
1540 =head2 function reset_root_to_point_on_branch()
1542 Synopsis: $t->reset_root_to_point_on_branch($anode, $distance)
1543 Arguments: First arg is a node, the second a distance above that
1544 node. Together they define a point which will be new
1547 Side effects: Resets root to a point specified by arguments, and
1548 deletes old root node.
1549 Description: Use this to reset the root to a point along a branch.
1553 sub reset_root_to_point_on_branch
{
1555 my ($child_of_new_node, $dist_above) = @_;
1557 my $new_node = $child_of_new_node->add_parent($dist_above); # goes
1558 my $former_root = $self->get_root();
1560 $self->reset_root($new_node);
1562 $self->collapse_tree();
1565 =head2 function set_min_dist_to_leaf()
1567 Synopsis: $t->set_min_dist_to_leaf()
1570 Side effects: Sets the following attributes for every node
1571 in tree: min_dist_to_leaf, near_leaf_path_direction,
1572 near_leaf_path_next_node
1578 sub set_min_dist_to_leaf
{
1580 $self->get_root()->recursive_set_min_dist_to_leaf();
1581 $self->get_root()->recursive_propagate_mdtl();
1585 =head2 function min_leaf_dist_variance_point()
1587 Synopsis: $t->min_leaf_dist_variance_point()
1589 Returns: List ($n, $d) specifying the desired point as
1590 lying at a distance $d above the node $n.
1591 Side effects: Calls recursive_set_dl_dlsqr_sums_down(), and
1592 recursive_set_dl_dlsqr_sums_up(), which set
1593 several node attributes
1594 Description: Returns the point in the tree such that the
1595 variance of the distances from the point to the
1596 leaves is minimized.
1601 sub min_leaf_dist_variance_point
{
1604 $self->get_root()->recursive_set_dl_dlsqr_sums_down();
1605 $self->get_root()->recursive_set_dl_dlsqr_sums_up();
1607 my @node_list = $self->get_root()->recursive_subtree_node_list();
1608 my $opt_node = shift @node_list;
1609 my ($opt_dist_above, $opt_var) = $opt_node->min_leaf_dist_variance_point();
1611 foreach my $n (@node_list) {
1612 my ($da, $var) = $n->min_leaf_dist_variance_point();
1613 if ($var < $opt_var) {
1615 $opt_dist_above = $da;
1619 $self->get_root()->recursive_delete_dl_dlsqr_attributes();
1620 return ($opt_node, $opt_dist_above, $opt_var);
1624 =head2 function test_tree_node_hash()
1626 Synopsis: $t->test_tree_node_hash()
1628 Returns: 1 if test is passed, 0 otherwise.
1630 Description: Tests that the nodes in the tree as found by
1631 recursive_subtree_node_list() agree
1632 with the node hash. Specifically tests that
1633 1) the key of each node (found by
1634 recursive_subtree_node_list()) is found in
1636 2) no two nodes have the same key,
1637 3) each key in the node hash is the key of some node.
1639 It is possible for parts of the tree to become
1640 disconnected, so that it would not be possible to
1641 get from one to the other by at each step going from
1642 a node to a parent or child node, although all nodes
1643 would be in the node hash.
1648 sub test_tree_node_hash
{
1649 my $ok1 = 1; my $ok2 = 1; my $ok3 = 1;
1651 my $node_hashref = $self->{node_hash
};
1652 my $root = $self->get_root();
1653 my @node_list = $root->recursive_subtree_node_list();
1654 push @node_list, $root;
1657 foreach my $n (@node_list) { # test that each node in this list is found in the tree's node hash
1658 my $node_key = $n->get_node_key();
1659 $nodekeys{$node_key}++;
1660 if (!defined $node_hashref->{$node_key}) { # a node in node_list is not in the hash.
1665 if (scalar keys %nodekeys != scalar @node_list) { # test that each node in node_list has a distinct key
1669 my @node_keys = keys (%$node_hashref); # test that each key in node hash is
1670 if (scalar @node_keys != scalar @node_list) {
1673 return $ok1*$ok2*$ok3;
1677 =head2 function test_tree_parents_and_children()
1679 Synopsis: $t->test_tree_parents_and_children()
1681 Returns: 1 if test is passed, 0 otherwise.
1683 Description: Tests that node $a is a child of $b,
1684 if and only iff $b is the parent of $a.
1689 sub test_tree_parents_and_children
{
1691 my $ok1 = $self->test_tree_nodes_are_parents_of_their_children();
1692 my $ok2 = $self->test_tree_nodes_are_children_of_their_parents();
1693 return ($ok1 && $ok2);
1698 return $self->test_tree_node_hash() && $self->test_tree_parents_and_children();
1701 # tests that for all nodes n, each child of n has n as its parent.
1702 sub test_tree_nodes_are_parents_of_their_children
{
1704 my $root = $self->get_root();
1705 my @node_list = $root->recursive_subtree_node_list();
1706 push @node_list, $root;
1709 foreach my $n (@node_list) {
1710 my @children = $n->get_children();
1711 my $node_key = $n->get_node_key();
1712 foreach my $c (@children) {
1713 if(! defined $c->get_parent()){
1714 print("child node has undefined parent. \n"); $n->print_node(); $c->print_node();
1716 } elsif ($c->get_parent()->get_node_key() != $node_key) {
1717 print("child node has wrong parent. \n"); $n->print_node(); $c->print_node();
1726 # tests that for all nodes n, that if n has a parent, then n is among the children of that parent.
1727 sub test_tree_nodes_are_children_of_their_parents
{
1729 my $root = $self->get_root();
1730 my @node_list = $root->recursive_subtree_node_list();
1731 push @node_list, $root;
1734 foreach my $n (@node_list) { # test that $n is among the children of its parent
1735 my $p = $n->get_parent();
1736 if (defined $p) { # if not defined, do no test for this node
1737 my @children = $p->get_children();
1739 foreach my $c (@children) {
1740 if ($c->get_node_key() == $n->get_node_key()) {
1745 if(! $this_n_ok){ print("This node not among the children of its parent: \n"), $n->print_node(); }
1752 =head2 function orthologs()
1754 Synopsis: $ortho_grp = $ortho_tree->
1755 orthologs($species_tree, $cssst)
1756 Arguments: a tree object, and an argument which, if non-zero,
1757 causes single-species trees to be collapsed to a
1759 Returns: An list of ortholog groups.
1761 Description: Calls get_orthologs to get the ortholog_trees
1762 defined without using a species tree, i.e. maximal
1763 subtrees in which all leaves are of distinct species.
1764 Then for each ortholog group compare its tree to
1765 the species tree (if present), to see if topologies
1766 are the same, and if not, get a "distance " from
1767 ortholog tree to species tree topology.
1773 my $self = shift; # tree
1774 my $species_t = shift; # species tree; if undefined,
1775 my $cssst = shift; # switch to collapse single-species subtrees to a single node
1776 my $qRFd_max = shift;
1777 if (!defined $qRFd_max) {
1781 $self->collapse_unique_species_subtrees();
1783 # should we collapse the tree here?
1784 my $ortho_trees_ref = $self->get_orthologs();
1785 my @ortho_groups=(); # a list of Ortholog_group object that will contain the results
1787 # go through all the ortho_trees and compare to the species tree
1789 foreach my $ortho_t (@
$ortho_trees_ref) {
1790 my $ortho_group = CXGN
::Phylo
::Ortholog_group
->new($ortho_t, $species_t, $qRFd_max);
1791 if ($ortho_group->get_ortholog_tree()->get_leaf_count()>1) {
1792 push @ortho_groups, $ortho_group;
1794 } # end of foreach my $ortho_t (@$ortho_trees_ref) {
1795 return @ortho_groups;
1796 } # end of sub orthologs
1798 =head2 function set_missing_species_from_names()
1800 Synopsis: $atree->set_missing_species_from_names()
1803 Side effects: For any leaf nodes with species undefined,
1804 sets the species to something derived from
1806 Description: Try to come up with a species for each leaf node
1807 if not already defined. So will not overwrite
1808 species names coming from, e.g., the [species='tomato']
1809 type specification in a newick file.
1814 sub set_missing_species_from_names
{
1816 foreach my $n ($self->get_leaf_list()) {
1817 # print("defined \$n->get_species():{", defined $n->get_species(), "} ,\$n->get_species():{", $n->get_species(), "}\n");
1818 if (!$n->get_species()) {
1819 $n->set_species($n->determine_species_from_name());
1824 =head2 function impose_branch_length_minimum()
1826 Synopsis: $atree->impose_branch_length_minimum($bl_min)
1827 Arguments: The minimum branch length.
1829 Side effects: Set branch lengths < $bl_min to $bl_min.
1830 (Root branch length remains 0)
1831 Description: Zero branch lengths may possibly cause problems
1832 in some cases; use this to establish a small
1833 non-zero minimum branch length;
1837 sub impose_branch_length_minimum
{
1839 my $minimum_bl = shift;
1840 $minimum_bl ||= $self->get_min_branch_length();
1841 foreach my $n ($self->get_all_nodes()) {
1842 unless (defined $n->get_branch_length() and $n->get_branch_length() > $minimum_bl) {
1843 $n->set_branch_length($minimum_bl);
1846 $self->get_root()->set_branch_length(0.0); # leave this at 0
1850 sub set_show_standard_species
{
1852 $self->{show_standard_species
} = shift;
1854 sub get_show_standard_species
{
1856 return $self->{show_standard_species
};
1859 sub set_species_standardizer
{
1861 $self->{species_standardizer
} = shift;
1864 sub get_species_standardizer
{
1866 return $self->{species_standardizer
};
1870 =head2 function update_label_names()
1872 Synopsis: $atree->update_label_names()
1875 Side effects: Sets all the node labels to the node name
1876 with or without the species appended,
1877 as specified by $self->get_show_species_in_labels()
1881 sub update_label_names
{
1883 my $show_spec = $self->get_show_species_in_label();
1884 foreach my $n ($self->get_all_nodes()) {
1885 my $n_leaves = scalar @
{$n->get_implicit_names()};
1886 my $label_text = $n->get_name();
1887 # print STDERR "in update_label_names. $n_leaves, [", $n->get_name(), "][", $label_text, "] \n";
1889 my $species_text = $n->get_shown_species();
1890 # print STDERR "species text: ", $n->get_shown_species(), " is leaf:[", $n->is_leaf(), "]\n";
1891 $label_text .= " [".$species_text."]" if(defined $species_text);
1893 $n->get_label()->set_name($label_text);
1897 =head2 function prune_nameless_leaves()
1899 Synopsis: $atree->prune_nameless_leaves()
1902 Side effects: Deletes from the tree all leaves whose
1903 names are empty or undefined.
1907 sub prune_nameless_leaves
{
1910 my @leaf_list = $self->get_root()->recursive_leaf_list();
1911 my $count_leaves_deleted = 0;
1912 $self->get_root()->recursive_implicit_names(); # is this needed?
1913 foreach my $l (@leaf_list) {
1914 if ($l->get_name()) { # non-empty string. OK.
1916 # print STDERR "Warning. Leaf node with key: ", $l->get_node_key(), " has empty or undefined name. Deleting nameless node. \n";
1917 $self->del_node($l);
1918 $self->collapse_tree();
1919 $count_leaves_deleted++;
1922 return $count_leaves_deleted;
1925 # return key, node pair corresponding to the implicit name given as argument.
1926 sub node_from_implicit_name_string
{
1927 #searches tree until the node with the specified implicit name string (tab separated) is found
1929 my $in_string = shift;
1930 if (! scalar $self->get_root()->get_implicit_names() > 0) {
1931 $self->get_root()->recursive_implicit_names();
1934 foreach my $k ($self->get_all_node_keys()) {
1935 my $n = $self->get_node($k);
1936 my $node_impl_name = join("\t", @
{$n->get_implicit_names()});
1937 if ($node_impl_name eq $in_string) {
1941 # print STDOUT "In Tree::node_from_implicit_name_string. Node not found which matches specified string: $in_string \n";
1942 # $self->get_root()->print_subtree("<br>");
1943 return (undef, undef);
1947 sub leaf_species_string
{
1949 my $str = "species, standard species \n";
1950 foreach my $l ($self->get_leaf_list()) {
1951 $str .= $l->get_species() . " " . $l->get_standard_species() . "\n";
1955 =head2 function quasiRF_distance
1957 Synopsis: $tree1->quasiRF_distance($tree2), or
1958 $node1->quasiRF_distance($tree2, "species");
1959 Arguments: A tree object; and optionally a string specifying
1960 whether to compare node name or species.
1962 Returns: Compares tree1 and tree2. If they are topologically
1963 the same, 0 is returned. Otherwise returns a "distance"
1964 describing how different the two trees are.
1965 Side effects: Sets "subtree_leaves_match" field for each node, and
1966 (by calling recursive_quasiRF_distance) sets
1967 "qRF_distance" field for each node.
1968 Description: Tree1, tree2 should be collapsed before calling this
1969 function. For each node in tree1 add branch length to
1970 distance if a node with the same implicit name
1971 (or implicit species, depending on value of second
1972 argument) is not present in tree2.
1973 This is somewhat like the Robinson-Foulds distance, but
1974 is not symmetric (hence not a true distance),
1975 as the topologies of both subtrees are used, but only
1976 the tree1 branch lengths are used. Think of it as a
1977 measure of how much tree1 much be changed to reach the
1979 We are typically going to use it to compare an ortholog
1980 tree with a species tree, in which case the implicit
1981 species should be used in the comparison, i.e. it
1982 should be called with the optional 2nd arg having value
1987 sub quasiRF_distance
{
1991 my $compare_field = shift;
1993 my $root1 = $tree1->get_root();
1994 my $root2 = $tree2->get_root();
1998 # get the implicit names or species for each node in both trees
2000 if (lc $compare_field eq "species") {
2001 #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
2002 $root1->recursive_implicit_species();
2003 $root2->recursive_implicit_species();
2005 my %n_bl_2 = (); # set up the hash for tree2 nodes, with species as key (value unused)
2006 my $nhr2 = $tree2->{node_hash
};
2007 foreach my $n2 (values ( %$nhr2)) {
2008 my $implicit_species = join("\t", @
{$n2->get_implicit_species()});
2009 # print STDOUT "Y stree implicit species: $implicit_species <br>\n";
2010 $n_bl_2{$implicit_species}++; # values are not used, just count occurrences
2013 my $nhr1 = $tree1->{node_hash
};
2014 foreach my $n1 (values ( %$nhr1)) {
2015 my $implicit_species = join("\t", @
{$n1->get_implicit_species()});
2016 # print STDOUT "otree implicit species: $implicit_species <br>\n";
2017 if (exists $n_bl_2{$implicit_species}) { # there are subtrees with this set of leaves in both trees, do nothing
2018 $n1->set_attribute("subtree_leaves_match", "true");
2019 # print STDOUT "true <br>\n";
2020 } else { # no node with this implicit name in tree2, so add branch length to total
2021 $distance += $n1->get_branch_length();
2022 $n1->set_attribute("subtree_leaves_match", "false");
2026 $root1->recursive_implicit_names();
2027 $root2->recursive_implicit_names();
2029 # set up the hash for tree2 nodes, with name as key (value unused)
2031 my $nhr2 = $tree2->{node_hash
};
2032 foreach my $n2 (values ( %$nhr2)) {
2033 $n_bl_2{$n2->get_name()}++; # values are not used, just count occurrences of the name
2036 my $nhr1 = $tree1->{node_hash
};
2037 foreach my $n1 (values ( %$nhr1)) {
2038 if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees, do nothing
2039 $n1->set_attribute("subtree_leaves_match", "true");
2040 } else { # no node with this implicit name in tree2, so add branch length to total
2041 $distance += $n1->get_branch_length();
2042 $n1->set_attribute("subtree_leaves_match", "false");
2046 my $distance2 = $root1->recursive_quasiRF_distance(); # this works on tree1 - which is not a copy here.
2047 return $distance; # $tree1 has qRFd info at every node.
2052 my $other_tree = shift;
2053 my $compare_field = shift; # to control comparison of names (default) or species ("species")
2054 # copy the trees into temporary trees, so that the trees can
2055 # be manipulated (rerooted, collapsed) without changing the original trees.
2057 # print STDOUT "in compare_unrooted. compare_field: $compare_feld \n";
2058 my $tree1 = $self->copy();
2059 my $tree2 = $other_tree->copy();
2061 # find a leaf - any leaf - of tree1 and the corresponding leaf (i.e. with the same name) of tree2
2063 my $leaf1 = $tree1->get_root()->recursive_get_a_leaf();
2064 my $corresponding_leaf = $tree2->get_node_by_name($leaf1->get_name());
2066 if (!$corresponding_leaf) {
2067 print("in compare_unrooted. leaf1 name: ", $leaf1->get_name(), ". Can't find corresponding leaf in other tree. \n");
2071 # reset roots of trees to the two corresponding leaves:
2072 $tree1->reset_root($leaf1);
2073 $tree2->reset_root($corresponding_leaf);
2075 return $tree1->RF_distance_inner($tree2, $compare_field);
2078 =head2 function RF_distance_inner
2080 Synopsis: $tree1->RF_distance($tree2),
2081 or $node1->RF_distance($tree2, "species");
2082 Arguments: A tree object; and optionally a string specifying
2083 whether to compare node name or species.
2085 Returns: Compares tree1 and tree2. If they are topologically
2086 the same, 0 is returned. Otherwise returns a "distance"
2087 describing how different the two trees are.
2088 Side effects: Sets "subtree_leaves_match" field for each node
2089 Description: Tree1, tree2 should be collapsed before calling this
2090 function. For each node in tree1 add branch length to
2091 distance if a node with the same implicit name
2092 (or implicit species, depending on value of second
2093 argument) is not present in tree2.
2094 This computes the Robinson-Foulds distance. Topologies
2095 and branch lengths of both trees are used
2096 Think of it as a measure of how much tree1 much be
2097 changed to become tree2.
2102 sub RF_distance_inner
{
2106 my $compare_field = shift;
2108 my $root1 = $tree1->get_root();
2109 my $root2 = $tree2->get_root();
2111 my $sym_diff = 0; #symmetric difference, just one for each partition in only one tree
2113 my $in_both_sum = 0.0;
2114 my $in_one_only_sum = 0.0;
2115 my $branch_score = 0.0;
2117 # get the implicit names or species for each node in both trees
2119 if (lc $compare_field eq "species") {
2120 # die "RF_distance with compare_field set to species is not implemented. \n";
2121 #print STDOUT "top of quasiRF... compare_field eq species branch. \n";
2122 $root1->recursive_implicit_species();
2123 $root2->recursive_implicit_species();
2124 unless(join("\t", $root1->get_implicit_species()) eq join("\t", $root1->get_implicit_species())){
2125 print STDERR
"In RFdistance; trees do not have same set of leaves (by species).\n";
2128 # set up the hash for tree nodes, with species as key, node obj as value
2130 my @nhr1 = $root1->recursive_subtree_node_list; #->{node_hash};
2132 foreach my $n1 (@nhr1) { #all tree1 nodes except root1
2133 $n_bl_1{$n1->get_species()} = $n1;
2136 my @nhr2 = $root2->recursive_subtree_node_list; #$tree2->{node_hash};
2137 foreach my $n2 (@nhr2) { #all tree2 nodes except root2
2138 $n_bl_2{$n2->get_species()} = $n2;
2141 # my $in_both_sum = 0.0;
2142 # my $in_one_only_sum = 0.0;
2143 foreach my $n1 (@nhr1) {
2144 if (exists $n_bl_2{$n1->get_species()}) { # there are subtrees with this set of leaves in both trees
2145 my $n2 = $n_bl_2{$n1->get_species()};
2146 $in_both_sum += abs($n1->get_branch_length() - $n2->get_branch_length());
2147 } else { # no node with this implicit species in tree2, so add branch length to total
2148 $in_one_only_sum += $n1->get_branch_length();
2152 # my $in_both_sum2 = 0.0;
2153 foreach my $n2 (@nhr2) {
2154 if (exists $n_bl_1{$n2->get_species()}) { # there are subtrees with this set of leaves in both trees
2155 # my $n1 = $n_bl_1{$n2->get_species()};
2156 # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
2157 } else { # no node with this implicit species in tree2, so add branch length to total
2158 $in_one_only_sum += $n2->get_branch_length();
2162 # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, "\n");
2163 # $distance = $in_both_sum + $in_one_only_sum;
2164 # print "distance: ", $distance, "\n";
2167 } else { # compare field is "name"
2168 # print "comparing trees by name fields \n";
2169 $root1->recursive_implicit_names();
2170 $root2->recursive_implicit_names();
2171 unless(join("\t", $root1->get_name()) eq join("\t", $root1->get_name())){
2172 print STDERR
"In RFdistance; trees do not have same set of leaves (by name).\n";
2175 # set up the hash for tree nodes, with name as key, node obj as value
2177 my @nhr1 = $root1->recursive_subtree_node_list(); #->{node_hash};
2179 foreach my $n1 (@nhr1) { #all tree1 nodes except root1
2180 # print "n1 name: ", $n1->get_name(), "\n";
2181 $n_bl_1{$n1->get_name()} = $n1;
2184 my @nhr2 = $root2->recursive_subtree_node_list(); #$tree2->{node_hash};
2185 foreach my $n2 (@nhr2) { #all tree2 nodes except root2
2186 # print "n2 name: ", $n2->get_name(), "\n";
2187 $n_bl_2{$n2->get_name()} = $n2;
2190 # my $in_both_sum = 0.0;
2191 # my $in_one_only_sum = 0.0;
2193 # foreach my $n1 (@nhr1) {
2194 foreach my $name1 (keys %n_bl_1){
2195 my $n1 = $n_bl_1{$name1};
2196 if (exists $n_bl_2{$n1->get_name()}) { # there are subtrees with this set of leaves in both trees
2197 my $n2 = $n_bl_2{$n1->get_name()};
2198 $diff = $n1->get_branch_length() - $n2->get_branch_length();
2199 $in_both_sum += abs($diff); # $n1->get_branch_length() - $n2->get_branch_length());
2200 # $branch_score += $diff*$diff;
2201 } else { # no node with this implicit name in tree2, so add branch length to total
2202 $diff = $n1->get_branch_length();
2203 $in_one_only_sum += $diff; # $n1->get_branch_length();
2204 # $branch_score += $diff*$diff;
2206 # print "name not present in hash 2: ", $n1->get_name(), "\n";
2208 $branch_score += $diff*$diff;
2210 # my $in_both_sum2 = 0.0;
2211 # foreach my $n2 (@nhr2) {
2212 foreach my $name2 (keys %n_bl_2){
2213 my $n2 = $n_bl_2{$name2};
2214 if (exists $n_bl_1{$n2->get_name()}) { # there are subtrees with this set of leaves in both trees
2215 # my $n1 = $n_bl_1{$n2->get_name()};
2216 # $in_both_sum2 += abs($n1->get_branch_length() - $n2->get_branch_length());
2217 } else { # no node with this implicit name in tree2, so add branch length to total
2218 $in_one_only_sum += $n2->get_branch_length();
2220 # print "name not present in hash 1: ", $n2->get_name(), "\n";
2225 $distance = $in_both_sum + $in_one_only_sum;
2226 # print ("in_both_sum: ", $in_both_sum, " in_one_only_sum: ", $in_one_only_sum, " RFdistance: ", $distance, "\n");
2227 return ($distance, $sym_diff, $branch_score);
2230 sub get_branch_length_sum
{
2232 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2234 foreach (@nodelist) {
2235 $bl_sum += $_->get_branch_length();
2240 sub get_branch_length_sum_noterm
{ # sum of all non-terminal branch lengths
2242 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2244 foreach (@nodelist) {
2245 next if($->is_leaf());
2246 $bl_sum += $_->get_branch_length();
2251 sub multiply_branch_lengths_by
{
2254 my @nodelist = $self->get_root()->recursive_subtree_node_list;
2255 foreach (@nodelist) {
2256 $_->set_branch_length($_->get_branch_length()*$factor);
2260 #scale branch lengths s.t. their sum is #desired_bl_sum (1.0 by default)
2261 # returns original bl sum
2262 sub normalize_branch_length_sum
{
2264 my $desired_bl_sum = shift;
2265 $desired_bl_sum ||= 1.0;
2266 my $bl_sum = $self->get_branch_length_sum();
2267 if ($bl_sum <= 0.0) {
2268 print STDERR
"Can\'t normalize branch length sum, sum is $bl_sum; <= zero. \n";
2270 $self->multiply_branch_lengths_by($desired_bl_sum/$bl_sum);
2275 sub RFdist_over_totbl
{ # this is (weighted, i.e. using branch lengths) RF distance, normalized by sum of all
2276 # branch lengths in both trees, so it will lie in range [0,1]
2280 my $compare_field = shift;
2281 my $normalize_bl_sums = shift;
2282 $normalize_bl_sums = 0 unless(defined $normalize_bl_sums);
2284 if ($normalize_bl_sums) {
2285 $tree1->normalize_branch_length_sum();
2286 $tree2->normalize_branch_length_sum();
2288 my $bl_sum = $tree1->get_branch_length_sum() + $tree2->get_branch_length_sum();
2289 #print "bl_sum: $bl_sum \n";
2290 my ($rfd, $symdiff, $branch_score) = $tree1->RF_distance($tree2, $compare_field);
2291 # print "bl_sum: $bl_sum . rfd: $rfd \n";
2292 return $rfd/$bl_sum;
2295 # divide into trees no bigger than $max_leaves
2296 sub divide_into_small_trees
{
2298 my $max_leaves = shift;
2299 $max_leaves ||= 100;
2300 # print "in Tree::divide_into_small_trees. ", $self->get_root()->get_attribute("leaf_count"), "\n\n";
2301 my $small_trees_array = $self->get_root()->recursive_divide_subtree_into_small_trees($max_leaves);
2302 return $small_trees_array;
2305 # get list of subtrees containing ortholog group candidate subtrees
2306 # (trees with > 1 leaf, and distinct species in all leaves)
2307 # the argument allows one to specify to go up some number of parent
2308 # nodes above the nodes with the ortholog group candidate subtrees.
2309 sub get_ortholog_group_candidate_subtrees
{
2311 my $desired_levels_above = shift;
2312 $desired_levels_above = 0 unless($desired_levels_above > 0);
2313 # print "tree. levels_above: ", $desired_levels_above, "\n";
2314 my $ortholog_group_candidate_subtrees_array = [];
2315 $self->get_root()->recursive_set_levels_above_distinct_species_subtree();
2316 $self->get_root()->recursive_find_ortholog_group_candidate_subtrees($ortholog_group_candidate_subtrees_array, $desired_levels_above);
2317 return $ortholog_group_candidate_subtrees_array;
2320 # using urec, find the node s.t. rooting on its branch gives minimal duplications and losses
2321 # w.r.t. a species tree
2322 sub find_mindl_node
{
2323 my $gene_tree = shift; # a rooted gene tree
2324 my $species_t = shift; # a species tree
2326 # print STDERR "##################### Top of find_mindl_node. #############\n";
2327 # urec requires binary tree - make sure the tree is binary
2328 # if polytomy at root, reroot a bit down one branch, to get binary root (if was tritomy)
2331 my @root_children = $gene_tree->get_root()->get_children();
2332 if (scalar @root_children != 2) {
2333 @new_root_point = ($root_children[0], 0.9*$root_children[0]->get_branch_length());
2334 $gene_tree->reset_root_to_point_on_branch(@new_root_point);
2337 # binarify every non-binary node. At present doesn't attempt to choose in a smart way
2338 # among the various possible resolutions
2339 $gene_tree->make_binary($gene_tree->get_root()); # urec requires binary tree.
2341 my $store_show_std_species = $gene_tree->get_show_standard_species();
2342 # put the trees into form of newick strings with no whitespace, so urec will be happy
2343 $gene_tree->show_newick_attribute("species");
2344 $gene_tree->set_show_standard_species(1);
2345 my $gene_newick_string = $gene_tree->generate_newick();
2346 print "binarified gene tree (urec input): ", $gene_newick_string, "\n";
2347 $gene_newick_string =~ s/\s//g;
2349 $species_t->show_newick_attribute("species");
2350 $species_t->set_show_standard_species(1);
2351 my $species_newick_string = $species_t->generate_newick();
2352 $species_newick_string =~ s/\s//g;
2354 # my $rerooted_newick = `/data/local/cxgn/core/sgn-tools/family/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2355 my $rerooted_newick = `/data/local/cxgn/core/perllib/CXGN/Phylo/Urec/urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2356 # my $rerooted_newick = `urec -s "$species_newick_string" -g "$gene_newick_string" -b -O`;
2358 # print STDERR "gene_newick_string: \n $gene_newick_string \n\nspecies_newick_string: \n $species_newick_string.\n\n";
2359 # print STDERR "Rerooted newick string: [$rerooted_newick].\n";
2363 my $minDL_rerooted_gene_tree = (CXGN
::Phylo
::Parse_newick
->new($rerooted_newick))->parse(); # this is now rooted so as to minimize gene duplication and loss needed to reconcile with species tree,
2364 # but branch lengthswill be wrong for nodes whose parent has changed in the rerooting (they are just the branch lengths to the old parents).
2365 $minDL_rerooted_gene_tree->get_root()->recursive_implicit_names();
2367 # $minDL_rerooted_gene_tree should have 2 children and (at least) one should have it's subtree also present in the pre-rerooting tree.
2368 # identify the node at the root of this subtree (using implicit names) and reroot there.
2369 # Have to do this because some branch length info was lost in urec step.
2370 my @root_children = $minDL_rerooted_gene_tree->get_root()->get_children();
2371 my ($node_key, $rr_node);
2372 foreach (@root_children) {
2373 my $implicit_name_string = join("\t", @
{$_->get_implicit_names()});
2374 ($node_key, $rr_node) = $gene_tree->node_from_implicit_name_string($implicit_name_string);
2375 if (defined $rr_node) {
2376 # debug ("Reroot above this node: $implicit_name_string \n");
2377 return @new_root_point = ($rr_node, 0.5*($rr_node->get_branch_length));
2380 die "find_mindl_node failed. \n";
2382 # $gene_tree->set_shown_standard_species($store_show_standard_species);
2383 #$gene_tree->update_label_names();
2384 return (undef, undef);
2389 sub get_species_bithash
{ #get a hash giving a bit pattern for each species in both $gene_tree and $spec_tree
2390 my $gene_tree = shift;
2391 my $spec_tree = shift;
2395 $spec_tree->show_newick_attribute("species");
2396 my $stree_newick = $spec_tree->generate_newick();
2397 # print STDERR "SPECIES TREE: $stree_newick \n";
2398 my @leaf_list = $gene_tree->get_leaf_list();
2399 foreach (@leaf_list) {
2400 my $lspecies = $_->get_standard_species();
2401 # print STDERR "gtree species: $lspecies \n";
2402 $genehash{$lspecies}++; # keys are species in gene tree, values are number of leaves with that species
2404 @leaf_list = $spec_tree->get_leaf_list();
2405 foreach (@leaf_list) {
2406 my $lspecies = $_->get_standard_species();
2407 # print STDERR "stree species, raw, std: ", $_->get_standard_species(), " $lspecies \n";
2408 if ($genehash{$lspecies}) {
2409 $spechash{$lspecies}++; # keys are species in both trees.
2412 my @species_list = sort (keys %spechash);
2413 # print join(" ", @species_list), "\n";
2415 foreach (@species_list) {
2416 $bithash->{$_} = $bits;
2417 $bits = $bits << 1; # mult by two
2418 # print "$_, $bits \n";
2424 sub prune_non
{ # prune leaves from tree 1 if their species does not occur in tree2
2427 # return a hash whose keys are leaf node names (hidden nodes excluded)
2428 # and whose values are refs to arrays of 1's and 0's, the 1's indicating orthology.
2429 sub ortho_matrix_hash
{
2431 my @leaf_names = ();
2432 for ($self->get_leaves()) {
2433 next if($_->get_hide_label()); # do not include hidden labels
2434 push @leaf_names, $_->get_name();
2436 @leaf_names = sort @leaf_names;
2437 # print STDERR join(" ", @leaf_names), "\n";
2438 my $n_leaves = scalar @leaf_names;
2443 foreach (@leaf_names) {
2444 $name_hash{$_} = $i;
2445 my @zeroes = (0)x
$n_leaves;
2446 $ortho_hash{$_} = \
@zeroes;
2449 my @leaves = $self->get_leaves();
2451 my $name = $_->get_name();
2452 my $o_array = $ortho_hash{$name};
2453 # print STDERR join(" ", @$o_array), "\n";
2454 my @orthologs = $_->collect_orthologs_of_leaf();
2455 foreach (@orthologs) {
2456 my $o_name = $_; #->get_name();
2457 # print STDERR $o_name, " ", $name_hash{$o_name}, "\n";
2458 $o_array->[$name_hash{$o_name}] = 1; # in the array for $name set the right element to 1
2461 # foreach (@leaf_names) {
2462 # my $ortho_array_ref = $ortho_hash{$_};
2463 # printf STDERR ("%50s ", $_); print STDERR join(" ", @$ortho_array_ref), "\n";
2465 return \
%ortho_hash;