tweak debug output.
[cxgn-corelibs.git] / t / CXGN / Phylo / phylo_test.t
blobb270f37639d83e3c0b1070c37f92519e5f8e9aa5
1 #!/usr/bin/perl
2 use strict;
3 use Test::More qw/no_plan/;
5 use CXGN::Phylo::Tree;
6 use CXGN::Phylo::Node;
7 use CXGN::Phylo::Parser;
8 use Data::Dumper;
10 use Carp;
12 # expression to test the Phylo packages with
14 my $newick_expression =
15 "(1:0.082376,(2:0.196674,((3:0.038209,6:0.354293):0.026742,5:0.094338):0.064142):0.067562,4:0.295612)";
16 my $parser = CXGN::Phylo::Parse_newick -> new($newick_expression);
18 # test tokenizer
20 my @tokens = $parser -> tokenize($newick_expression);
21 print STDERR "\tTOKENS: ".join("|", @tokens)."\n";
22 is (@tokens, 22, "Token count test");
24 my $tree = $parser-> parse();
26 #print STDERR Dumper($tree);
28 #print STDERR "Total Nodes: ".(keys(%{$tree->{node_hash}}))."\n";
29 #is (keys(%{$tree->{node_hash}}), 10, "node count test");
31 # check the number of nodes returned by get_all_nodes
33 is ($tree->get_all_nodes(), 10, "node count test [node_hash]");
35 # pick an element and verify if it is a CXGN::Phylo::Node object
37 is ( UNIVERSAL::isa(($tree->get_all_nodes())[4], "CXGN::Phylo::Node"), 1, "node id test");
39 my $n = $tree->get_node(5);
40 #print STDERR "NODE 5: ".$n->get_name()."\n";
41 #$n->set_hilited(1);
42 #print STDERR "Set node 5 to hilited ".$n->get_hilited()."\n";
43 #$n->rotate_node();
44 #$tree->get_root()->rotate_node();
45 #$n4->set_hidden(1);
47 #my $subtree_len = $tree->get_root()->calculate_subtree_distances();
48 #is ($subtree_len, 12, "subtree length test");
50 # test the leaf functions in two different ways
52 is ($tree->get_leaf_count(), 6, "leaf count test");
54 my @leaflist = $tree->get_leaf_list();
55 # foreach my $leaf (@leaflist) {
56 # print STDERR "Leaf: ".$leaf->get_name()."\n";
57 # }
59 is ($tree->get_root()->is_leaf, 0, "root leaf test");
61 # foreach my $l (@leaflist) { print STDERR "LEAFLIST: ". ($l->get_name())."\n"; }
63 is (@leaflist, 6, "leaf list test");
65 #$tree->preorder_traversal(sub {my $node = shift; print STDERR "Preorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";});
66 #print STDERR "\n";
68 #$tree->postorder_traversal(sub {my $node = shift; print STDERR "Postorder. node name: ", $node->get_name(), " ", $node->get_branch_length(), "\n";});
70 #exit;
72 # test the root
74 my $root = $tree->get_root();
75 is ($root->is_root(), 1, "root test");
77 # test the subtree node count functions
79 #my @ortho_groups = $tree->get_orthologs();
80 $tree->get_root()->calculate_subtree_node_count();
81 my $root_subnode_count = $tree->get_root()->get_subtree_node_count();
82 my $leaf_subnode_count = $tree->get_node(7)->get_subtree_node_count();
83 my $inner_node_subnode_count = $tree->get_node(3)->get_subtree_node_count();
84 is ($root_subnode_count, 9, "root subnode count test");
85 is ($leaf_subnode_count, 0, "leaf subnode count test");
86 is ($inner_node_subnode_count, 6, "inner node subnode count test");
88 # set species information to test subtree_species_count stuff
90 my @species_list = ("coffee", "tomato", "potato", "pepper", "eggplant", "brachypodium");
91 my @node_list = values %{$tree->{node_hash}};
92 my $i = 0;
93 foreach my $n (@node_list) {
94 next if(scalar $n->get_children() > 0); # skip non-leaves
95 $n->set_species($species_list[$i % 6]);
96 print "i, species: $i ", $n->get_species(), "\n";
97 $i++;
100 # test the subtree_species count functions
102 #$tree->get_root()->recursive_text_render();
103 #exit;#
104 $tree->get_root()->recursive_set_leaf_species_count();
105 #$tree->get_root()->calculate_subtree_species_count();
107 # pick out a node and test the count
109 print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
110 is($tree->get_root()->get_attribute("leaf_species_count"), 6, "subtree leaf species count test");
111 #is ($tree->get_node(5)->get_attribute("leaf_species_count"), 3, "subtree leaf species count test");
113 # test the remove_child function
115 print STDERR 'before $tree->copy() ', "\n";
116 my $rm_tree = $tree->copy();
117 print STDERR 'after $tree->copy() \n';
118 my @root_children = $rm_tree->get_root()->get_children();
119 my $n = $root_children[1];
120 my @children =$n->get_children();
121 print STDERR "\tRemove child\nbefore: ".$n->to_string()."\n";
122 is ($n->get_children, 2, "get_children test");
123 print STDERR "\t(Removing child ".$children[0]->get_node_key().")\n";
124 $n->remove_child($children[0]);
125 is ($n->get_children(), 1, "remove child test");
126 print STDERR "\tafter : ".$n->to_string()."\n";
128 my @root_kids = $rm_tree->get_root()->get_children();
129 is (@root_kids, 3, "root children count test");
130 $rm_tree->get_root()->remove_child($root_kids[1]);
131 print STDERR "\tRemoving child key=".($root_kids[1]->get_name())."\n";
132 #foreach my $c ($rm_tree->get_root()->get_children()) { print "current children = ".$c->get_name()."\n"; }
133 is ($rm_tree->get_root()->get_children(), 2, "removed one root child test");
135 # test reset_root
138 #my $nn = $tree->get_node(5);
139 #$tree->reset_root($nn);
144 # test the compare function
145 # initialize two identical trees and compare
146 # (should return 1)
148 my $species_tree_newick = "((((( tomato_tomato:1, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)";
149 my $species_tree_parser = CXGN::Phylo::Parse_newick->new($species_tree_newick);
150 my $species_tree = $species_tree_parser->parse();
152 my $species_tree_newick2 = "((((( tomato_tomato:5, potato_potato:1):1, pepper_pepper:1 ):1, eggplant_eggplant):1, nicotiana_nicotiana:1):1, coffee_coffee:1)";
153 my $species_tree_parser2 = CXGN::Phylo::Parse_newick->new($species_tree_newick2);
154 my $species_tree2 = $species_tree_parser2->parse();
156 # compare the tree to itself
158 is ($species_tree->compare_rooted($species_tree), 1, "tree self comparison test (tree1-tree1)");
159 is ($species_tree->compare_rooted($species_tree2), 1, "tree comparison test (tree1-tree2)");
160 is ($species_tree2->compare_rooted($species_tree2), 1, "tree self comparison test (tree2-tree2)");
161 is ($species_tree2->compare_rooted($species_tree), 1, "tree comparion test (tree2-tree1)");
163 # test that a different tree returns 0
165 is($species_tree->compare_rooted($tree), 0, "tree inequality test");
167 # test that two topologically identical but specified differently match in the comparison
169 my $tree_a = CXGN::Phylo::Parse_newick->new("(A:1, B:1)")->parse();
170 my $tree_b = CXGN::Phylo::Parse_newick->new("(B:1, A:1)")->parse();
171 is ($tree_a->compare_rooted($tree_b), 1, "tree topology specification test");
174 # test the copy function
176 my $new_tree = $tree->copy();
177 if ($tree->compare_rooted($new_tree)) { # should be the same, shouldn't it?
178 print STDERR "Compared tree to newtree and found them to be identical.\n";
180 else { print STDERR "newtree and tree are not identical. Oops.\n"; }
181 is ($new_tree->compare_rooted($tree), 1, "copied tree identity check");
182 isnt ( $new_tree, $tree, "tree pointer non-identity check");
184 my ($rfd, $symd, $d3) = $tree->RF_distance($new_tree);
185 is($rfd, 0, "check RF distance between tree and copy is 0.\n");
186 is($symd, 0, "check RF distance between tree and copy is 0.\n");
188 # check if I can remove a node in new_tree without affecting $tree
190 #print "node keys: ", join(" ", keys %{$tree->{node_hash}}), "\n";
191 $new_tree->delete_node(3);
194 # print $tree->generate_newick(), "\n";
195 # print $new_tree->generate_newick(), "\n";
197 is($new_tree->compare_rooted($tree), 0, "changed copied tree identity check");
199 # test the collapsing function - test a tree with many nodes that
200 # have only one child.
202 print STDERR "\tTesting CXGN::Phylo::Node::recursive_collapse_nodes\n";
203 my $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1)"))->parse();
205 $c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
207 #print STDERR "The original tree: \n";
208 #$c_tree->render();
209 #print STDERR "=====\n\n";
211 is ($c_tree->get_all_nodes(), 6, "node count before collapse");
213 $c_tree->collapse_tree();
214 #print STDERR "The collapsed tree:\n";
215 #$c_tree->render();
216 #print STDERR "=====\n\n";
218 is ($c_tree->get_all_nodes(), 3, "node count after collapse");
220 #$new_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($new_tree));
221 #$new_tree->render();
223 #if(1 || $c_tree->get_all_nodes() != 3){
224 #$c_tree->print_node_keys();
225 #$c_tree->get_root()->print_subtree();
226 #}exit;
228 # test a more complex case for collapsing
230 $c_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
231 $c_tree->set_renderer(CXGN::Phylo::Text_tree_renderer->new($c_tree));
232 $c_tree->collapse_tree();
234 # test a tree collapsing with a tree that has branch lengths of zero.
236 my $z_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:0)C:0)D:0)E:1, (((G:1, F:1)H:0)I:1)J:1)"))->parse();
237 print STDERR "Testing the recursive_collapse_zero_branches() function...\nOriginal tree:\n";
238 $z_tree->get_root()->print_subtree();
240 my $z_tree_node_count = $z_tree->get_node_count();
241 $z_tree ->get_root()->recursive_collapse_zero_branches();
243 is ($z_tree->get_node_count(), $z_tree_node_count-4, "recursive_collapse_zero_nodes test");
245 # check the delete node function
246 # first, check if we can delete an internal node...
248 print STDERR "\tDeleting internal node (key=4)...\n";
249 my $ind_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1)C:1)D:1)E:1, (((G:1, F:1)H:1)I:1)J:1)"))->parse();
250 my $ind_tree_copy = $ind_tree->copy();
251 $ind_tree->delete_node(4);
252 is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+1), "node count after delete test");
253 is ($ind_tree->get_node(4), undef, "has node really disappeared test");
255 # let's delete a leaf node...
257 print STDERR "\tDeleting a leaf node (key=2)...\n";
258 $ind_tree->delete_node(2);
259 #$ind_tree->render();
260 is ($ind_tree_copy->get_all_nodes(), ($ind_tree->get_all_nodes()+2), "node count after leaf node deletion");
262 # test the newick generation from the node
264 my $original_newick = "((((A:1,B:1)C:1)D:1)E:1,(((G:1,F:1)H:1)I:1)J:1)";
265 my $t = (CXGN::Phylo::Parse_newick->new($original_newick))->parse();
266 my $new = $t->get_root()->recursive_generate_newick();
267 # print STDERR "Original: $original_newick\n";
268 # print STDERR "Regenerated newick = $new\n";
269 my $t2 = (CXGN::Phylo::Parse_newick->new($new))->parse();
270 is($t->compare_rooted($t2), 1, "Newick regeneration from tree test");
273 my $incorp_tree = $tree->copy();
274 $incorp_tree->incorporate_nodes(CXGN::Phylo::Node->new());
275 is($incorp_tree->get_all_nodes(), 11, "Incorporate Node Test");
277 my $b_tree = $tree->copy();
278 is($b_tree->get_all_nodes(), 10, "Binary tree test: copy");
279 $b_tree->make_binary();
280 my @nodes = $b_tree->get_all_nodes();
281 is(@nodes, 11, "Binary tree test: node count");
282 my $binary_fail = 0;
283 foreach(@nodes){
284 my @children = $_->get_children();
285 $binary_fail = 1 if @children > 2;
287 isnt($binary_fail, 1, "Binary tree test: all children count <= 2");
289 # render the tree
291 # $tree->get_layout()->set_image_width(500);
292 # $tree->get_layout()->set_image_height(300);
294 # my $PNG_tree_renderer = CXGN::Phylo::PNG_tree_renderer -> new($tree);
295 # $tree->get_layout()->set_left_margin(50);
296 # $tree->get_layout()->set_right_margin(40);
297 # $PNG_tree_renderer->render();
299 # my $renderer = CXGN::Phylo::Text_tree_renderer -> new($tree);
300 # $renderer->render();
303 # test tree root resetting, and tree comparison
305 # Get tree from newick expression. Reset root so as to minimize maximum root-leaf distance.
306 # copy a tree, then, for each branch, reset the root to a point along the branch,
307 # compare to original tree in both rooted and unrooted senses,
308 # unrooted comparison should give 1, rooted 0, except for branches to orig. root.
309 # Then reset root again so as to minimize max distance to leaves from root, and should
310 # recover original tree. Check that rooted and unrooted compares both give 1.
312 # $tree = CXGN::Phylo::Parse_newick->new("(A:1, (B:1, C:1):1)")->parse();
313 my $newick_expression = "(A:0.082376,(B:0.196674,((C:0.038209,F:0.354293):0.026742,E:0.094338):0.064142):0.067562,D:0.295612)";
314 #my $newick_expression = "(A:1,(B:1,((C:2,F:4):1,E:1):2.02):1,D:2)";
315 #my $newick_expression = "((A:1, D:2):1, (B:1, C:2, E:3):2)";
316 #my $newick_expression = "((A:0.89, D:1.2):1.4, (B:1, C:1.1, E:0.9):1)";
317 #my $newick_expression = "(C:1, D:3, (A:5, B:2): 1)";
318 #my $newick_expression = "(A:3, ((B:1, C:2):1.5):1)";
319 $tree = CXGN::Phylo::Parse_newick->new($newick_expression)->parse();
320 ok($tree->test_tree(), "tree test 1");
321 $tree->get_root()->recursive_collapse_single_nodes();
322 ok($tree->test_tree(), "tree test 2");
325 #my ($mldv_node, $mldv_dist_above, $min_var) = $tree->min_leaf_dist_variance_point();
326 #$tree->reset_root_to_point_on_branch($mldv_node, $mldv_dist_above);
328 $tree->reset_root_to_point_on_branch($tree->min_leaf_dist_variance_point());
329 # print("tree initially rerooted at min variance point, (i.e. before loop): \n");
332 $tree->get_root()->recursive_implicit_names();
333 # $tree->get_root()->print_subtree("\n");
334 # readline stdin;
335 #$tree->reset_root_min_max_root_leaf_distance();
337 #my ($anode, $adist, $avar) = $tree->min_leaf_dist_variance_point();
338 #print("opt node name, dist above, variance, stddev: ", $anode->get_name(), " ", $adist, " ", $avar, " ", sqrt($avar), "\n");
339 ##exit();
341 my $total_branch_length = subtree_branch_length($tree->get_root());
342 my $new_tree = $tree->copy();
343 my ($new_root, $da) = $new_tree->min_leaf_dist_variance_point();
344 #exit;
346 my $count_compare_rooted1 = 0;
347 my $count_compare_unrooted1 = 0;
348 my $count_compare_rooted2 = 0;
349 my $count_compare_unrooted2 = 0;
350 my $count_treetesta_ok = 0;
351 my $count_treetestb_ok = 0;
352 my @node_list = $tree->get_root()->recursive_subtree_node_list();
354 my $max_branch_length_change = -1.0;
355 my $blc;
356 my ($comp_rooted1, $comp_unrooted1, $comp_rooted2, $comp_unrooted2) = (-1, -1, -1, -1);
358 srand(132456);
359 for (my $i = 0; $i < @node_list; $i++) {
360 my $new_tree = $tree->copy();
362 my @new_node_list = $new_tree->get_root()->recursive_subtree_node_list();
363 my $n = $new_node_list[$i];
364 my $small = 0.0;
365 my $dab = ($small +(1.0 - $small)*rand())*$n->get_branch_length(); #random point on ith branch
367 $new_tree->reset_root_to_point_on_branch($n, $dab);
368 my $testres = $new_tree->test_tree();
369 # print "testres: $testres \n";
370 $count_treetesta_ok += $testres;
371 # print "count_treetesta_ok: $count_treetesta_ok \n";
373 $count_compare_rooted1 += $comp_rooted1 = $tree->compare_rooted($new_tree); # compare_rooted should be true only for $n a child of $new_tree's root.
374 $count_compare_unrooted1 += $comp_unrooted1 = $tree->compare_unrooted($new_tree); # compare_unrooted should be true
376 #put in some RF distance tests here.
378 $blc = abs($total_branch_length - subtree_branch_length($new_tree->get_root()));
379 if ($blc > $max_branch_length_change) {
380 $max_branch_length_change = $blc;
383 # my ($new_root, $da, $var) = $new_tree->min_leaf_dist_variance_point();
384 # $new_tree->reset_root_to_point_on_branch($new_root, $da);
386 $new_tree->reset_root_to_point_on_branch($new_tree->min_leaf_dist_variance_point());
387 $count_treetestb_ok += $new_tree->test_tree();
389 $count_compare_rooted2 += $comp_rooted2 = $tree->compare_rooted($new_tree);
390 $count_compare_unrooted2 += $comp_unrooted2 = $tree->compare_unrooted($new_tree);
392 if (!$comp_rooted2 || !$comp_unrooted2) {
393 print("tree : \n");
394 $tree->get_root()->print_subtree("\n");
395 print("new_tree: \n");
396 $new_tree->get_root()->print_subtree("\n");
397 # exit;
400 my $subtree_bl = subtree_branch_length($new_tree->get_root());
401 $blc = abs($total_branch_length - $subtree_bl);
402 if ($blc > $max_branch_length_change) {
403 $max_branch_length_change = $blc;
404 print STDERR "tbl, stbl: $total_branch_length, $subtree_bl \n";
407 ok($max_branch_length_change < 5.0e-15*$total_branch_length, "Test that resetting root leaves total branch length unchanged. \n");
408 print($count_compare_rooted1, " ", $count_compare_unrooted1, " ", $count_compare_rooted2, " ", $count_compare_unrooted2, "\n");
409 is($count_treetesta_ok, @node_list, "tree_test ok on trees rooted at random points.\n");
410 is($count_treetestb_ok, @node_list, "tree_test ok on trees rooted at min variance point.\n");
411 is($count_compare_rooted1, scalar $tree->get_root()->get_children(), "tree reset_root and compare test 1\n");
412 is($count_compare_unrooted1, @node_list, "tree reset_root and compare test 2\n");
413 is($count_compare_rooted2, @node_list, "tree reset_root and compare test 3\n");
414 is($count_compare_unrooted2, @node_list, "tree reset_root and compare test 4\n");
416 # Test pre- in- post- order traversals.
417 my $t_tree = (CXGN::Phylo::Parse_newick->new("((((A:1, B:1):1, C:1):1, D:1):1, E:1)"))->parse();
418 my $preorder_names_by_hand = "node: .\n" . "node: \n" . "node: \n" . "node: \n" . "node: A\n" . "node: B\n"
419 . "node: C\n" . "node: D\n" . "node: E\n";
420 #our $preorder_names = "";
421 my $preorder_names = $t_tree->preorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str;} );
422 is($preorder_names, $preorder_names_by_hand, "preorder traversal test.\n");
423 #print STDERR "preorder_names: \n", $preorder_names, "\n\n", $preorder_names_by_hand. "\n\n";
425 my $inorder_names_by_hand = "node: A\n" . "node: \n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n"
426 . "node: D\n" . "node: .\n" . "node: E\n";
427 my $inorder_names = $t_tree->inorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } );
428 is($inorder_names, $inorder_names_by_hand, "inorder traversal test.\n");
429 #print STDERR "inorder_names: \n", $inorder_names, "\n\n", $inorder_names_by_hand. "\n\n";
431 my $postorder_names_by_hand = "node: A\n" . "node: B\n" . "node: \n" . "node: C\n" . "node: \n" . "node: D\n"
432 . "node: \n" . "node: E\n" . "node: .\n";
433 my $postorder_names = $t_tree->postorder_traversal( sub{ my $str = "node: " . shift->get_name() . "\n"; return $str; } );
434 is($postorder_names, $postorder_names_by_hand, "postorder traversal test.\n");
435 #print STDERR "postorder_names: \n", $postorder_names, "\n\n", $postorder_names_by_hand. "\n\n";
438 sub subtree_branch_length{
439 my $self = shift; # node
440 my @node_list = $self->recursive_subtree_node_list();
441 my $total_branch_length = 0.0;
442 foreach my $n (@node_list) {
443 $total_branch_length += $n->get_branch_length();
445 return $total_branch_length;
449 sub traverse_test_function{
450 my $node = shift;
451 my $str = "node: " . $node->get_name() . "\n"; return $str;