t/SeqFeature/Generic.t: fix typo on required module for testing
[bioperl-live.git] / lib / Bio / Tree / Statistics.pm
blobe72aec3d21ebe9e08494e9d035651712f3eee975
2 # BioPerl module for Bio::Tree::Statistics
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tree::Statistics - Calculate certain statistics for a Tree
18 =head1 SYNOPSIS
20 use Bio::Tree::Statistics;
22 =head1 DESCRIPTION
24 This should be where Tree statistics are calculated. It was
25 previously where statistics from a Coalescent simulation.
27 It now contains several methods for calculating L<Tree-Trait
28 statistics>.
30 =head1 FEEDBACK
32 =head2 Mailing Lists
34 User feedback is an integral part of the evolution of this and other
35 Bioperl modules. Send your comments and suggestions preferably to
36 the Bioperl mailing list. Your participation is much appreciated.
38 bioperl-l@bioperl.org - General discussion
39 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41 =head2 Support
43 Please direct usage questions or support issues to the mailing list:
45 I<bioperl-l@bioperl.org>
47 rather than to the module maintainer directly. Many experienced and
48 reponsive experts will be able look at the problem and quickly
49 address it. Please include a thorough description of the problem
50 with code and data examples if at all possible.
52 =head2 Reporting Bugs
54 Report bugs to the Bioperl bug tracking system to help us keep track
55 of the bugs and their resolution. Bug reports can be submitted via
56 the web:
58 https://github.com/bioperl/bioperl-live/issues
60 =head1 AUTHOR - Jason Stajich
62 Email jason AT bioperl.org
64 =head1 CONTRIBUTORS
66 Heikki Lehvaslaiho, heikki at bioperl dot org
68 =head1 APPENDIX
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
73 =cut
76 # Let the code begin...
79 package Bio::Tree::Statistics;
80 use strict;
83 use base qw(Bio::Root::Root);
85 =head2 new
87 Title : new
88 Usage : my $obj = Bio::Tree::Statistics->new();
89 Function: Builds a new Bio::Tree::Statistics object
90 Returns : Bio::Tree::Statistics
91 Args :
93 =head2 assess_bootstrap
95 Title : assess_bootstrap
96 Usage : my $tree_with_bs = $stats->assess_bootstrap(\@bs_trees,$guide_tree);
97 Function: Calculates the bootstrap for internal nodes based on the percentage
98 of times \@bs_trees agree with each internal node
99 Returns : L<Bio::Tree::TreeI>
100 Args : Arrayref of L<Bio::Tree::TreeI>s
101 Guide tree, L<Bio::Tree::TreeI>s
103 =cut
105 sub assess_bootstrap{
106 my ($self,$bs_trees,$guide_tree) = @_;
107 my @consensus;
109 if(!defined($bs_trees) || ref($bs_trees) ne 'ARRAY'){
110 die "ERROR: second parameter in assess_bootstrap() must be a list";
112 my $num_bs_trees = scalar(@$bs_trees);
113 if($num_bs_trees < 1){
114 die "ERROR: no bootstrap trees were passed to assess_bootstrap()";
117 # internal nodes are defined by their children
119 my (%lookup,%internal);
120 my $i = 0;
121 for my $tree ( $guide_tree, @$bs_trees ) {
122 # Do this as a top down approach, can probably be
123 # improved by caching internal node states, but not going
124 # to worry about it right now.
126 my @allnodes = $tree->get_nodes;
127 my @internalnodes = grep { ! $_->is_Leaf } @allnodes;
128 for my $node ( @internalnodes ) {
129 my @tips = sort map { $_->id }
130 grep { $_->is_Leaf() } $node->get_all_Descendents;
131 my $id = "(".join(",", @tips).")";
132 if( $i == 0 ) {
133 $internal{$id} = $node->internal_id;
134 } else {
135 $lookup{$id}++;
138 $i++;
140 #my @save; # unsure why this variable is needed
141 for my $l ( keys %lookup ) {
142 if( defined $internal{$l} ) {#&& $lookup{$l} > $min_seen ) {
143 my $intnode = $guide_tree->find_node(-internal_id => $internal{$l});
144 $intnode->bootstrap(sprintf("%d",100 * $lookup{$l} / $num_bs_trees));
147 return $guide_tree;
151 =head2 cherries
153 Example : cherries($tree, $node);
154 Description: Count number of paired leaf nodes
155 in a binary tree
156 Returns : integer
157 Exceptions :
158 Args : 1. Bio::Tree::TreeI object
159 2. Bio::Tree::NodeI object within the tree, optional
161 Commonly used statistics assume a binary tree, but this methods
162 returns a value even for trees with polytomies.
164 =cut
166 sub cherries ($;$) {
167 my $self = shift;
168 my $tree = shift;
169 my $node = shift || $tree->get_root_node;
171 my $cherries = 0;
172 my @descs = $node->each_Descendent;
174 if ($descs[0]->is_Leaf and $descs[1]->is_Leaf) {
175 if ($descs[3]) { #polytomy at leaf level
176 $cherries = 0;
177 } else {
178 $cherries = 1;
180 } else {
181 # recurse
182 foreach my $desc (@descs) {
183 $cherries += $self->cherries($tree, $desc);
186 return $cherries;
190 =head2 Tree-Trait statistics
192 The following methods produce descriptors of trait distribution among
193 leaf nodes within the trees. They require that a trait has been set
194 for each leaf node. The tag methods of Bio::Tree::Node are used to
195 store them as key/value pairs. In this way, one tree can store more
196 than one trait.
198 Trees have method add_traits() to set trait values from a file. See the
199 add_trait() method in L<Bio::Tree::TreeFunctionsI>.
201 =head2 fitch
203 Example : fitch($tree, $key, $node);
204 Description: Calculates Parsimony Score (PS) and internal trait
205 values using the Fitch 1971 parsimony algorithm for
206 the subtree a defined by the (internal) node.
207 Node defaults to the root.
208 Returns : true on success
209 Exceptions : leaf nodes have to have the trait defined
210 Args : 1. Bio::Tree::TreeI object
211 2. trait name string
212 3. Bio::Tree::NodeI object within the tree, optional
214 Runs first L<fitch_up> that calculates parsimony scores and then
215 L<fitch_down> that should resolve most of the trait/character state
216 ambiguities.
218 Fitch, W.M., 1971. Toward defining the course of evolution: minimal
219 change for a specific tree topology. Syst. Zool. 20, 406-416.
221 You can access calculated parsimony values using:
223 $score = $node->->get_tag_values('ps_score');
225 and the trait value with:
227 $traitvalue = $node->->get_tag_values('ps_trait'); # only the first
228 @traitvalues = $node->->get_tag_values('ps_trait');
230 Note that there can be more that one trait value, especially for the
231 root node.
233 =cut
235 sub fitch {
236 my $self = shift;
237 my $tree = shift;
238 my $key = shift || $self->throw("Trait name is needed");
239 my $node = shift || $tree->get_root_node;
241 $self->fitch_up($tree, $key, $node);
242 $self->fitch_down($tree, $node);
246 =head2 ps
248 Example : ps($tree, $key, $node);
249 Description: Calculates Parsimony Score (PS) from Fitch 1971
250 parsimony algorithm for the subtree as defined
251 by the (internal) node.
252 Node defaults to the root.
253 Returns : integer, 1 < PS < n, where n is number of branches
254 Exceptions : leaf nodes have to have the trait defined
255 Args : 1. Bio::Tree::TreeI object
256 2. trait name string
257 3. Bio::Tree::NodeI object within the tree, optional
259 This is the first half of the Fitch algorithm that is enough for
260 calculating the resolved parsimony values. The trait/chararacter
261 states are commonly left in ambiguous state. To resolve them, run
262 L<fitch_down>.
264 =cut
266 sub ps { shift->fitch_up(@_) }
269 =head2 fitch_up
271 Example : fitch_up($tree, $key, $node);
272 Description: Calculates Parsimony Score (PS) from the Fitch 1971
273 parsimony algorithm for the subtree as defined
274 by the (internal) node.
275 Node defaults to the root.
276 Returns : integer, 1< PS < n, where n is number of branches
277 Exceptions : leaf nodes have to have the trait defined
278 Args : 1. Bio::Tree::TreeI object
279 2. trait name string
280 3. Bio::Tree::NodeI object within the tree, optional
282 This is a more generic name for L<ps> and indicates that it performs
283 the first bottom-up tree traversal that calculates the parsimony score
284 but usually leaves trait/character states ambiguous. If you are
285 interested in internal trait states, running L<fitch_down> should
286 resolve most of the ambiguities.
288 =cut
290 sub fitch_up {
291 my $self = shift;
292 my $tree = shift;
293 my $key = shift || $self->throw("Trait name is needed");
294 my $node = shift || $tree->get_root_node;
296 if ($node->is_Leaf) {
297 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
298 unless $node->has_tag($key);
299 $node->set_tag_value('ps_trait', $node->get_tag_values($key) );
300 $node->set_tag_value('ps_score', 0 );
301 return; # end of recursion
304 foreach my $child ($node->each_Descendent) {
305 $self->fitch_up($tree, $key, $child);
308 my %intersection;
309 my %union;
310 my $score;
312 foreach my $child ($node->each_Descendent) {
313 foreach my $trait ($child->get_tag_values('ps_trait') ) {
314 $intersection{$trait}++ if $union{$trait};
315 $union{$trait}++;
317 $score += $child->get_tag_values('ps_score');
320 if (keys %intersection) {
321 $node->set_tag_value('ps_trait', keys %intersection);
322 $node->set_tag_value('ps_score', $score);
323 } else {
324 $node->set_tag_value('ps_trait', keys %union);
325 $node->set_tag_value('ps_score', $score+1);
328 if ($self->verbose) {
329 print "-- node --------------------------\n";
330 print "iID: ", $node->internal_id, " (", $node->id, ")\n";
331 print "Trait: ", join (', ', $node->get_tag_values('ps_trait') ), "\n";
332 print "length :", scalar($node->get_tag_values('ps_score')) , "\n";
334 return scalar $node->get_tag_values('ps_score');
338 =head2 fitch_down
340 Example : fitch_down($tree, $node);
341 Description: Runs the second pass from Fitch 1971
342 parsimony algorithm to resolve ambiguous
343 trait states left by first pass.
344 by the (internal) node.
345 Node defaults to the root.
346 Returns : true
347 Exceptions : dies unless the trait is defined in all nodes
348 Args : 1. Bio::Tree::TreeI object
349 2. Bio::Tree::NodeI object within the tree, optional
351 Before running this method you should have ran L<fitch_up> (alias to
352 L<ps> ). Note that it is not guaranteed that all states are completely
353 resolved.
355 =cut
357 sub fitch_down {
358 my $self = shift;
359 my $tree = shift;
360 my $node = shift || $tree->get_root_node;
362 my $key = 'ps_trait';
363 $self->throw ("ERROR: ". $node->internal_id. " needs a value for $key")
364 unless $node->has_tag($key);
366 my $nodev;
367 foreach my $trait ($node->get_tag_values($key) ) {
368 $nodev->{$trait}++;
371 foreach my $child ($node->each_Descendent) {
372 next if $child->is_Leaf; # end of recursion
374 my $intersection;
375 foreach my $trait ($child->get_tag_values($key) ) {
376 $intersection->{$trait}++ if $nodev->{$trait};
379 $self->fitch_down($tree, $child);
380 $child->set_tag_value($key, keys %$intersection);
382 return 1; # success
386 =head2 persistence
388 Example : persistence($tree, $node);
389 Description: Calculates the persistence
390 for node in the subtree defined by the (internal)
391 node. Node defaults to the root.
392 Returns : int, number of generations trait value has to remain same
393 Exceptions : all the nodes need to have the trait defined
394 Args : 1. Bio::Tree::TreeI object
395 2. Bio::Tree::NodeI object within the tree, optional
397 Persistence measures the stability that the trait value has in a
398 tree. It expresses the number of generations the trait value remains
399 the same. All the decendants of the root in the same generation have
400 to share the same value.
402 Depends on Fitch's parsimony score (PS).
404 =cut
406 sub _persistence {
407 my $self = shift;
408 my $tree = shift;
409 my $node = shift;
410 my $value = shift || $self->throw("Value is needed");
413 my $key = 'ps_trait';
415 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
417 return 0 unless $node->get_tag_values($key) eq $value; # wrong value
418 return 1 if $node->is_Leaf; # end of recursion
420 my $persistence = 10000000; # an arbitrarily large number
421 foreach my $child ($node->each_Descendent) {
422 my $pers = $self->_persistence($tree, $child, $value);
423 $persistence = $pers if $pers < $persistence;
425 return $persistence + 1;
428 sub persistence {
429 my $self = shift;
430 my $tree = shift;
431 my $node = shift || $tree->get_root_node;
432 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
434 my $key = 'ps_trait';
435 my $value = $node->get_tag_values($key);
437 #calculate
438 my $persistence = $self->_persistence($tree, $node, $value);
439 $node->set_tag_value('persistance', $persistence);
440 return $persistence;
444 =head2 count_subclusters
446 Example : count_clusters($tree, $node);
447 Description: Calculates the number of sub-clusters
448 in the subtree defined by the (internal)
449 node. Node defaults to the root.
450 Returns : int, count
451 Exceptions : all the nodes need to have the trait defined
452 Args : 1. Bio::Tree::TreeI object
453 2. Bio::Tree::NodeI object within the tree, optional
455 Depends on Fitch's parsimony score (PS).
457 =cut
459 sub _count_subclusters {
460 my $self = shift;
461 my $tree = shift;
462 my $node = shift;
463 my $value = shift || $self->throw("Value is needed");
465 my $key = 'ps_trait';
467 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
468 unless $node->has_tag($key);
470 if ($node->get_tag_values($key) eq $value) {
471 if ($node->get_tag_values('ps_score') == 0) {
472 return 0;
473 } else {
474 my $count = 0;
475 foreach my $child ($node->each_Descendent) {
476 $count += $self->_count_subclusters($tree, $child, $value);
478 return $count;
481 return 1;
484 sub count_subclusters {
485 my $self = shift;
486 my $tree = shift;
487 my $node = shift || $tree->get_root_node;
488 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
490 my $key = 'ps_trait';
491 my $value = $node->get_tag_values($key);
493 return $self->_count_subclusters($tree, $node, $value);
497 =head2 count_leaves
499 Example : count_leaves($tree, $node);
500 Description: Calculates the number of leaves with same trait
501 value as root in the subtree defined by the (internal)
502 node. Requires an unbroken line of identical trait values.
503 Node defaults to the root.
504 Returns : int, number of leaves with this trait value
505 Exceptions : all the nodes need to have the trait defined
506 Args : 1. Bio::Tree::TreeI object
507 2. Bio::Tree::NodeI object within the tree, optional
509 Depends on Fitch's parsimony score (PS).
511 =cut
513 sub _count_leaves {
514 my $self = shift;
515 my $tree = shift;
516 my $node = shift || $tree->get_root_node;
517 my $value = shift;
519 my $key = 'ps_trait';
521 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
522 unless $node->has_tag($key);
524 if ($node->get_tag_values($key) eq $value) {
525 #print $node->id, ": ", $node->get_tag_values($key), "\n";
526 return 1 if $node->is_Leaf; # end of recursion
528 my $count = 0;
529 foreach my $child ($node->each_Descendent) {
530 $count += $self->_count_leaves($tree, $child, $value);
532 return $count;
534 return 0;
537 sub count_leaves {
538 my $self = shift;
539 my $tree = shift;
540 my $node = shift || $tree->get_root_node;
541 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
543 my $key = 'ps_trait';
544 my $value = $node->get_tag_values($key);
546 return $self->_count_leaves($tree, $node, $value);
550 =head2 phylotype_length
552 Example : phylotype_length($tree, $node);
553 Description: Sums up the branch lengths within phylotype
554 exluding the subclusters where the trait values
555 are different
556 Returns : float, length
557 Exceptions : all the nodes need to have the trait defined
558 Args : 1. Bio::Tree::TreeI object
559 2. Bio::Tree::NodeI object within the tree, optional
561 Depends on Fitch's parsimony score (PS).
563 =cut
565 sub _phylotype_length {
566 my $self = shift;
567 my $tree = shift;
568 my $node = shift;
569 my $value = shift;
571 my $key = 'ps_trait';
573 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
574 unless $node->has_tag($key);
576 return 0 if $node->get_tag_values($key) ne $value;
577 return $node->branch_length if $node->is_Leaf; # end of recursion
579 my $length = 0;
580 foreach my $child ($node->each_Descendent) {
581 my $sub_len = $self->_phylotype_length($tree, $child, $value);
582 $length += $sub_len;
583 $length += $child->branch_length if not $child->is_Leaf and $sub_len;
585 return $length;
588 sub phylotype_length {
589 my $self = shift;
590 my $tree = shift;
591 my $node = shift || $tree->get_root_node;
593 my $key = 'ps_trait';
594 my $value = $node->get_tag_values($key);
596 return $self->_phylotype_length($tree, $node, $value);
600 =head2 sum_of_leaf_distances
602 Example : sum_of_leaf_distances($tree, $node);
603 Description: Sums up the branch lengths from root to leaf
604 exluding the subclusters where the trait values
605 are different
606 Returns : float, length
607 Exceptions : all the nodes need to have the trait defined
608 Args : 1. Bio::Tree::TreeI object
609 2. Bio::Tree::NodeI object within the tree, optional
611 Depends on Fitch's parsimony score (PS).
613 =cut
615 sub _sum_of_leaf_distances {
616 my $self = shift;
617 my $tree = shift;
618 my $node = shift;
619 my $value = shift;
621 my $key = 'ps_trait';
623 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
624 unless $node->has_tag($key);
625 return 0 if $node->get_tag_values($key) ne $value;
626 #return $node->branch_length if $node->is_Leaf; # end of recursion
627 return 0 if $node->is_Leaf; # end of recursion
629 my $length = 0;
630 foreach my $child ($node->each_Descendent) {
631 $length += $self->_count_leaves($tree, $child, $value) * $child->branch_length +
632 $self->_sum_of_leaf_distances($tree, $child, $value);
634 return $length;
637 sub sum_of_leaf_distances {
638 my $self = shift;
639 my $tree = shift;
640 my $node = shift || $tree->get_root_node;
642 my $key = 'ps_trait';
643 my $value = $node->get_tag_values($key);
645 return $self->_sum_of_leaf_distances($tree, $node, $value);
649 =head2 genetic_diversity
651 Example : genetic_diversity($tree, $node);
652 Description: Diversity is the sum of root to leaf distances
653 within the phylotype normalised by number of leaf
654 nodes
655 Returns : float, value of genetic diversity
656 Exceptions : all the nodes need to have the trait defined
657 Args : 1. Bio::Tree::TreeI object
658 2. Bio::Tree::NodeI object within the tree, optional
660 Depends on Fitch's parsimony score (PS).
662 =cut
664 sub genetic_diversity {
665 my $self = shift;
666 my $tree = shift;
667 my $node = shift || $tree->get_root_node;
669 return $self->sum_of_leaf_distances($tree, $node) /
670 $self->count_leaves($tree, $node);
674 =head2 statratio
676 Example : statratio($tree, $node);
677 Description: Ratio of the stem length and the genetic diversity of the
678 phylotype L<genetic_diversity>
679 Returns : float, separation score
680 Exceptions : all the nodes need to have the trait defined
681 Args : 1. Bio::Tree::TreeI object
682 2. Bio::Tree::NodeI object within the tree, optional
684 Statratio gives a measure of separation and variability within the phylotype.
685 Larger values identify more rapidly evolving and recent phylotypes.
687 Depends on Fitch's parsimony score (PS).
689 =cut
691 sub statratio {
692 my $self = shift;
693 my $tree = shift;
694 my $node = shift || $tree->get_root_node;
696 my $div = $self->genetic_diversity($tree, $node);
697 return 0 if $div == 0;
698 return $node->branch_length / $div;
703 =head2 ai
705 Example : ai($tree, $key, $node);
706 Description: Calculates the Association Index (AI) of Whang et
707 al. 2001 for the subtree defined by the (internal)
708 node. Node defaults to the root.
709 Returns : real
710 Exceptions : leaf nodes have to have the trait defined
711 Args : 1. Bio::Tree::TreeI object
712 2. trait name string
713 3. Bio::Tree::NodeI object within the tree, optional
715 Association index (AI) gives a more fine grained results than PS since
716 the result is a real number. ~0 E<lt>= AI.
718 Wang, T.H., Donaldson, Y.K., Brettle, R.P., Bell, J.E., Simmonds, P.,
719 2001. Identification of shared populations of human immunodeficiency
720 Virus Type 1 infecting microglia and tissue macrophages outside the
721 central nervous system. J. Virol. 75 (23), 11686-11699.
723 =cut
725 sub _node_ai {
726 my $self = shift;
727 my $node = shift;
728 my $key = shift;
730 my $traits;
731 my $leaf_count = 0;
732 for my $desc ( $node->get_all_Descendents ) {
733 next unless $desc->is_Leaf;
734 $leaf_count++;
735 $self->throw ("Node ". $desc->id. " needs a value for trait [$key]")
736 unless $desc->has_tag($key);
737 my $trait = $desc->get_tag_values($key);
738 $traits->{$trait}++;
740 my $most_common = 0;
741 foreach ( keys %$traits) {
742 $most_common = $traits->{$_} if $traits->{$_} > $most_common;
744 return sprintf "%1.6f", (1 - ($most_common/$leaf_count) ) / (2**($leaf_count-1) );
747 sub ai {
748 my $self = shift;
749 my $tree = shift;
750 my $key = shift || $self->throw("Trait name is needed");
751 my $start_node = shift || $tree->get_root_node;
752 return unless $start_node;
754 my $sum = 0;
755 for my $node ( $start_node->get_all_Descendents ) {
756 next if $node->is_Leaf;
757 $sum += $self->_node_ai($node, $key);
759 return $sum;
763 =head2 mc
765 Example : mc($tree, $key, $node);
766 Description: Calculates the Monophyletic Clade (MC) size statistics
767 for the subtree a defined by the (internal) node.
768 Node defaults to the root;
769 Returns : hashref with trait values as keys
770 Exceptions : leaf nodes have to have the trait defined
771 Args : 1. Bio::Tree::TreeI object
772 2. trait name string
773 3. Bio::Tree::NodeI object within the tree, optional
775 Monophyletic Clade (MC) size statistics by Salemi at al 2005. It is
776 calculated for each trait value. 1 E<lt>= MC E<lt>= nx, where nx is the
777 number of tips with value x:
779 pick the internal node with maximim value for
780 number of of tips with only trait x
782 MC was defined by Parker et al 2008.
784 Salemi, M., Lamers, S.L., Yu, S., de Oliveira, T., Fitch, W.M., McGrath, M.S.,
785 2005. Phylodynamic analysis of Human Immunodeficiency Virus Type 1 in
786 distinct brain compartments provides a model for the neuropathogenesis of
787 AIDS. J. Virol. 79 (17), 11343-11352.
789 Parker, J., Rambaut A., Pybus O., 2008. Correlating viral phenotypes
790 with phylogeny: Accounting for phylogenetic uncertainty Infection,
791 Genetics and Evolution 8 (2008), 239-246.
793 =cut
795 sub _node_mc {
796 my $self = shift;
797 my $node = shift;
798 my $key = shift;
800 my $traits;
801 my $leaf_count = 0;
802 for my $node2 ( $node->get_all_Descendents ) {
803 next unless $node2->is_Leaf;
804 $leaf_count++;
805 my $trait = $node2->get_tag_values($key);
806 $traits->{$trait}++;
808 return $traits;
811 sub mc {
812 my $self = shift;
813 my $tree = shift;
814 my $key = shift || die "Trait name is needed";
815 my $start_node = shift || $tree->get_root_node;
816 return unless $start_node;
818 my $sum; # hashref, keys are trait values
819 my $keys; # hashref, keys are trait values
820 foreach my $node ( $start_node->get_all_Descendents ) {
821 next if $node->is_Leaf;
822 my $traits = $self->_node_mc($node, $key);
823 if (scalar keys %$traits == 1) {
824 my ($value) = keys %$traits;
825 no warnings;
826 $sum->{$value} = $traits->{$value}
827 if $sum->{$value} < $traits->{$value};
828 } else {
829 map { $keys->{$_} = 1 } keys %$traits;
832 # check for cases where there are no clusters
833 foreach my $value (keys %$keys) {
834 $sum->{$value} = 1 unless defined $sum->{$value};
836 return $sum;