2 # BioPerl module for Bio::Tree::Compatible
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Gabriel Valiente <valiente@lsi.upc.edu>
8 # Copyright Gabriel Valiente
10 # You may distribute this module under the same terms as Perl itself
12 # POD documentation - main docs before the code
16 Bio::Tree::Compatible - Testing compatibility of phylogenetic trees
21 use Bio::Tree::Compatible;
23 my $input = Bio::TreeIO->new('-format' => 'newick',
24 '-file' => 'input.tre');
25 my $t1 = $input->next_tree;
26 my $t2 = $input->next_tree;
28 my ($incompat, $ilabels, $inodes) = Bio::Tree::Compatible::is_compatible($t1,$t2);
30 my %cluster1 = %{ Bio::Tree::Compatible::cluster_representation($t1) };
31 my %cluster2 = %{ Bio::Tree::Compatible::cluster_representation($t2) };
32 print "incompatible trees\n";
33 if (scalar(@$ilabels)) {
34 foreach my $label (@$ilabels) {
35 my $node1 = $t1->find_node(-id => $label);
36 my $node2 = $t2->find_node(-id => $label);
37 my @c1 = sort @{ $cluster1{$node1} };
38 my @c2 = sort @{ $cluster2{$node2} };
40 print " cluster"; map { print " ",$_ } @c1;
41 print " cluster"; map { print " ",$_ } @c2; print "\n";
44 if (scalar(@$inodes)) {
46 my $node1 = shift @$inodes;
47 my $node2 = shift @$inodes;
48 my @c1 = sort @{ $cluster1{$node1} };
49 my @c2 = sort @{ $cluster2{$node2} };
50 print "cluster"; map { print " ",$_ } @c1;
51 print " properly intersects cluster";
52 map { print " ",$_ } @c2; print "\n";
56 print "compatible trees\n";
61 NB: This module has exclusively class methods that work on Bio::Tree::TreeI
62 objects. An instance of Bio::Tree::Compatible cannot itself represent a tree,
63 and so typically there is no need to create one.
65 Bio::Tree::Compatible is a Perl tool for testing compatibility of
66 phylogenetic trees with nested taxa represented as Bio::Tree::Tree
67 objects. It is based on a recent characterization of ancestral
68 compatibility of semi-labeled trees in terms of their cluster
71 A semi-labeled tree is a phylogenetic tree with some of its internal
72 nodes labeled, and it can represent a classification tree as well as a
73 phylogenetic tree with nested taxa, with labeled internal nodes
74 corresponding to taxa at a higher level of aggregation or nesting than
75 that of their descendents.
77 Two semi-labeled trees are compatible if their topological
78 restrictions to the common labels are such that for each node label,
79 the smallest clusters containing it in each of the trees coincide and,
80 furthermore, no cluster in one of the trees properly intersects a
81 cluster of the other tree.
83 Future extensions of Bio::Tree::Compatible include a
84 Bio::Tree::Supertree module for combining compatible phylogenetic
85 trees with nested taxa into a common supertree.
91 User feedback is an integral part of the evolution of this and other
92 Bioperl modules. Send your comments and suggestions preferably to the
93 Bioperl mailing list. Your participation is much appreciated.
95 bioperl-l@bioperl.org - General discussion
96 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
100 Please direct usage questions or support issues to the mailing list:
102 I<bioperl-l@bioperl.org>
104 rather than to the module maintainer directly. Many experienced and
105 reponsive experts will be able look at the problem and quickly
106 address it. Please include a thorough description of the problem
107 with code and data examples if at all possible.
109 =head2 Reporting Bugs
111 Report bugs to the Bioperl bug tracking system to help us keep track
112 of the bugs and their resolution. Bug reports can be submitted via the
115 https://github.com/bioperl/bioperl-live/issues
121 =item * Philip Daniel and Charles Semple. Supertree Algorithms for
122 Nested Taxa. In: Olaf R. P. Bininda-Emonds (ed.) Phylogenetic
123 Supertrees: Combining Information to Reveal the Tree of Life,
124 I<Computational Biology>, vol. 4, chap. 7, pp. 151-171. Kluwer (2004).
126 =item * Charles Semple, Philip Daniel, Wim Hordijk, Roderic
127 D. M. Page, and Mike Steel: Supertree Algorithms for Ancestral
128 Divergence Dates and Nested Taxa. Bioinformatics B<20>(15), 2355-2360
131 =item * Merce Llabres, Jairo Rocha, Francesc Rossello, and Gabriel
132 Valiente: On the Ancestral Compatibility of Two Phylogenetic Trees
133 with Nested Taxa. J. Math. Biol. B<53>(3), 340-364 (2006).
137 =head1 AUTHOR - Gabriel Valiente
139 Email valiente@lsi.upc.edu
143 The rest of the documentation details each of the object methods.
147 package Bio
::Tree
::Compatible
;
151 # Object preamble - inherits from Bio::Root::Root
155 use base
qw(Bio::Root::Root);
157 =head2 postorder_traversal
159 Title : postorder_traversal
160 Usage : my @nodes = @{ $tree->postorder_traversal }
161 Function: Return list of nodes in postorder
162 Returns : reference to array of Bio::Tree::Node
165 For example, the postorder traversal of the tree
166 C<(((A,B)C,D),(E,F,G));> is a reference to an array of nodes with
167 internal_id 0 through 9, because the Newick standard representation
168 for phylogenetic trees is based on a postorder traversal.
176 + +-------D 9 +-------3
180 +-----+-----F +-----8-----6
186 sub postorder_traversal
{
190 push @stack, $self->get_root_node;
192 my $node = pop @stack;
194 foreach my $child ($node->each_Descendent(-sortby
=> 'internal_id')) {
198 my @postorder = reverse @queue;
202 =head2 cluster_representation
204 Title : cluster_representation
205 Usage : my %cluster = %{ $tree->cluster_representation }
206 Function: Compute the cluster representation of a tree
207 Returns : reference to hash of array of string indexed by
211 For example, the cluster representation of the tree
212 C<(((A,B)C,D),(E,F,G));> is a reference to a hash associating an array
213 of string (descendent labels) to each node, as follows:
224 9 --> [A,B,C,D,E,F,G]
228 sub cluster_representation
{
231 my @postorder = @
{ postorder_traversal
($tree) };
232 foreach my $node ( @postorder ) {
233 my @labeled = map { $_->id } grep { $_->id } $node->get_Descendents;
234 push @labeled, $node->id if $node->id;
235 $cluster{$node} = \
@labeled;
242 Title : common_labels
243 Usage : my $labels = $tree1->common_labels($tree2);
244 Function: Return set of common node labels
245 Returns : Set::Scalar
246 Args : Bio::Tree::Tree
248 For example, the common labels of the tree C<(((A,B)C,D),(E,F,G));>
249 and the tree C<((A,B)H,E,(J,(K)G)I);> are: C<[A,B,E,G]>.
257 + +-------D +-----------E
259 | +-----E | +-------J
269 my @labels1 = map { $_->id } grep { $_->id } $self->get_nodes;
270 my $common = Set
::Scalar
->new( @labels1 );
271 my @labels2 = map { $_->id } grep { $_->id } $arg->get_nodes;
272 my $temp = Set
::Scalar
->new( @labels2 );
273 return $common->intersection($temp);
276 =head2 topological_restriction
278 Title : topological_restriction
279 Usage : $tree->topological_restriction($labels)
280 Function: Compute the topological restriction of a tree to a subset
282 Returns : Bio::Tree::Tree
285 For example, the topological restrictions of each of the trees
286 C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to the labels
287 C<[A,B,E,G]> are as follows:
303 sub topological_restriction
{
304 my ($tree, $labels) = @_;
305 for my $node ( @
{ postorder_traversal
($tree) } ) {
306 unless (ref($node)) { # skip $node if already removed
307 my @cluster = map { $_->id } grep { $_->id } $node->get_Descendents;
308 push @cluster, $node->id if $node->id;
309 my $cluster = Set
::Scalar
->new(@cluster);
310 if ($cluster->is_disjoint($labels)) {
311 $tree->remove_Node($node);
313 if ($node->id and not $labels->has($node->id)) {
314 $node->{'_id'} = undef;
323 Title : is_compatible
324 Usage : $tree1->is_compatible($tree2)
325 Function: Test compatibility of two trees
327 Args : Bio::Tree::Tree
329 For example, the topological restrictions of the trees
330 C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to their common
331 labels, C<[A,B,E,G]>, are compatible. The respective cluster
332 representations are as follows:
342 As a second example, the trees C<(A,B);> and C<((B)A);> are
343 incompatible. Their respective cluster representations are as follows:
349 The reason is, the smallest cluster containing label C<A> is C<[A]> in
350 the first tree but C<[A,B]> in the second tree.
358 As a second example, the trees C<(((B,A),C),D);> and C<((A,(D,B)),C);>
359 are also incompatible. Their respective cluster representations are as
370 The reason is, cluster C<[A,B]> properly intersects cluster
371 C<[B,D]>. There are further incompatibilities between these trees:
372 C<[A,B,C]> properly intersects both C<[B,D]> and C<[A,B,D]>.
382 +-----------D +-----------C
387 my ($tree1, $tree2) = @_;
388 my $common = $tree1->Bio::Tree
::Compatible
::common_labels
($tree2);
389 $tree1->Bio::Tree
::Compatible
::topological_restriction
($common);
390 $tree2->Bio::Tree
::Compatible
::topological_restriction
($common);
391 my @postorder1 = @
{ postorder_traversal
($tree1) };
392 my @postorder2 = @
{ postorder_traversal
($tree2) };
393 my %cluster1 = %{ cluster_representation
($tree1) };
394 my %cluster2 = %{ cluster_representation
($tree2) };
395 my $incompat = 0; # false
397 foreach my $label ( $common->elements ) {
398 my $node1 = $tree1->find_node(-id
=> $label);
399 my @labels1 = @
{ $cluster1{$node1} };
400 my $cluster1 = Set
::Scalar
->new(@labels1);
401 my $node2 = $tree2->find_node(-id
=> $label);
402 my @labels2 = @
{ $cluster2{$node2} };
403 my $cluster2 = Set
::Scalar
->new(@labels2);
404 unless ( $cluster1->is_equal($cluster2) ) {
405 $incompat = 1; # true
406 push @labels, $label;
410 foreach my $node1 ( @postorder1 ) {
411 my @labels1 = @
{ $cluster1{$node1} };
412 my $cluster1 = Set
::Scalar
->new(@labels1);
413 foreach my $node2 ( @postorder2 ) {
414 my @labels2 = @
{$cluster2{$node2} };
415 my $cluster2 = Set
::Scalar
->new(@labels2);
416 if ($cluster1->is_properly_intersecting($cluster2)) {
417 $incompat = 1; # true
418 push @nodes, $node1, $node2;
422 return ($incompat, \
@labels, \
@nodes);