Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / TreeIO / phyloxml.pm
blob8ea578f6bf63849a061715bd8589cf8f2053970f
1 # $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
3 # BioPerl module for Bio::TreeIO::phyloxml
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Mira Han <mirhan@indiana.edu>
9 # Copyright Mira Han
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
19 =head1 SYNOPSIS
21 # do not use this module directly
22 use Bio::TreeIO;
23 my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
24 -file => 'tree.dnd');
25 my $tree = $treeio->next_tree;
27 =head1 DESCRIPTION
29 This module handles parsing and writing of phyloXML format.
31 =head1 FEEDBACK
33 =head2 Mailing Lists
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to the
37 Bioperl mailing list. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 =head2 Support
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
53 =head2 Reporting Bugs
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 of the bugs and their resolution. Bug reports can be submitted viax the
57 web:
59 https://github.com/bioperl/bioperl-live/issues
61 =head1 AUTHOR - Mira Han
63 Email mirhan@indiana.edu
65 =head1 APPENDIX
67 The rest of the documentation details each of the object methods.
68 Internal methods are usually preceded with a _
70 =cut
73 # Let the code begin...
76 package Bio::TreeIO::phyloxml;
78 use strict;
80 # Object preamble - inherits from Bio::Root::Root
82 use Bio::Tree::Tree;
83 use Bio::Tree::AnnotatableNode;
84 use Bio::Annotation::SimpleValue;
85 use Bio::Annotation::Relation;
86 use XML::LibXML;
87 use XML::LibXML::Reader;
88 use base qw(Bio::TreeIO);
91 sub _initialize
93 my($self, %args) = @_;
94 $args{-treetype} ||= 'Bio::Tree::Tree';
95 $args{-nodetype} ||= 'Bio::Tree::AnnotatableNode';
96 $self->SUPER::_initialize(%args);
98 # phyloxml TreeIO does not use SAX,
99 # therefore no need to attach EventHandler
100 # instead we will define a reader that is a pull-parser of libXML
101 if ($self->mode eq 'r') {
102 if ($self->_fh) {
103 $self->{'_reader'} = XML::LibXML::Reader->new(
104 IO => $self->_fh,
105 no_blanks => 1
108 if (!$self->{'_reader'}) {
109 $self->throw("XML::LibXML::Reader not initialized");
112 elsif ($self->mode eq 'w') {
113 # print default lines
114 $self->_print('<?xml version="1.0" encoding="UTF-8"?>',"\n");
115 $self->_print('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">');
118 $self->treetype($args{-treetype});
119 $self->nodetype($args{-nodetype});
120 $self->{'_lastitem'} = {}; # holds open items and the attribute hash
121 $self->_init_func();
124 sub _init_func
126 my ($self) = @_;
127 my %start_elements = (
128 'phylogeny' => \&element_phylogeny,
129 'clade' => \&element_clade,
130 'sequence_relation' => \&element_relation,
131 'clade_relation' => \&element_relation,
133 $self->{'_start_elements'} = \%start_elements;
134 my %end_elements = (
135 'phylogeny' => \&end_element_phylogeny,
136 'clade' => \&end_element_clade,
137 'sequence_relation' => \&end_element_relation,
138 'clade_relation' => \&end_element_relation,
140 $self->{'_end_elements'} = \%end_elements;
143 sub DESTROY {
144 my $self = shift;
145 if ($self->mode eq 'w') {
146 $self->_print('</phyloxml>');
147 $self->flush if $self->_flush_on_write && defined $self->_fh;
149 $self->SUPER::DESTROY;
152 =head2 next_tree
154 Title : next_tree
155 Usage : my $tree = $treeio->next_tree
156 Function: Gets the next tree in the stream
157 Returns : Bio::Tree::TreeI
158 Args : none
160 =cut
162 sub next_tree
164 my ($self) = @_;
165 my $reader = $self->{'_reader'};
166 my $tree;
167 while ($reader->read)
169 if ($reader->nodeType == XML_READER_TYPE_END_ELEMENT)
171 if ($reader->name eq 'phylogeny')
173 $tree = $self->end_element_phylogeny();
174 last;
177 $self->processXMLNode;
179 return $tree;
182 =head2 add_attribute
184 Title : add_phyloXML_annotation
185 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -attr=>"id_source = \"A\"")
186 Function: add attributes to an object
187 Returns : the node that we added annotations to
188 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
189 -attr => string in the form "A = B", where A is the attribute name and B is the attribute value
191 =cut
193 sub add_attribute
195 my ($self, @args) = @_;
196 my ($obj, $attr) = $self->_rearrange([qw(OBJ ATTR)], @args);
198 if ($attr) {
199 $attr = '<dummy '.$attr.'/>';
202 my $oldreader = $self->{'_reader'}; # save reader
203 $self->{'_reader'} = XML::LibXML::Reader->new(
204 string => $attr,
205 no_blanks => 1
207 my $reader = $self->{'_reader'};
208 $self->{'_currentannotation'} = []; # holds annotationcollection
209 $self->{'_currenttext'} = '';
210 #$self->{'_id_link'} = {};
212 # pretend we saw a <clade> element
213 $self->{'_lastitem'}->{'dummy'}++;
214 push @{$self->{'_lastitem'}->{'current'}}, { 'dummy'=>{}}; # current holds current element and empty hash for its attributes
216 # push object to annotate
217 push @{$self->{'_currentitems'}}, $obj;
219 # read attributes of element
220 while ($reader->read)
222 #$self->processXMLNode;
223 $self->processAttribute($self->current_attr);
226 # if there is id_source add sequence to _id_link
227 if (exists $self->current_attr->{'id_source'}) {
228 my $idsrc = $self->current_attr->{'id_source'};
229 $self->{'_id_link'}->{$idsrc} = $obj;
232 # check idref
233 my $idref = '';
234 if (exists $self->current_attr->{'id_ref'}) {
235 $idref = $self->current_attr->{'id_ref'};
238 my $srcbyidref = '';
239 $srcbyidref = $self->{'_id_link'}->{$idref};
241 # exception when id_ref is defined but id_src is not, or vice versa.
242 if ($idref xor $srcbyidref) {
243 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
246 # if attribute exists then add Annotation::Collection with tag '_attr'
247 my $newac = $obj->annotation;
248 if ( scalar keys %{$self->current_attr} ) {
249 my $newattr = Bio::Annotation::Collection->new();
250 foreach my $tag (keys %{$self->current_attr}) {
251 my $sv = Bio::Annotation::SimpleValue->new(
252 -value => $self->current_attr->{$tag}
254 $newattr->add_Annotation($tag, $sv);
256 $newac->add_Annotation('_attr', $newattr);
259 # pop from temporary list
260 pop @{$self->{'_currentitems'}};
261 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
262 pop @{$self->{'_lastitem'}->{'current'}};
264 $self->{'_reader'} = $oldreader; # restore reader
265 return $obj;
269 =head2 add_phyloXML_annotation
271 Title : add_phyloXML_annotation
272 Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring)
273 my $tree = $treeio->add_phyloXML_annotation('-obj'=>$tree, '-xml'=>'<sequence_relation id_ref_0="A" id_ref_1="B" type="orthology"/>')
275 Function: add annotations to a node in the phyloXML format string
276 Returns : the node that we added annotations to
277 Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
278 -xml => string in phyloXML format that describes the annotation for the node
280 =cut
282 sub add_phyloXML_annotation
284 my ($self, @args) = @_;
285 my ($obj, $xml_string) = $self->_rearrange([qw(OBJ XML)], @args);
287 $xml_string = '<phyloxml>'.$xml_string.'</phyloxml>';
288 $self->debug( $xml_string );
290 my $oldreader = $self->{'_reader'}; # save reader
291 $self->{'_reader'} = XML::LibXML::Reader->new(
292 string => $xml_string,
293 no_blanks => 1
295 my $reader = $self->{'_reader'};
296 #$self->{'_currentannotation'} = []; # holds annotationcollection
297 #$self->{'_currenttext'} = '';
298 #$self->{'_id_link'} = {};
300 # pretend we saw a <clade> element
301 $self->{'_lastitem'}->{'clade'}++;
302 push @{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes
303 # our object to annotate (nodeI)
304 # push into temporary list
305 push @{$self->{'_currentitems'}}, $obj;
307 $reader->read; #read away the first element 'phyloxml'
308 while ($reader->read)
310 $self->processXMLNode;
313 # pop from temporary list
314 pop @{$self->{'_currentitems'}};
315 $self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
316 pop @{$self->{'_lastitem'}->{'current'}};
318 $self->{'_reader'} = $oldreader; # restore reader
319 return $obj;
323 =head2 write_tree
325 Title : write_tree
326 Usage : $treeio->write_tree($tree);
327 Function: Write a tree out to data stream in phyloxml format
328 Returns : none
329 Args : Bio::Tree::TreeI object
331 =cut
333 sub write_tree
335 my ($self, @trees) = @_;
336 foreach my $tree (@trees) {
337 my $root = $tree->get_root_node;
338 $self->_print("<phylogeny");
339 my @tags = $tree->get_all_tags();
340 my $attr_str = '';
341 foreach my $tag (@tags) {
342 my @values = $tree->get_tag_values($tag);
343 foreach (@values) {
344 $attr_str .= " ".$tag."=\"".$_."\"";
347 # check if rooted
348 my ($b_rooted) = $tree->get_tag_values('rooted');
349 if ($b_rooted) {
350 $attr_str .= " rooted=\"true\"";
352 else {
353 if($tree->is_binary($tree->get_root_node)) {
354 $attr_str .= " rooted=\"true\"";
356 else {
357 $attr_str .= " rooted=\"false\"";
360 $self->_print($attr_str);
361 $self->_print(">");
362 if ($root->isa('Bio::Tree::AnnotatableNode')) {
363 $self->_print($self->_write_tree_Helper_annotatableNode($root));
365 else {
366 $self->_print($self->_write_tree_Helper_generic($root));
369 # print clade relations
370 while (my $str = pop (@{$self->{'_tree_attr'}->{'clade_relation'}})) {
371 $self->_print($str);
373 # print sequence relations
374 while (my $str = pop (@{$self->{'_tree_attr'}->{'sequence_relation'}})) {
375 $self->_print($str);
377 $self->_print("</phylogeny>");
379 $self->flush if $self->_flush_on_write && defined $self->_fh;
380 return;
383 =head2 _write_tree_Helper_annotatableNode
385 Title : _write_tree_Helper_annotatableNode
386 Usage : internal method used by write_tree, not to be used directly
387 Function: recursive helper function of write_tree for the annotatableNodes.
388 translates annotations into xml elements.
389 Returns : string describing the node
390 Args : Bio::Node::AnnotatableNode object, string
392 =cut
394 sub _write_tree_Helper_annotatableNode
396 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
398 my $ac = $node->annotation;
400 # if clade_relation exists
401 my @relations = $ac->get_Annotations('clade_relation');
402 foreach (@relations) {
403 my $clade_rel = $self->_relation_to_string($node, $_, '');
404 # set as tree attr
405 push (@{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel);
408 # start <clade>
409 $str .= '<clade';
410 my ($attr) = $ac->get_Annotations('_attr'); # check id_source
411 if ($attr) {
412 my ($id_source) = $attr->get_Annotations('id_source');
413 if ($id_source) {
414 $str .= " id_source=\"".$id_source->value."\"";
417 $str .= ">";
419 # print all descendent nodes
420 foreach my $child ( $node->each_Descendent() ) {
421 $str = $self->_write_tree_Helper_annotatableNode($child, $str);
424 # print all annotations
425 $str = print_annotation( $node, $str, $ac );
427 # print all sequences
428 if ($node->has_sequence) {
429 foreach my $seq (@{$node->sequence}) {
430 # if sequence_relation exists
431 my @relations = $seq->annotation->get_Annotations('sequence_relation');
432 foreach (@relations) {
433 my $sequence_rel = $self->_relation_to_string($seq, $_, '');
434 # set as tree attr
435 push (@{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel);
437 $str = print_seq_annotation( $node, $str, $seq );
441 $str .= "</clade>";
443 return $str;
446 =head2 _write_tree_Helper_generic
448 Title : _write_tree_Helper_generic
449 Usage : internal method used by write_tree, not to be used directly
450 Function: recursive helper function of write_tree for generic NodesI.
451 all tags are translated into property elements.
452 Returns : string describing the node
453 Args : Bio::Node::NodeI object, string
455 =cut
457 sub _write_tree_Helper_generic
459 my ($self, $node, $str) = @_; # this self is a Bio::Tree::phyloxml
461 # start <clade>
462 $str .= '<clade>';
464 # print all descendent nodes
465 foreach my $child ( $node->each_Descendent() ) {
466 $str = $self->_write_tree_Helper_generic($child, $str);
469 # print all tags
470 my @tags = $node->get_all_tags();
471 foreach my $tag (@tags) {
472 my @values = $node->get_tag_values($tag);
473 foreach my $val (@values) {
474 $str .= "<property datatype=\"xsd:string\" ref=\"tag:$tag\" applies_to=\"clade\">";
475 $str .=$val;
476 $str .= "</property>";
480 # print NodeI features
481 if ($node->id) {
482 $str .= "<name>";
483 $str .= $node->id;
484 $str .= "</name>";
486 if ($node->branch_length) {
487 $str .= "<branch_length>";
488 $str .= $node->branch_length;
489 $str .= "</branch_length>";
491 if ($node->bootstrap) {
492 $str .= "<confidence type = \"bootstrap\">";
493 $str .= $node->bootstrap;
494 $str .= "</confidence>";
497 $str .= "</clade>";
498 return $str;
501 =head2 _relation_to_string
503 Title : _relation_to_string
504 Usage : internal method used by write_tree, not to be used directly
505 Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements.
506 Returns : string describing the node
507 Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation,
508 the Annotation::Relation object,
509 the string
511 =cut
513 # It may be more appropriate to make Annotation::Relation have
514 # a to_string callback function,
515 # and have this subroutine set as the callback when we are in
516 # phyloXML context.
517 # I've put it here for now, since write_tree is the only place it is used.
519 sub _relation_to_string {
520 my ($self, $obj, $rel, $str) = @_;
522 my @attr = $obj->annotation->get_Annotations('_attr'); # check id_source
523 if (@attr) {
524 my @id_source = $attr[0]->get_Annotations('id_source');
526 my ($id_ref_0) = $obj->annotation->get_nested_Annotations(
527 '-keys' => ['id_source'],
528 '-recursive' => 1);
529 my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations(
530 '-keys' => ['id_source'],
531 '-recursive' => 1);
533 my $confidence = $rel->confidence();
534 my $confidence_type = $rel->confidence_type();
535 $str .= "<";
536 $str .= $rel->tagname;
537 $str .= " id_ref_0=\"".$id_ref_0->value."\"";
538 $str .= " id_ref_1=\"".$id_ref_1->value."\"";
539 $str .= " type=\"".$rel->type."\"";
540 if ($confidence) {
541 $str .= " ><confidence";
542 if ($confidence_type) {
543 $str .= " type=\"".$confidence_type."\"";
545 $str .= ">";
546 $str .= $confidence;
547 $str .= "</confidence>";
548 $str .= "</";
549 $str .= $rel->tagname;
550 $str .= ">";
552 else {
553 $str .= "/>";
555 return $str;
558 =head2 read_annotation
560 Title : read_annotation
561 Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1);
562 Function: read text value (or attribute value) of the annotations corresponding to the element path
563 Returns : list of text values of the annotations matching the path
564 Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI)
565 -path => path of the nested elements
566 -attr => Boolean value to indicate whether to get the attribute of the element or the text value.
567 (default is 0, meaning text value is returned)
569 =cut
571 # It may be more appropriate to make a separate Annotation::phyloXML object
572 # and have this subroutine within that object so it can handle the
573 # reading and writing of the values and attributes.
574 # but since tagTree is a temporary stub and I didn't want to make
575 # a redundant object I've put it here for now.
577 sub read_annotation
579 my ($self, @args) = @_;
580 my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args);
581 my $ac = $obj->annotation;
582 if ($attr) {
583 my @elements = split ('/', $path);
584 my $final = pop @elements;
585 push (@elements, '_attr');
586 push (@elements, $final);
587 $path = join ('/', @elements);
588 return $self->_read_annotation_attr_Helper( [$ac], $path);
590 else {
591 return $self->_read_annotation_text_Helper( [$ac], $path);
595 sub _read_annotation_text_Helper
597 my ($self, $acs, $path) = @_;
598 my @elements = split ('/', $path);
599 my $key = shift @elements;
600 my @nextacs = ();
601 foreach my $ac (@$acs) {
602 foreach my $ann ($ac->get_Annotations($key)) {
603 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
606 if (@elements == 0) {
607 my @values = ();
608 my @texts = map {$_->get_Annotations('_text')} @nextacs;
609 foreach (@texts) {
610 $_ && push (@values, $_->value);
612 return @values;
614 else {
615 $path = join ('/', @elements);
616 return $self->_read_annotation_text_Helper( \@nextacs, $path);
620 sub _read_annotation_attr_Helper
622 my ($self, $acs, $path) = @_;
623 my @elements = split ('/', $path);
624 my $key = shift @elements;
625 my @nextacs = ();
626 foreach my $ac (@$acs) {
627 foreach my $ann ($ac->get_Annotations($key)) {
628 if ($ann->isa('Bio::AnnotationCollectionI')) {push (@nextacs, $ann)}
631 if (@elements == 1) {
632 my $attrname = $elements[0];
633 my @sv = map {$_->get_Annotations($attrname)} @nextacs;
634 return map {$_->value} @sv;
636 else {
637 $path = join ('/', @elements);
638 return $self->_read_annotation_attr_Helper( \@nextacs, $path);
642 =head1 Methods for parsing the XML document
644 =cut
646 =head2 processXMLNode
648 Title : processXMLNode
649 Usage : $treeio->processXMLNode
650 Function: read the XML node and process according to the node type
651 Returns : none
652 Args : none
654 =cut
656 sub processXMLNode
658 my ($self) = @_;
659 my $reader = $self->{'_reader'};
660 my $nodetype = $reader->nodeType;
661 if ( $nodetype == XML_READER_TYPE_ELEMENT)
663 $self->{'_lastitem'}->{$reader->name}++;
664 push @{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
666 if (exists $self->{'_start_elements'}->{$reader->name}) {
667 my $method = $self->{'_start_elements'}->{$reader->name};
668 $self->$method();
670 else {
671 $self->element_default();
673 if ($reader->isEmptyElement) {
674 # element is complete
675 # set nodetype so it can jump and
676 # do procedures for XML_READER_TYPE_END_ELEMENT
677 $nodetype = XML_READER_TYPE_END_ELEMENT;
680 if ($nodetype == XML_READER_TYPE_TEXT)
682 $self->{'_currenttext'} = $reader->value;
684 if ($nodetype == XML_READER_TYPE_END_ELEMENT)
686 if (exists $self->{'_end_elements'}->{$reader->name}) {
687 my $method = $self->{'_end_elements'}->{$reader->name};
688 $self->$method();
690 else {
691 $self->end_element_default();
693 $self->{'_lastitem'}->{ $reader->name }--;
694 pop @{$self->{'_lastitem'}->{'current'}};
695 $self->{'_currenttext'} = '';
700 =head2 processAttribute
702 Title : processAttribute
703 Usage : $treeio->processAttribute(\%hash_for_attribute);
704 Function: reads the attributes of the current element into a hash
705 Returns : none
706 Args : hash reference where the attributes will be stored.
708 =cut
710 sub processAttribute
712 my ($self, $data) = @_;
713 my $reader = $self->{'_reader'};
715 # several ways of reading attributes:
716 # read all attributes:
717 if ($reader-> moveToFirstAttribute) {
718 do {
719 $data->{$reader->name()} = $reader->value;
720 } while ($reader-> moveToNextAttribute);
721 $reader-> moveToElement;
726 =head2 element_phylogeny
728 Title : element_phylogeny
729 Usage : $treeio->element_phylogeny
730 Function: initialize the parsing of a tree
731 Returns : none
732 Args : none
734 =cut
736 sub element_phylogeny
738 my ($self) = @_;
739 $self->{'_currentitems'} = []; # holds nodes while parsing current level
740 $self->{'_currentnodes'} = []; # holds nodes while constructing tree
741 $self->{'_currentannotation'} = []; # holds annotationcollection
742 $self->{'_currenttext'} = '';
743 $self->{'_levelcnt'} = [];
744 $self->{'_id_link'} = {};
745 $self->{'_tree_attr'} = $self->current_attr;
746 $self->processAttribute($self->current_attr);
747 return;
750 =head2 end_element_phylogeny
752 Title : end_element_phylogeny
753 Usage : $treeio->end_element_phylogeny
754 Function: ends the parsing of a tree building a Tree::TreeI object.
755 Returns : Tree::TreeI
756 Args : none
758 =cut
760 sub end_element_phylogeny
762 my ($self) = @_;
764 my $root;
765 # if there is more than one node in _currentnodes
766 # aggregate the nodes into trees basically ad-hoc.
767 if ( @{$self->{'_currentnodes'}} > 1)
769 $root = $self->nodetype->new(
770 -id => '',
771 tostring => \&node_to_string,
773 while ( @{$self->{'_currentnodes'}} ) {
774 my ($node) = ( shift @{$self->{'_currentnodes'}});
775 $root->add_Descendent($node);
778 # if there is only one node in _currentnodes
779 # that node is root.
780 elsif ( @{$self->{'_currentnodes'}} == 1)
782 $root = shift @{$self->{'_currentnodes'}};
785 my $tree = $self->treetype->new(
786 -root => $root,
787 -id => $self->current_attr->{'name'},
788 %{$self->current_attr}
790 foreach my $tag ( keys %{$self->current_attr} ) {
791 $tree->add_tag_value( $tag, $self->current_attr->{$tag} );
793 return $tree;
796 =head2 element_clade
798 Title : element_clade
799 Usage : $treeio->element_clade
800 Function: initialize the parsing of a node
801 creates a new AnnotatableNode with annotations
802 Returns : none
803 Args : none
805 =cut
807 sub element_clade
809 my ($self) = @_;
810 my $reader = $self->{'_reader'};
811 my %clade_attr = (); # doesn't use current attribute in order to save memory
812 $self->processAttribute(\%clade_attr);
813 # create a node (Annotatable Node)
814 my $tnode = $self->nodetype->new(
815 -id => '',
816 tostring => \&node_to_string,
817 %clade_attr,
819 # add all attributes as annotation collection with tag '_attr'
820 my $ac = $tnode->annotation;
821 my $newattr = Bio::Annotation::Collection->new();
822 foreach my $tag (keys %clade_attr) {
823 my $sv = Bio::Annotation::SimpleValue->new(
824 -value => $clade_attr{$tag}
826 $newattr->add_Annotation($tag, $sv);
828 $ac->add_Annotation('_attr', $newattr);
830 # if there is id_source add clade to _id_link
831 if (exists $clade_attr{'id_source'}) {
832 $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode;
834 # push into temporary list
835 push @{$self->{'_currentitems'}}, $tnode;
838 =head2 end_element_clade
840 Title : end_element_clade
841 Usage : $treeio->end_element_clade
842 Function: ends the parsing of a node
843 Returns : none
844 Args : none
846 =cut
848 sub end_element_clade
850 my ($self) = @_;
851 my $reader = $self->{'_reader'};
853 my $curcount = scalar @{$self->{'_currentnodes'}};
854 my $level = $reader->depth() - 2;
855 my $childcnt = $self->{'_levelcnt'}->[$level+1] || 0;
857 # pop from temporary list
858 my $tnode = pop @{$self->{'_currentitems'}};
859 if ( $childcnt > 0) {
860 if( $childcnt > $curcount)
862 $self->throw("something wrong with event construction treelevel ".
863 "$level is recorded as having $childcnt nodes ".
864 "but current nodes at this level is $curcount\n");
866 my @childnodes = splice( @{$self->{'_currentnodes'}}, - $childcnt);
867 for ( @childnodes ) {
868 $tnode->add_Descendent($_);
870 $self->{'_levelcnt'}->[$level+1] = 0;
872 push @{$self->{'_currentnodes'}}, $tnode;
873 $self->{'_levelcnt'}->[$level]++;
877 =head2 element_relation
879 Title : element_relation
880 Usage : $treeio->element_relation
881 Function: starts the parsing of clade relation & sequence relation
882 Returns : none
883 Args : none
885 =cut
887 sub element_relation
889 my ($self) = @_;
890 $self->processAttribute($self->current_attr);
891 my $relationtype = $self->current_attr->{'type'};
892 my $id_ref_0 = $self->current_attr->{'id_ref_0'};
893 my $id_ref_1 = $self->current_attr->{'id_ref_1'};
895 my @srcbyidref = ();
896 $srcbyidref[0] = $self->{'_id_link'}->{$id_ref_0};
897 $srcbyidref[1] = $self->{'_id_link'}->{$id_ref_1};
899 # exception when id_ref is defined but id_src is not, or vice versa.
900 if ( ($id_ref_0 xor $srcbyidref[0])||($id_ref_1 xor $srcbyidref[1]) ) {
901 $self->throw("id_ref and id_src incompatible: $id_ref_0, $id_ref_1, ", $srcbyidref[0], $srcbyidref[1]);
904 # set id_ref_0
905 my $ac0 = $srcbyidref[0]->annotation;
906 my $newann = Bio::Annotation::Relation->new(
907 '-type' => $relationtype,
908 '-to' => $srcbyidref[1],
909 '-tagname' => $self->current_element
911 $ac0->add_Annotation($self->current_element, $newann);
912 # set id_ref_1
913 my $ac1 = $srcbyidref[1]->annotation;
914 $newann = Bio::Annotation::Relation->new(
915 '-type' => $relationtype,
916 '-to' => $srcbyidref[0],
917 '-tagname' => $self->current_element
919 $ac1->add_Annotation($self->current_element, $newann);
920 push (@{$self->{'_currentannotation'}}, $newann);
923 =head2 end_element_relation
925 Title : end_element_relation
926 Usage : $treeio->end_element_relation
927 Function: ends the parsing of clade relation & sequence relation
928 Returns : none
929 Args : none
931 =cut
933 sub end_element_relation
935 my ($self) = @_;
936 my $ac = pop (@{$self->{'_currentannotation'}});
940 =head2 element_default
942 Title : element_default
943 Usage : $treeio->element_default
944 Function: starts the parsing of all other elements
945 Returns : none
946 Args : none
948 =cut
950 sub element_default
952 my ($self) = @_;
953 my $reader = $self->{'_reader'};
954 my $current = $self->current_element();
955 my $prev = $self->prev_element();
957 # read attributes of element
958 $self->processAttribute($self->current_attr);
960 # check idref
961 my $idref = '';
962 if (exists $self->current_attr->{'id_ref'}) {
963 $idref = $self->current_attr->{'id_ref'};
966 my $srcbyidref = '';
967 $srcbyidref = $self->{'_id_link'}->{$idref};
969 # exception when id_ref is defined but id_src is not, or vice versa.
970 if ($idref xor $srcbyidref) {
971 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
974 # we are annotating a Node
975 # set _currentannotation
976 if ( ($srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade') ) {
977 # find node to annotate
978 my $tnode;
979 if ($srcbyidref) {
980 $tnode = $srcbyidref;
982 else {
983 $tnode = $self->{'_currentitems'}->[-1];
985 my $ac = $tnode->annotation();
986 # add the new anncollection with the current element as key
987 my $newann = Bio::Annotation::Collection->new();
988 $ac->add_Annotation($current, $newann);
989 # push to current annotation
990 push (@{$self->{'_currentannotation'}}, $newann);
992 # we are within sequence_relation or clade_relation
993 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
994 # do nothing?
996 # we are already within an annotation
997 else {
998 my $ac = $self->{'_currentannotation'}->[-1];
999 if ($ac) {
1000 # add the new anncollection with the current element as key
1001 my $newann = Bio::Annotation::Collection->new();
1002 $ac->add_Annotation($current, $newann);
1003 push (@{$self->{'_currentannotation'}}, $newann);
1009 =head2 end_element_default
1011 Title : end_element_default
1012 Usage : $treeio->end_element_default
1013 Function: ends the parsing of all other elements
1014 Returns : none
1015 Args : none
1017 =cut
1019 sub end_element_default
1021 my ($self) = @_;
1022 my $reader = $self->{'_reader'};
1023 my $current = $self->current_element();
1024 my $prev = $self->prev_element();
1026 # check idsrc
1027 my $idsrc = $self->current_attr->{'id_source'};
1029 # check idref
1030 my $idref = '';
1031 if (exists $self->current_attr->{'id_ref'}) {
1032 $idref = $self->current_attr->{'id_ref'};
1033 delete $self->current_attr->{'id_ref'};
1036 my $srcbyidref = '';
1037 $srcbyidref = $self->{'_id_link'}->{$idref};
1039 # exception when id_ref is defined but id_src is not, or vice versa.
1040 if ($idref xor $srcbyidref) {
1041 $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
1044 # we are annotating a Tree
1045 if ((!$srcbyidref) && $prev eq 'phylogeny') {
1046 # annotate Tree via tree attribute
1047 $self->prev_attr->{$current} = $self->{'_currenttext'};
1049 # we are within sequence_relation or clade_relation
1050 elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
1051 my $ann_relation = $self->{'_currentannotation'}->[-1];
1052 # we are here only with <confidence>
1053 if ($current eq 'confidence') {
1054 if (exists $self->current_attr->{'type'}) {
1055 $ann_relation->confidence_type($self->current_attr->{'type'});
1057 $ann_relation->confidence($self->{'_currenttext'});
1059 else {
1060 $self->throw($current, " is not allowed within <*_relation>");
1063 # we are annotating a Node
1064 elsif (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade'))
1066 # pop from current annotation
1067 my $ac = pop (@{$self->{'_currentannotation'}});
1068 $self->annotateNode( $current, $ac);
1070 # additional setups for compatibility with NodeI
1071 my $tnode;
1072 if ($srcbyidref) {
1073 $tnode = $srcbyidref;
1075 else {
1076 $tnode = $self->{'_currentitems'}->[-1];
1078 if ($current eq 'name') {
1079 $tnode->id($self->{'_currenttext'});
1081 elsif ($current eq 'branch_length') {
1082 $tnode->branch_length($self->{'_currenttext'});
1084 elsif ($current eq 'confidence') {
1085 if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
1086 $tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
1089 elsif ($current eq 'sequence') {
1090 # if annotation is <sequence>
1091 # transform the Bio::Annotation object into a Bio::Seq object
1092 my $str = '';
1093 # retrieve the sequence
1094 if (my ($molseq) = $ac->get_Annotations('mol_seq')) {
1095 my ($strac) = $molseq->get_Annotations('_text');
1096 $str = $strac->value();
1098 # create Seq object with sequence
1099 my $newseq = Bio::Seq->new( -seq => $str,
1100 -annotation=>$ac,
1101 -nowarnonempty=>1);
1102 $tnode->sequence($newseq);
1103 $ac->remove_Annotations('mol_seq');
1104 $tnode->annotation->remove_Annotations($current);
1105 # if there is id_source add sequence to _id_link
1106 if ($idsrc) {
1107 $self->{'_id_link'}->{$idsrc} = $newseq;
1110 elsif ($idsrc && $current eq 'taxonomy') {
1111 # if there is id_source add sequence to _id_link
1112 $self->{'_id_link'}->{$idsrc} = $ac;
1115 # we are within a default Annotation
1116 else {
1117 my $ac = pop (@{$self->{'_currentannotation'}});
1118 if ($ac) {
1119 $self->annotateNode( $current, $ac);
1125 =head2 annotateNode
1127 Title : annotateNode
1128 Usage : $treeio->annotateNode($element, $ac)
1129 Function: adds text value and attributes to the AnnotationCollection
1130 that has element name as key. If there are nested elements,
1131 optional AnnotationCollections are added recursively,
1132 with the nested element name as key.
1133 The structure of each AnnotationCollection is
1134 'element' => AnnotationCollection {
1135 '_text' => SimpleValue (text value)
1136 '_attr' => AnnotationCollection {
1137 attribute1 => SimpleValue (attribute value 1)
1138 attribute2 => SimpleValue (attribute value 2)
1141 ['nested element' => AnnotationCollection ]
1143 Returns : none
1144 Args : none
1146 =cut
1148 sub annotateNode
1150 my ($self, $element, $newac) = @_;
1151 # if attribute exists then add Annotation::Collection with tag '_attr'
1152 if ( scalar keys %{$self->current_attr} ) {
1153 my $newattr = Bio::Annotation::Collection->new();
1154 foreach my $tag (keys %{$self->current_attr}) {
1155 my $sv = Bio::Annotation::SimpleValue->new(
1156 -value => $self->current_attr->{$tag}
1158 $newattr->add_Annotation($tag, $sv);
1160 $newac->add_Annotation('_attr', $newattr);
1162 # if text exists add text as SimpleValue with tag '_text'
1163 if ( $self->{'_currenttext'} ) {
1164 my $newvalue = Bio::Annotation::SimpleValue->new( -value => $self->{'_currenttext'} );
1165 $newac->add_Annotation('_text', $newvalue);
1170 =head1 Methods for exploring the document
1172 =cut
1174 =head2 current_attr
1176 Title : current_attr
1177 Usage : $attr_hash = $treeio->current_attr;
1178 Function: returns the attribute hash for current item
1179 Returns : reference of the attribute hash
1180 Args : none
1182 =cut
1184 sub current_attr {
1185 my ($self) = @_;
1187 return 0 if ! defined $self->{'_lastitem'} ||
1188 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1189 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1190 (@keys == 1) || die "there should be only one key for each hash";
1191 return $self->{'_lastitem'}->{'current'}->[-1]->{$keys[0]};
1194 =head2 prev_attr
1196 Title : prev_attr
1197 Usage : $hash_ref = $treeio->prev_attr
1198 Function: returns the attribute hash for previous item
1199 Returns : reference of the attribute hash
1200 Args : none
1202 =cut
1204 sub prev_attr {
1205 my ($self) = @_;
1207 return 0 if ! defined $self->{'_lastitem'} ||
1208 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1209 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1210 (@keys == 1) || die "there should be only one key for each hash";
1211 return $self->{'_lastitem'}->{'current'}->[-2]->{$keys[0]};
1214 =head2 current_element
1216 Title : current_element
1217 Usage : $element = $treeio->current_element
1218 Function: returns the name of the current element
1219 Returns : string (element name)
1220 Args : none
1222 =cut
1224 sub current_element {
1225 my ($self) = @_;
1227 return 0 if ! defined $self->{'_lastitem'} ||
1228 ! defined $self->{'_lastitem'}->{'current'}->[-1];
1229 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-1]};
1230 (@keys == 1) || die "there should be only one key for each hash";
1231 return $keys[0];
1234 =head2 prev_element
1236 Title : prev_element
1237 Usage : $element = $treeio->current_element
1238 Function: returns the name of the previous element
1239 Returns : string (element name)
1240 Args : none
1242 =cut
1244 sub prev_element {
1245 my ($self) = @_;
1247 return 0 if ! defined $self->{'_lastitem'} ||
1248 ! defined $self->{'_lastitem'}->{'current'}->[-2];
1249 my @keys = keys %{$self->{'_lastitem'}->{'current'}->[-2]};
1250 (@keys == 1) || die "there should be only one key for each hash";
1251 return $keys[0];
1255 =head2 treetype
1257 Title : treetype
1258 Usage : $obj->treetype($newval)
1259 Function: returns the tree type (default is Bio::Tree::Tree)
1260 Returns : value of treetype
1261 Args : newvalue (optional)
1264 =cut
1266 sub treetype{
1267 my ($self,$value) = @_;
1268 if( defined $value) {
1269 $self->{'treetype'} = $value;
1271 return $self->{'treetype'};
1274 =head2 nodetype
1276 Title : nodetype
1277 Usage : $obj->nodetype($newval)
1278 Function: returns the node type (default is Bio::Node::AnnotatableNode)
1279 Returns : value of nodetype
1280 Args : newvalue (optional)
1282 =cut
1284 sub nodetype{
1285 my ($self,$value) = @_;
1286 if( defined $value) {
1287 $self->{'nodetype'} = $value;
1289 return $self->{'nodetype'};
1293 =head1 Methods for implementing to_string callback for AnnotatableNode
1295 =cut
1297 =head2 node_to_string
1299 Title : node_to_string
1300 Usage : $annotatablenode->to_string_callback(\&node_to_string)
1301 Function: set as callback in AnnotatableNode, prints the node information in string
1302 Returns : string of node information
1303 Args : none
1305 =cut
1307 # this function is similar to _write_tree_Helper_annotatableNode,
1308 # but it is not recursive
1309 sub node_to_string
1311 my ($self) = @_; # this self is a Bio::Tree::AnnotatableNode
1312 # not a Bio::TreeIO::phyloxml
1313 my $str = '';
1314 my $ac = $self->annotation;
1316 # start <clade>
1317 $str .= '<clade';
1318 my @attr = $ac->get_Annotations('_attr'); # check id_source
1319 if (@attr) {
1320 my @id_source = $attr[0]->get_Annotations('id_source');
1321 if (@id_source) {
1322 $str .= " id_source=\"".$id_source[0]->value."\"";
1325 $str .= '>';
1327 # print all annotations
1328 $str = print_annotation( $self, $str, $ac );
1329 # print all sequences
1330 if ($self->has_sequence) {
1331 foreach my $seq (@{$self->sequence}) {
1332 $str = print_seq_annotation( $self, $str, $seq );
1336 $str .= '</clade>';
1337 return $str;
1340 =head2 print_annotation
1342 Title : print_annotation
1343 Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection)
1344 Function: prints the annotationCollection in a phyloXML format.
1345 Returns : string of annotation information
1346 Args : string to which the Annotation should be concatenated to,
1347 annotationCollection that holds the Annotations
1349 =cut
1351 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1352 # and have this subroutine within that object so it can handle the
1353 # reading and writing of the values and attributes.
1354 # especially since this function is used both by
1355 # Bio::TreeIO::phyloxml (through write_tree) and
1356 # Bio::Node::AnnotatableNode (through node_to_string).
1357 # but since tagTree is a temporary stub and I didn't want to make
1358 # a redundant object I've put it here for now.
1360 sub print_annotation
1362 my ($self, $str, $ac) = @_;
1364 my @all_anns = $ac->get_Annotations();
1365 foreach my $ann (@all_anns) {
1366 my $key = $ann->tagname;
1367 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1368 if ($ann->isa('Bio::Annotation::SimpleValue'))
1370 if ($key eq '_text') {
1371 $str .= $ann->value;
1373 else {
1374 $str .= "<$key>";
1375 $str .= $ann->value;
1376 $str .= "</$key>";
1379 elsif ($ann->isa('Bio::Annotation::Collection'))
1381 my @attrs = $ann->get_Annotations('_attr');
1382 if (@attrs) { # if there is a attribute collection
1383 $str .= "<$key";
1384 $str = print_attr($self, $str, $attrs[0]);
1385 $str .= ">";
1387 else {
1388 $str .= "<$key>";
1390 $str = print_annotation($self, $str, $ann);
1391 $str .= "</$key>";
1394 return $str;
1397 =head2 print_attr
1399 Title : print_attr
1400 Usage : $str = $annotatablenode->print_attr($str, $annotationcollection)
1401 Function: prints the annotationCollection in a phyloXML format.
1402 Returns : string of attributes
1403 Args : string to which the Annotation should be concatenated to,
1404 AnnotationCollection that holds the attributes
1406 =cut
1408 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1409 # and have this subroutine within that object so it can handle the
1410 # reading and writing of the values and attributes.
1411 # especially since this function is used both by
1412 # Bio::TreeIO::phyloxml and Bio::Node::AnnotatableNode
1413 # (through print_annotation).
1414 # but since tagTree is a temporary stub and I didn't want to make
1415 # a redundant object I've put it here for now.
1417 sub print_attr
1419 my ($self, $str, $ac) = @_;
1420 my @all_attrs = $ac->get_Annotations();
1421 foreach my $attr (@all_attrs) {
1422 if (!$attr->isa('Bio::Annotation::SimpleValue')) {
1423 $self->throw("attribute should be a SimpleValue");
1425 $str .= ' ';
1426 $str .= $attr->tagname;
1427 $str .= '=';
1428 $str .= '"'.$attr->value.'"';
1430 return $str;
1433 =head2 print_sequence_annotation
1435 Title : print_sequence_annotation
1436 Usage : $str = $node->print_seq_annotation( $str, $seq );
1437 Function: prints the Bio::Seq object associated with the node
1438 in a phyloXML format.
1439 Returns : string that describes the sequence
1440 Args : string to which the Annotation should be concatenated to,
1441 Seq object to print in phyloXML
1443 =cut
1445 # Again, it may be more appropriate to make a separate Annotation::phyloXML object
1446 # and have this subroutine within that object so it can handle the
1447 # reading and writing of the values and attributes.
1448 # especially since this function is used both by
1449 # Bio::TreeIO::phyloxml (through write_tree) and
1450 # Bio::Node::AnnotatableNode (through node_to_string).
1451 # but since tagTree is a temporary stub and I didn't want to make
1452 # a redundant object I've put it here for now.
1455 sub print_seq_annotation
1457 my ($self, $str, $seq) = @_;
1459 $str .= "<sequence";
1460 my ($attr) = $seq->annotation->get_Annotations('_attr'); # check id_source
1461 if ($attr) {
1462 my ($id_source) = $attr->get_Annotations('id_source');
1463 if ($id_source) {
1464 $str .= " id_source=\"".$id_source->value."\"";
1467 $str .= ">";
1469 my @all_anns = $seq->annotation->get_Annotations();
1470 foreach my $ann (@all_anns) {
1471 my $key = $ann->tagname;
1472 if ($key eq '_attr') { next; } # attributes are already printed in the previous level
1473 if ($ann->isa('Bio::Annotation::SimpleValue'))
1475 if ($key eq '_text') {
1476 $str .= $ann->value;
1478 else {
1479 $str .= "<$key>";
1480 $str .= $ann->value;
1481 $str .= "</$key>";
1484 elsif ($ann->isa('Bio::Annotation::Collection'))
1486 my @attrs = $ann->get_Annotations('_attr');
1487 if (@attrs) { # if there is a attribute collection
1488 $str .= "<$key";
1489 $str = print_attr($self, $str, $attrs[0]);
1490 $str .= ">";
1492 else {
1493 $str .= "<$key>";
1495 $str = print_annotation($self, $str, $ann);
1496 $str .= "</$key>";
1499 # print mol_seq
1500 if ($seq->seq()) {
1501 $str .= "<mol_seq>";
1502 $str .= $seq->seq();
1503 $str .= "</mol_seq>";
1506 $str .= "</sequence>";
1507 return $str;