1 # $Id: TagTree.pm 11693 2007-09-17 20:54:04Z cjfields $
3 # BioPerl module for Bio::Annotation::TagTree
5 # Cared for Chris Fields
7 # You may distribute this module under the same terms as perl itself.
8 # Refer to the Perl Artistic License (see the license accompanying this
9 # software package, or see http://www.perl.com/language/misc/Artistic.html)
10 # for the terms under which you may use, modify, and redistribute this module.
12 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
13 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
14 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 # POD documentation - main docs before the code
20 Bio::Annotation::TagTree - AnnotationI with tree-like hierarchal key-value
21 relationships ('structured tags') that can be represented as simple text.
25 use Bio::Annotation::TagTree;
26 use Bio::Annotation::Collection;
28 my $col = Bio::Annotation::Collection->new();
30 # data structure can be an array reference with a data structure
31 # corresponding to that defined by Data::Stag:
33 my $sv = Bio::Annotation::TagTree->new(-tagname => 'mytag1',
34 -value => $data_structure);
35 $col->add_Annotation($sv);
37 # regular text passed is parsed based on the tagformat().
38 my $sv2 = Bio::Annotation::TagTree->new(-tagname => 'mytag2',
41 $col->add_Annotation($sv2);
45 This takes tagged data values and stores them in a hierarchal structured
46 element-value hierarchy (complements of Chris Mungall's Data::Stag module). Data
47 can then be represented as text using a variety of output formats (indention,
48 itext, xml, spxr). Furthermore, the data structure can be queried using various
49 means. See L<Data::Stag> for details.
51 Data passed in using value() or the '-value' parameter upon instantiation
54 1) an array reference corresponding to the data structure for Data::Stag;
56 2) a text string in 'xml', 'itext', 'spxr', or 'indent' format. The default
57 format is 'xml'; this can be changed using tagformat() prior to using value() or
58 by passing in the proper format using '-tagformat' upon instantiation;
60 3) another Bio::Annotation::TagTree or Data::Stag node instance. In both cases
61 a deep copy (duplicate) of the instance is generated.
63 Beyond checking for an array reference no format guessing occurs (so, for
64 roundtrip tests ensure that the IO formats correspond). For now, we recommend
65 when using text input to set tagformat() to one of these formats prior to data
66 loading to ensure the proper Data::Stag parser is selected. After data loading,
67 the tagformat() can be changed to change the text string format returned by
68 value(). (this may be rectified in the future)
70 This Annotation type is fully BioSQL compatible and could be considered a
71 temporary replacement for nested Bio::Annotation::Collections, at least until
72 BioSQL and bioperl-db can support nested annotation collections.
78 User feedback is an integral part of the evolution of this and other
79 Bioperl modules. Send your comments and suggestions preferably to one
80 of the Bioperl mailing lists. Your participation is much appreciated.
82 bioperl-l@bioperl.org - General discussion
83 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87 Please direct usage questions or support issues to the mailing list:
89 I<bioperl-l@bioperl.org>
91 rather than to the module maintainer directly. Many experienced and
92 reponsive experts will be able look at the problem and quickly
93 address it. Please include a thorough description of the problem
94 with code and data examples if at all possible.
98 Report bugs to the Bioperl bug tracking system to help us keep track
99 the bugs and their resolution. Bug reports can be submitted via
102 https://github.com/bioperl/bioperl-live/issues
110 The rest of the documentation details each of the object methods. Internal
111 methods are usually preceded with a _
115 # Let the code begin...
117 package Bio
::Annotation
::TagTree
;
121 # Object preamble - inherits from Bio::Root::Root
123 use base
qw(Bio::Annotation::SimpleValue);
129 Usage : my $sv = Bio::Annotation::TagTree->new();
130 Function: Instantiate a new TagTree object
131 Returns : Bio::Annotation::TagTree object
132 Args : -value => $value to initialize the object data field [optional]
133 -tagname => $tag to initialize the tagname [optional]
134 -tagformat => format for output [optional]
135 (types 'xml', 'itext', 'sxpr', 'indent', default = 'itext')
136 -node => Data::Stag node or Bio::Annotation::TagTree instance
141 my ( $class, @args ) = @_;
142 my $self = $class->SUPER::new
();
143 my ( $node, $value, $tag, $format, $verbose ) = $self->_rearrange(
154 $self->throw("Cant use both node and value; mutually exclusive")
155 if defined $node && defined $value;
156 defined $tag && $self->tagname($tag);
158 $self->tagformat($format);
159 defined $value && $self->value($value);
160 defined $node && $self->node($node);
161 defined $verbose && $self->verbose($verbose);
165 =head1 AnnotationI implementing functions
172 Usage : my $text = $obj->as_text
173 Function: return the string "Value: $v" where $v is the value
181 return "TagTree: " . $self->value;
187 Usage : my $str = $ann->display_text();
188 Function: returns a string. Unlike as_text(), this method returns a string
189 formatted as would be expected for the specific implementation.
191 One can pass a callback as an argument which allows custom text
192 generation; the callback is passed the current instance and any text
196 Args : [optional] callback
201 my $DEFAULT_CB = sub { $_[0]->value || '' };
204 my ( $self, $cb ) = @_;
206 $self->throw("Callback must be a code reference") if ref $cb ne 'CODE';
215 Usage : my $hashtree = $value->hash_tree
216 Function: For supporting the AnnotationI interface just returns the value
217 as a hashref with the key 'value' pointing to the value
218 Maybe reimplement using Data::Stag::hash()?
227 $h->{'value'} = $self->value;
233 Usage : $obj->tagname($newval)
234 Function: Get/set the tagname for this annotation value.
236 Setting this is optional. If set, it obviates the need to provide
237 a tag to AnnotationCollection when adding this object.
239 Returns : value of tagname (a scalar)
240 Args : new value (a scalar, optional)
245 my ( $self, $value ) = @_;
246 if ( defined $value ) {
247 $self->{'tagname'} = $value;
249 return $self->{'tagname'};
252 =head1 Specific accessors for TagTree
259 Usage : $obj->value($newval)
260 Function: Get/set the value for this annotation.
261 Returns : value of value
262 Args : newvalue (optional)
267 my ( $self, $value ) = @_;
269 # set mode? This resets the entire tagged database
270 my $format = $self->tagformat;
273 if ( ref $value eq 'ARRAY' ) {
275 # note the tagname() is not used here; it is only used for
276 # storing this AnnotationI in the annotation collection
277 eval { $self->{db
} = Data
::Stag
->nodify($value) };
281 # assuming this is blessed; passing on to node() and copy
282 $self->node( $value, 'copy' );
287 # not trying to guess here for now; we go by the tagformat() setting
288 my $h = Data
::Stag
->getformathandler($format);
289 eval { $self->{db
} = Data
::Stag
->from( $format . 'str', $value ) };
291 $self->throw("Data::Stag error:\n$@") if $@
;
295 # How do we return a data structure?
296 # for now, we use the output (if there is a Data::Stag node present)
297 # may need to run an eval {} to catch Data::Stag output errors
298 $self->node->$format;
304 Usage : $obj->tagformat($newval)
305 Function: Get/set the output tag format for this annotation.
306 Returns : value of tagformat
307 Args : newvalue (optional) - format for the data passed into value
308 must be of values 'xml', 'indent', 'sxpr', 'itext', 'perl'
312 my %IS_VALID_FORMAT = map { $_ => 1 } qw(xml indent sxpr itext);
315 my ( $self, $value ) = @_;
316 if ( defined $value ) {
317 $self->throw( "$value is not a valid format; valid format types:\n"
318 . join( ',', map { "'$_'" } keys %IS_VALID_FORMAT ) )
319 if !exists $IS_VALID_FORMAT{$value};
320 $self->{'tagformat'} = $value;
322 return $self->{'tagformat'};
329 Function: Get/set the topmost Data::Stag node used for this annotation.
330 Returns : Data::Stag node implementation
331 (default is Data::Stag::StagImpl)
332 Args : (optional) Data::Stag node implementation
333 (optional)'copy' => flag to create a copy of the node
338 my ( $self, $value, $copy ) = @_;
339 if ( defined $value && ref $value ) {
341 $value->isa('Data::Stag::StagI')
342 ?
( $copy && $copy eq 'copy' ?
$value->duplicate : $value )
343 : $value->isa('Bio::Annotation::TagTree') ?
( $copy
344 && $copy eq 'copy' ?
$value->node->duplicate : $value->node )
346 'Object must be Data::Stag::StagI or Bio::Annotation::TagTree');
349 # lazily create Data::Stag instance if not present
350 if (!$self->{'db'}) {
351 $self->{'db'} = Data
::Stag
->new();
353 return $self->{'db'};
356 =head2 Data::Stag convenience methods
358 Because Data::Stag uses blessed arrays and the core Bioperl class uses blessed
359 hashes, TagTree uses an internal instance of a Data::Stag node for data storage.
360 Therefore the following methods actually delegate to the Data:::Stag internal
363 For consistency (since one could recursively check child nodes), methods retain
364 the same names as Data::Stag. Also, no 'magic' (AUTOLOAD'ed) methods are
365 employed, simply b/c full-fledged Data::Stag functionality can be attained by
366 grabbing the Data::Stag instance using node().
372 Function: Returns the element name (key name) for this node
381 return $self->node->element;
388 Function: Returns the data structure (array ref) for this node
397 return $self->node->data;
404 Function: Get the top-level array of Data::Stag nodes or (if the top level is
405 a terminal node) a scalar value.
407 This is similar to StructuredValue's get_values() method, with the
408 key difference being instead of array refs and scalars you get either
409 Data::Stag nodes or the value for this particular node.
411 For consistency (since one could recursively check nodes),
412 we use the same method name as Data::Stag children().
421 return $self->node->children;
428 Function: Get the top-level array of Data::Stag nodes. Unlike children(),
429 this only returns an array of nodes (if this is a terminal node,
430 no value is returned)
432 Returns : an array of nodes
439 return $self->node->subnodes;
446 Function: Returns the nodes or value for the named element or path
448 Returns : returns array of nodes or a scalar (if node is terminal)
449 dependent on wantarray
455 my ( $self, @vals ) = @_;
456 return $self->node->get(@vals);
463 Function: Recursively searches for and returns the nodes or values for the
464 named element or path
466 Returns : returns array of nodes or scalars (for terminal nodes)
472 my ( $self, @vals ) = @_;
473 return $self->node->find(@vals);
480 Function: Recursively searches for and returns a list of nodes
481 of the given element path
483 Returns : returns array of nodes
489 my ( $self, @vals ) = @_;
490 return $self->node->findnode(@vals);
499 Returns : returns array of nodes or values
505 my ( $self, @vals ) = @_;
506 return $self->node->findval(@vals);
512 Usage : $struct->addchild(['name' => [['foo'=> 'bar1']]]);
513 Function: add new child node to the current node. One can pass in a node, TagTree,
514 or data structure; for instance, in the above, this would translate
522 Args : first arg = element name
523 all other args are added as tag-value pairs
528 my ( $self, @vals ) = @_;
530 # check for element tag first (if no element, must be empty Data::Stag node)
531 if ( !$self->element ) {
533 # try to do the right thing; if more than one element, wrap in array ref
534 @vals > 1 ?
$self->value( \
@vals ) : $self->value( $vals[0] );
537 elsif ( !$self->node->ntnodes ) {
539 # if this is a terminal node, can't add to it (use set?)
540 $self->throw("Can't add child to node; only terminal node is present!");
543 return $self->node->addchild(@vals);
550 Usage : $struct->add('foo', 'bar1', 'bar2', 'bar3');
551 Function: add tag-value nodes to the current node. In the above, this would
552 translate to (in XML):
557 Args : first arg = element name
558 all other args are added as tag-value pairs
563 my ( $self, @vals ) = @_;
565 # check for empty object and die for now
566 if ( !$self->node->element ) {
567 $self->throw("Can't add to terminal element!");
569 return $self->node->add(@vals);
575 Usage : $struct->set('foo','bar');
576 Function: sets a single tag-value pair in the current node. Note this
577 differs from add() in that this replaces any data already present
579 Args : first arg = element name
580 all other args are added as tag-value pairs
585 my ( $self, @vals ) = @_;
587 # check for empty object
588 if ( !$self->node->element ) {
589 $self->throw("Can't add to tree; empty tree!");
591 return $self->node->set(@vals);
597 Usage : $struct->unset('foo');
598 Function: unsets all key-value pairs of the passed element from the
606 my ( $self, @vals ) = @_;
607 return $self->node->unset(@vals);
613 Usage : $struct->free
614 Function: removes all data from the current node
622 return $self->node->free;
628 Usage : $struct->hash;
629 Function: turns the tag-value tree into a hash, all data values are array refs
631 Args : first arg = element name
632 all other args are added as tag-value pairs
638 return $self->node->hash;
644 Usage : $struct->pairs;
645 Function: turns the tag-value tree into a hash, all data values are scalar
647 Args : first arg = element name
648 all other args are added as tag-value pairs, note that duplicates
655 return $self->node->pairs;
661 Usage : @persons = $s->qmatch('person', ('name'=>'fred'));
662 Function : returns all elements in the node tree which match the
663 element name and the key-value pair
664 Returns : Array of nodes
665 Args : return-element str, match-element str, match-value str
670 my ( $self, @vals ) = @_;
671 return $self->node->qmatch(@vals);
677 Usage : @termini = $s->tnodes;
678 Function : returns all terminal nodes below this node
679 Returns : Array of nodes
680 Args : return-element str, match-element str, match-value str
686 return $self->node->tnodes;
692 Usage : @termini = $s->ntnodes;
693 Function : returns all nonterminal nodes below this node
694 Returns : Array of nodes
695 Args : return-element str, match-element str, match-value str
701 return $self->node->ntnodes;
704 =head2 StructureValue-like methods
708 =head2 get_all_values
710 Title : get_all_values
711 Usage : @termini = $s->get_all_values;
712 Function : returns all terminal node values
713 Returns : Array of values
714 Args : return-element str, match-element str, match-value str
716 This is meant to emulate the values one would get from StructureValue's
717 get_all_values() method. Note, however, using this method dissociates the
718 tag-value relationship (i.e. you only get the value list, no elements)
724 my @kids = $self->children;
726 while ( my $val = shift @kids ) {
727 ( ref $val ) ?
push @kids, $val->children : push @vals, $val;