2 # BioPerl module for Bio::SeqIO::game::gameWriter
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Sheldon McKay <mckays@cshl.edu>
8 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqIO::game::gameWriter -- a class for writing game-XML
21 my $in = Bio::SeqIO->new( -format => 'genbank',
22 -file => 'myfile.gbk' );
23 my $out = Bio::SeqIO->new( -format => 'game',
24 -file => 'myfile.xml' );
26 # get a sequence object
27 my $seq = $in->next_seq;
29 #write it in GAME format
30 $out->write_seq($seq);
34 Bio::SeqIO::game::gameWriter writes GAME-XML (v. 1.2) that is readable
35 by Apollo. It is best not used directly. It is accessed via
42 User feedback is an integral part of the evolution of this and other
43 Bioperl modules. Send your comments and suggestions preferably to one
44 of the Bioperl mailing lists.
46 Your participation is much appreciated.
48 bioperl-l@bioperl.org - General discussion
49 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 Please direct usage questions or support issues to the mailing list:
55 I<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 of the bugs and their resolution. Bug reports can be submitted via the
68 https://github.com/bioperl/bioperl-live/issues
70 =head1 AUTHOR - Sheldon McKay
76 The rest of the documentation details each of the object
77 methods. Internal methods are usually preceded with a _
81 package Bio
::SeqIO
::game
::gameWriter
;
86 use Bio
::SeqFeature
::Generic
;
87 use Bio
::SeqFeature
::Tools
::Unflattener
;
89 use base
qw(Bio::SeqIO::game::gameSubs);
94 Usage : my $writer = Bio::SeqIO::game::gameWriter->new($seq);
95 Function: constructor method for gameWriter
96 Returns : a game writer object
97 Args : a Bio::SeqI implementing object
98 optionally, an argument to set map_position to on.
99 ( map => 1 ). This will create a map_position elemant
100 that will cause the feature coordinates to be remapped to
101 a parent seqeunce. A sequence name in the format seq:xxx-xxx
102 is expected to determine the offset for the map_position.
103 The default behavior is to have features mapped relative to
104 the sequence contained in the GAME-XML file
109 my ($caller, $seq, %arg) = @_;
110 my $class = ref($caller) || $caller;
111 my $self = bless ( { seq
=> $seq }, $class );
113 # make a <map_position> element only if requested
114 $self->{map} = 1 if $arg{map};
115 $self->{anon_set_counters
} = {}; #counters for numbering anonymous result and feature sets
121 Title : write_to_game
122 Usage : $writer->write_to_game
123 Function: writes the sequence object to game-XML
124 Returns : xml as a multiline string
131 my $seq = $self->{seq
};
132 my @feats = $seq->remove_SeqFeatures;
134 # intercept nested features
135 my @nested_feats = grep { $_->get_SeqFeatures } @feats;
136 @feats = grep { !$_->get_SeqFeatures } @feats;
137 map { $seq->add_SeqFeature($_) } @feats;
139 # NB -- Maybe this belongs in Bio::SeqFeatute::Tools::Unflattener
141 # # intercept non-coding RNAs and transposons with contained genes
142 # # GAME-XML has these features as top level annotations which contain
144 # my @gene_containers = ();
147 # if ( $_->primary_tag =~ /[^m]RNA|repeat_region|transpos/ &&
148 # $_->has_tag('gene') ) {
149 # my @genes = $_->get_tag_values('gene');
150 # my ($min, $max) = (10000000000000,-10000000000000);
151 # for my $g ( @genes ) {
153 # for my $item ( @feats ) {
154 # next unless $item->primary_tag eq 'gene';
155 # my ($n) = $item->get_tag_values('gene');
156 # next unless $n =~ /$g/;
160 # next unless $gene && ref $gene;
161 # $max = $gene->end if $gene->end > $max;
162 # $min = $gene->start if $gene->start < $min;
165 # push @gene_containers, $_ if $_->length >= ($max - $min);
168 # $seq->add_SeqFeature($_);
173 my $uf = Bio
::SeqFeature
::Tools
::Unflattener
->new;
174 $uf->unflatten_seq( -seq
=> $seq, use_magic
=> 1 );
176 # rearrange snRNA and transposon hierarchies
177 # $self->_rearrange_hierarchies($seq, @gene_containers);
179 # add back nested feats
180 $seq->add_SeqFeature( $_ ) foreach @nested_feats;
185 # write the XML to a string
186 my $xml_handle = IO
::String
->new($xml);
187 my $writer = XML
::Writer
->new(OUTPUT
=> $xml_handle,
192 $self->{writer
} = $writer;
193 # $writer->xmlDecl("UTF-8");
194 # $writer->doctype("game", 'game', "http://www.fruitfly.org/annot/gamexml.dtd.txt");
195 $writer->comment("GAME-XML generated by Bio::SeqIO::game::gameWriter");
196 $writer->comment("Created " . localtime);
197 $writer->comment('Questions: mckays@cshl.edu');
198 $writer->startTag('game', version
=> 1.2);
200 my @sources = grep { $_->primary_tag =~ /source|origin|region/i } $seq->get_SeqFeatures;
202 for my $source ( @sources ) {
203 next unless $source->length == $seq->length;
204 for ( qw{ name description db_xref organism md5checksum
} ) {
205 if ( $source->has_tag($_) ) {
206 $self->{has_organism
} = 1 if /organism/;
207 ($atts->{$_}) = $source->get_tag_values($_);
213 #set a name in the attributes if none was given
214 $atts->{name
} ||= $seq->accession_number ne 'unknown'
215 ?
$seq->accession_number : $seq->display_name;
217 $self->_seq($seq, $atts);
219 # make a map_position element if req'd
220 if ( $self->{map} ) {
222 if ( $atts->{mol_type
} || $seq->alphabet ) {
223 $seqtype = $atts->{mol_type
} || $seq->alphabet;
226 $seqtype = 'unknown';
231 seq
=> $atts->{name
},
235 my ($arm, $start, undef, $end) = $atts->{name
} =~ /(\S+):(-?\d+)(\.\.|-)(-?\d+)/;
236 $self->_element('arm', $arm) if $arm;
237 $self->_span($start, $end);
238 $writer->endTag('map_position');
241 for ( $seq->top_SeqFeatures ) {
243 if($_->isa('Bio::SeqFeature::Computation')) {
244 $self->_comp_analysis($_);
247 # if the feature has subfeatures, we will assume it is a gene
248 # (hope this is safe!)
249 if ( $_->get_SeqFeatures ) {
250 $self->_write_gene($_);
252 # non-gene stuff only
253 next if $_->primary_tag =~ /CDS|mRNA|exon|UTR/;
254 $self->_write_feature($_);
259 $writer->endTag('game');
264 =head2 _rearrange_hierarchies
266 Title : _rearrange_hierarchies
267 Usage : $self->_rearrange_hierarchies($seq)
268 Function: internal method to rearrange gene containment hierarchies
269 so that snRNA or transposon features contain their genes
270 rather than the other way around
272 Args : a Bio::RichSeq object
273 Note : Not currently used, may be removed
277 sub _rearrange_hierarchies
{ #renamed to not conflict with Bio::Root::_rearrange
278 my ($self, $seq, @containers) = @_;
279 my @feats = $seq->remove_SeqFeatures;
280 my @genes = grep { $_->primary_tag eq 'gene' } @feats;
281 my @addback = grep { $_->primary_tag ne 'gene' } @feats;
283 for ( @containers ) {
284 my @has_genes = $_->get_tag_values('gene');
285 for my $has_gene ( @has_genes ) {
286 for my $gene ( @genes ) {
288 my ($gname) = $gene->get_tag_values('gene');
289 if ( $gname eq $has_gene ) {
290 $_->add_SeqFeature($gene);
297 push @addback, (@containers, grep { defined $_ } @genes );
298 $seq->add_SeqFeature($_) foreach @addback;
302 =head2 _write_feature
304 Title : _write_feature
305 Usage : $seld->_write_feature($feat, 1)
306 Function: internal method for writing generic features as <annotation> elements
308 Args : a Bio::SeqFeature::Generic object and an optional flag to write a
309 bare feature set with no annotation wrapper
314 my ($self, $feat, $bare) = @_;
315 my $writer = $self->{writer
};
318 for ( 'standard_name', $feat->primary_tag, 'ID' ) {
319 $id = $self->_find_name($feat, $_ );
323 $id ||= $feat->primary_tag . '_' . ++$self->{$feat->primary_tag}->{id
};
326 $writer->startTag('annotation', id
=> $id);
327 $self->_element('name', $id);
328 $self->_element('type', $feat->primary_tag);
331 $writer->startTag('feature_set', id
=> $id);
332 $self->_element('name', $id);
333 $self->_element('type', $feat->primary_tag);
334 $self->_render_tags( $feat,
336 \
&_render_comment_tags
,
337 \
&_render_tags_as_properties
339 $self->_feature_span($id, $feat);
340 $writer->endTag('feature_set');
341 $writer->endTag('annotation') unless $bare;
347 Usage : $self->_write_gene($feature)
348 Function: internal method for rendering gene containment hierarchies into
349 a nested <annotation> element
351 Args : a nested Bio::SeqFeature::Generic gene feature
352 Note : A nested gene hierarchy (gene->mRNA->CDS->exon) is expected. If other gene
353 subfeatures occur as level one subfeatures (same level as mRNA subfeats)
354 an attempt will be made to link them to transcripts via the 'standard_name'
360 my ($self, $feat) = @_;
361 my $writer = $self->{writer
};
362 my $str = $feat->strand;
363 my $id = $self->_find_name($feat, 'standard_name')
364 || $self->_find_name($feat, 'gene')
365 || $self->_find_name($feat, $feat->primary_tag)
366 || $self->_find_name($feat, 'locus_tag')
367 || $self->_find_name($feat, 'symbol')
368 || $self->throw(<<EOM."Feature name was: '".($feat->display_name || 'not set')."'");
369 Could not find a gene/feature ID, feature must have a primary tag or a tag
370 with one of the names: 'standard_name', 'gene', 'locus_tag', or 'symbol'.
372 my $gid = $self->_find_name($feat, 'gene') || $id;
374 $writer->startTag('annotation', id
=> $id);
375 $self->_element('name', $gid);
376 $self->_element('type', $feat->primary_tag);
377 $self->_render_tags( $feat,
379 \
&_render_dbxref_tags
,
380 \
&_render_comment_tags
,
381 \
&_render_tags_as_properties
,
386 if ( $feat->primary_tag eq 'gene' ) {
390 # we are in a gene container; gene must then be one level down
391 @genes = grep { $_->primary_tag eq 'gene' } $feat->get_SeqFeatures;
394 for my $g ( @genes ) {
395 my $id ||= $self->_find_name($g, 'standard_name')
396 || $self->_find_name($g, 'gene')
397 || $self->_find_name($feat, 'locus_tag')
398 || $self->_find_name($feat, 'symbol')
399 || $self->throw("Could not find a gene ID");
400 my $gid ||= $self->_find_name($g, 'gene') || $self->_find_name($g);
402 $writer->startTag('gene', association
=> 'IS');
403 $self->_element('name', $gid);
404 $writer->endTag('gene');
407 my @mRNAs = grep { $_->primary_tag =~ /mRNA|transcript/ } $g->get_SeqFeatures;
408 my @other_stuff = grep { $_->primary_tag !~ /mRNA|transcript/ } $g->get_SeqFeatures;
409 my @variants = ('A' .. 'Z');
411 for my $mRNA (@mRNAs) {
413 # if the mRNA is a generic transcript, it must be a non-spliced RNA gene
414 # Make a synthetic exon to help build a hierarchy. We have to assume that
415 # the location is not segmented (otherwise it should be a mRNA)
416 if ( $mRNA->primary_tag eq 'transcript') {
417 my $exon = Bio
::SeqFeature
::Generic
->new ( -primary
=> 'exon' );
418 $exon->location($mRNA->location);
419 $mRNA->add_SeqFeature($exon);
422 # no subfeats? Huh? revert to generic feature
423 unless ( $mRNA->get_SeqFeatures ) {
424 $self->_write_feature($mRNA, 1); # 1 flag writes the bare feature
425 # with no annotation wrapper
429 my $name = $self->_find_name($mRNA, $mRNA->primary_tag)
430 || $self->_find_name($mRNA, 'standard_name');
433 my ($cds) = grep { $_->primary_tag eq 'CDS' } $mRNA->get_SeqFeatures;
435 # make sure we have the right CDS for alternatively spliced genes
436 # This is meant to deal with sequences from flattened game annotations,
437 # where both the mRNA and CDS have split locations
438 if ( $cds && @mRNAs > 1 && $name ) {
439 $cds = $self->_check_cds($cds, $name);
441 elsif ( $cds && @mRNAs == 1 ) {
442 # The mRNA/CDS pairing must be right. Get the transcript name from the CDS
443 if ( $cds->has_tag('standard_name') ) {
444 ($name) = $cds->get_tag_values('standard_name');
449 # assign a name to the transcript if it has no 'standard_name' binder
450 $name = $id . '-R' . (shift @variants);
456 ($sn) = $cds->get_tag_values('standard_name')
457 if $cds->has_tag('standard_name');
458 ($sn) ||= $cds->get_tag_values('mRNA')
459 if $cds->has_tag('mRNA');
461 # the protein needs a name
462 my $psn = $self->protein_id($cds, $sn);
463 $self->{curr_pname
} = $psn;
465 # the mRNA need to know the name of its protein
466 unless ( $feat->has_tag('protein_id') ) {
467 $feat->add_tag_value('protein_id', $psn);
470 # define the translation offset
471 my ($c_start, $c_end);
472 if ( $cds->has_tag('codon_start') ){
473 ($c_start) = $cds->get_tag_values('codon_start');
474 $cds->remove_tag('codon_start');
479 my $cs = Bio
::SeqFeature
::Generic
->new;
480 if ( $c_start == 1 ) {
481 $c_start = $cds->strand > 0 ?
$cds->start : $cds->end;
483 if ( $cds->strand < 1 ) {
485 $c_start = $c_start - 2;
488 $c_end = $c_start + 2;
490 $cs->start($c_start);
492 $cs->strand($cds->strand);
493 $cs->primary_tag('start_codon');
494 $cs->add_tag_value( 'standard_name' => $name );
498 if ( $cds->has_tag('problem') ) {
499 my ($val) = $cds->get_tag_values('problem');
500 $cds->remove_tag('problem');
501 $attributes{problem
} = $val;
504 my ($aa) = $cds->get_tag_values('translation')
505 if $cds->has_tag('translation');
508 $cds->remove_tag('translation');
510 $add_seq{residues
} = $aa;
511 $add_seq{header
} = ['seq',
513 length => length $aa,
516 if ( $cds->has_tag('product_desc') ) {
517 ($add_seq{desc
}) = $cds->get_tag_values('product_desc');
518 $cds->remove_tag('product_desc');
521 unless ( $add_seq{desc
} && $add_seq{desc
} =~ /cds_boundaries/ ) {
522 my $start = $cds->start;
524 my $str = $cds->strand;
525 my $acc = $self->{seq
}->accession || $self->{seq
}->display_id;
526 $str = $str < 0 ?
'[-]' : '';
527 $add_seq{desc
} = "translation from_gene[$gid] " .
528 "cds_boundaries:(" . $acc .
529 ":$start..$end$str) transcript_info:[$name]";
531 $self->{add_seqs
} ||= [];
532 push @
{$self->{add_seqs
}}, \
%add_seq;
537 $writer->startTag('feature_set', id
=> $name);
538 $self->_element('name', $name);
539 $self->_element('type', 'transcript');
540 $self->_render_tags($_,
542 \
&_render_comment_tags
,
543 \
&_render_tags_as_properties
,
544 ) for ( $mRNA, ($cds) || () );
546 # any UTR's, etc associated with this transcript?
547 for my $thing ( @other_stuff ) {
548 if ( $thing->has_tag('standard_name') ) {
549 my ($v) = $thing->get_tag_values('standard_name');
557 push @units, grep { $_->primary_tag eq 'exon' } $mRNA->get_SeqFeatures;
558 @units = sort { $a->start <=> $b->start } @units;
563 @units = reverse @units;
566 for my $unit ( @units ) {
567 if ( $unit->primary_tag eq 'exon' ) {
569 $ename .= ':' . ++$count;
570 $self->_feature_span($ename, $unit);
572 elsif ( $unit->primary_tag eq 'start_codon' ) {
573 $self->_feature_span(($sn || $gid), $unit, $self->{curr_pname
});
576 my $uname = $unit->primary_tag . ":$id";
577 $self->_feature_span($uname, $unit);
580 $self->{curr_pname
} = '';
581 $writer->endTag('feature_set');
584 $self->{other_stuff
} = \
@other_stuff;
587 $writer->endTag('annotation');
589 # add the protein sequences
590 for ( @
{$self->{add_seqs
}} ) {
592 $writer->startTag(@
{$h{header
}});
593 my @desc = split /\s+/, $h{desc
};
595 for my $word (@desc) {
596 my ($lastline) = $desc =~ /.*^(.+)$/sm;
598 $desc .= length $lastline < 50 ?
" $word " : "\n $word ";
600 $self->_element('description', "\n $desc\n ");
602 my $aa = $h{residues
};
603 $aa =~ s/(\w{60})/$1\n /g;
605 $aa = "\n " . $aa . "\n ";
606 $self->_element('residues', $aa);
607 $writer->endTag('seq');
608 $self->{add_seqs
} = [];
611 # Is there anything else associated with the gene? We have to write other
612 # features as stand-alone annotations or apollo will assume they are
614 for my $thing ( @
{$self->{other_stuff
}} ) {
615 next if $thing->has_tag('standard_name');
616 $self->_write_feature($thing);
618 $self->{other_stuff
} = [];
625 Usage : $self->_check_cds($cds, $name)
626 Function: internal method to check if the CDS associated with an mRNA is
627 the correct alternative splice variant
628 Returns : a Bio::SeqFeature::Generic CDS object
629 Args : the CDS object plus the transcript\'s 'standard_name'
630 Note : this method only works if alternatively spliced transcripts are bound
631 together by a 'standard_name' or 'mRNA' qualifier. If none is present,
632 we will hope that the exons were derived from a segmented RNA or a CDS
633 with no associated mRNA feature. Neither of these two cases would be
634 confounded by alternative splice variants.
640 my ($self, $cds, $name) = @_;
641 my $cname = $self->_find_name( $cds, 'standard_name' )
642 || $self->_find_name( $cds, 'mRNA');
645 if ( $cname eq $name ) {
649 my @CDS = grep { $_->primary_tag eq 'CDS' } @
{$self->{feats
}};
651 my ($sname) = $_->_find_name( $_, 'standard_name' )
652 || $_->_find_name( $_, $_->primary_tag );
653 return $_ if $sname eq $name;
664 =head2 _comp_analysis
676 my ($self, $feat) = @_;
677 my $writer = $self->{writer
};
679 $writer->startTag('computational_analysis');
680 $self->_element('program', $feat->program_name || 'unknown program');
681 $self->_element('database', $feat->database_name) if $feat->database_name;
682 $self->_element('version', $feat->program_version) if $feat->program_version;
683 $self->_element('type', $feat->primary_tag) if $feat->primary_tag;
684 $self->_render_tags($feat,
686 \
&_render_tags_as_properties
,
688 $self->_comp_result($feat);
689 $writer->endTag('computational_analysis');
695 Desc : recursively render a feature and its subfeatures as
696 <result_set> and <result_span> elements
697 Ret : nothing meaningful
704 my ($self,$feat) = @_;
706 #check that all our subfeatures have the same strand
709 #write result sets for things that have subfeatures, or things
711 if( my @subfeats = $feat->get_SeqFeatures or $feat->get_all_tags ) {
712 my $writer = $self->{writer
};
713 $writer->startTag('result_set',
714 ($feat->can('computation_id') && defined($feat->computation_id))
715 ?
(id
=> $feat->computation_id) : ()
717 my $fakename = $feat->primary_tag || 'no_name';
718 $self->_element('name', $feat->display_name || ($fakename).'_'.++$self->{anon_result_set_counters
}{$fakename} );
719 $self->_seq_relationship('query', $feat);
720 $self->_render_tags($feat,
721 \
&_render_output_tags
723 for (@subfeats) { #render the subfeats, if any
724 $self->_comp_result($_);
726 $self->_comp_result_span($feat); #also have a span to hold this info
727 $writer->endTag('result_set');
729 #just write result spans for simple things
730 $self->_comp_result_span($feat);
734 =head2 _comp_result_span
736 Usage: _comp_result_span('foo12',$feature);
737 Desc : write GAME XML for a Bio::SeqFeature::Computation feature
738 that has no subfeatures
739 Ret : nothing meaningful
740 Args : name for this span (some kind of identifier),
741 SeqFeature object to put into this span
747 sub _comp_result_span
{
749 my ($self, $feat) = @_;
750 my $writer = $self->{writer
};
752 $writer->startTag('result_span',
753 ($feat->can('computation_id') && defined($feat->computation_id) ?
(id
=> $feat->computation_id) : ())
755 $self->_element('name', $feat->display_name) if $feat->display_name;
756 $self->_element('type', $feat->primary_tag) if $feat->primary_tag;
757 my $has_score = $feat->can('has_score') ?
$feat->has_score : defined($feat->score);
758 $self->_element('score', $feat->score) if $has_score;
759 $self->_render_tags($feat,
760 \
&_render_output_tags
762 $self->_seq_relationship('query', $feat);
763 $self->_render_tags($feat,
764 \
&_render_target_tags
,
766 $writer->endTag('result_span');
781 my ($self,$feat,@render_funcs) = @_;
783 my @tagnames = $feat->get_all_tags;
785 #do a chain-of-responsibility down the allowed
786 #tag handlers types for the context in which this is
788 foreach my $func (@render_funcs) {
789 @tagnames = $self->$func($feat,@tagnames);
793 =head2 _render_output_tags
796 Desc : print out <output> elements, with contents
797 taken from the SeqFeature::Computation's 'output' tag
798 Ret : array of tag names this did not render
799 Args : feature object, list of tag names to maybe render
801 In game xml, only <result_span> and <result_set> elements can
802 have <output> elements.
806 sub _render_output_tags
{
807 my ($self, $feat, @tagnames) = @_;
808 my $writer = $self->{writer
};
811 for my $tag (@tagnames) {
812 if(lc($tag) eq 'output') {
813 my @outputs = $feat->get_tag_values($tag);
814 while(my($type,$val) = splice @outputs,0,2) {
815 $writer->startTag('output');
816 $self->_element('type',$type);
817 $self->_element('value',$val);
818 $writer->endTag('output');
822 push @passed_up,$tag;
828 =head2 _render_tags_as_properties
833 Args : feature object, array of tag names
837 In game xml, <annotation>, <computational_analysis>,
838 and <feature_set> elements can have properties.
842 sub _render_tags_as_properties
{
843 my ($self,$feat,@tagnames) = @_;
845 foreach my $tag (@tagnames) {
846 if( $tag ne $feat->primary_tag ) {
847 $self->_property($tag,$_) for $feat->get_tag_values($tag);
853 =head2 _render_comment_tags
857 Ret : names of tags that were not comment tags
858 Args : feature object, tag names available for us to render
859 Side Effects: writes XML
862 In game xml, <annotation> and <feature_set> elements can
867 sub _render_comment_tags
{
868 my ($self,$feat,@tagnames) = @_;
869 my $writer = $self->{writer
};
871 for my $tag ( @tagnames ) {
872 if( lc($tag) eq 'comment' ) {
873 for my $val ($feat->get_tag_values($tag)) {
874 if ( $val =~ /=.+?;.+=/ ) {
875 $self->_unflatten_attribute('comment', $val);
877 $writer->startTag('comment');
878 $self->_element('text', $val);
879 $writer->endTag('comment');
883 push @passed_up,$tag;
889 =head2 _render_date_tags
893 Ret : names of tags that were not date tags
894 Args : feature, list of tag names available for us to render
895 Side Effects: writes XML for <date> elements
898 In game xml, <annotation>, <computational_analysis>,
899 <transaction>, <comment>, and <feature_set> elements
904 sub _render_date_tags
{
905 my ($self,$feat,@tagnames) = @_;
909 foreach my $tag (@tagnames) {
910 if ( lc($tag) eq 'date' ) {
911 ($date) = $feat->get_tag_values($tag);
912 } elsif ( lc($tag) eq 'timestamp' ) {
913 ($timestamp{'timestamp'}) = $feat->get_tag_values($tag);
914 #ignore timestamps, they are folded in with date elem above
916 push @passed_up,$tag;
919 $self->_element('date', $date, \
%timestamp) if defined($date);
923 =head2 _render_dbxref_tags
925 Desc : look for xref tags and render them if they are there
926 Ret : tag names that we didn't render
927 Args : feature object, list of tag names to render
928 Side Effects: writes a <dbxref> element if a tag with name
929 matching /xref$/i is present
932 In game xml, <annotation> and <seq> elements can have dbxrefs.
936 #TODO: can't sequences also have database xrefs? how to find those?
937 sub _render_dbxref_tags
{
938 my ($self, $feat, @tagnames) = @_;
940 for my $tag ( @tagnames ) { #look through all the tags
941 if( $tag =~ /xref$/i ) { #if they are xref tags
942 my $writer = $self->{writer
};
943 for my $val ( $feat->get_all_tag_values($tag) ) { #get all their values
944 if( my ($db,$dbid) = $val =~ /(\S+):(\S+)/ ) { #and render them as xrefs
945 $writer->startTag('dbxref');
946 $self->_element('xref_db', $db);
947 $dbid = $val if $db =~ /^[A-Z]O$/; # -> ontology, like GO
948 $self->_element('db_xref_id', $dbid);
949 $writer->endTag('dbxref');
953 push @passed_up,$tag;
960 =head2 _render_target_tags
963 Desc : process any 'Target' tags that would indicate a sequence alignment subject
964 Ret : array of tag names that we didn't render
965 Args : feature object
966 Side Effects: writes a <seq_relationship> of type 'subject' if it finds
967 any properly formed tags named 'Target'
970 In game xml, <result_span>, <feature_span>, and <result_set> can have
971 <seq_relationship>s. <result_set> can only have one, a 'query' relation.
975 sub _render_target_tags
{
976 my ($self,$feat,@tagnames) = @_;
978 foreach my $tag (@tagnames) {
979 if($tag eq 'Target' && (my @alignment = $feat->get_tag_values('Target')) >= 3) {
980 $self->_seq_relationship('subject',
981 Bio
::Location
::Simple
->new( -start
=> $alignment[1],
982 -end
=> $alignment[2],
988 push @passed_up, $tag;
998 Usage : $self->_property($tag => $value);
999 Function: an internal method to write property XML elements
1001 Args : a tag/value pair
1006 my ($self, $tag, $val) = @_;
1007 my $writer = $self->{writer
};
1009 if ( length $val > 45 ) {
1010 my @val = split /\s+/, $val;
1013 for my $word (@val) {
1014 my ($lastline) = $val =~ /.*^(.+)$/sm;
1016 $val .= length $lastline < 45 ?
" $word " : "\n $word";
1018 $val = "\n $val\n ";
1019 $val =~ s/(\S)\s{2}(\S)/$1 $2/g;
1021 $writer->startTag('property');
1022 $self->_element('type', $tag);
1023 $self->_element('value', $val);
1024 $writer->endTag('property');
1027 =head2 _unflatten_attribute
1029 Title : _unflatten_attribute
1030 Usage : $self->_unflatten_attribute($name, $value)
1031 Function: an internal method to unflatten and write comment or evidence elements
1033 Args : a list of strings
1037 sub _unflatten_attribute
{
1038 my ($self, $name, $val) = @_;
1039 my $writer = $self->{writer
};
1041 my @pairs = split ';', $val;
1042 for my $p ( @pairs ) {
1043 my @pair = split '=', $p;
1044 $pair[0] =~ s/^\s+|\s+$//g;
1045 $pair[1] =~ s/^\s+|\s+$//g;
1046 $pair{$pair[0]} = $pair[1];
1048 $writer->startTag($name);
1049 for ( keys %pair ) {
1050 $self->_element($_, $pair{$_});
1052 $writer->endTag($name);
1060 Usage : $self->_xref($value)
1061 Function: an internal method to write db_xref elements
1063 Args : a list of strings
1068 my ($self, @xrefs) = @_;
1069 my $writer = $self->{writer
};
1070 for my $xref ( @xrefs ) {
1071 my ($db, $acc) = $xref =~ /(\S+):(\S+)/;
1072 $writer->startTag('dbxref');
1073 $self->_element('xref_db', $db);
1074 $acc = $xref if $db eq 'GO';
1075 $self->_element('db_xref_id', $acc);
1076 $writer->endTag('dbxref');
1080 =head2 _feature_span
1082 Title : _feature_span
1083 Usage : $self->_feature_span($name, $type, $loc)
1084 Function: an internal method to write a feature_span element
1085 (the actual feature with coordinates)
1087 Args : a feature name and Bio::SeqFeatureI-compliant object
1092 my ($self, $name, $feat, $pname) = @_;
1093 my $type = $feat->primary_tag;
1094 my $writer = $self->{writer
};
1095 my %atts = ( id
=> $name );
1099 $atts{produces_seq
} = $pname;
1102 $writer->startTag('feature_span', %atts );
1103 $self->_element('name', $name);
1104 $self->_element('type', $type);
1105 $self->_seq_relationship('query', $feat);
1106 $writer->endTag('feature_span');
1109 =head2 _seq_relationship
1111 Title : _seq_relationship
1112 Usage : $self->_seq_relationship($type, $loc)
1113 Function: an internal method to handle feature_span sequence relationships
1115 Args : feature type, a Bio::LocationI-compliant object,
1116 (optional) sequence name (defaults to the query seq)
1117 and (optional) alignment string
1121 sub _seq_relationship
{
1122 my ($self, $type, $loc, $seqname, $alignment) = @_;
1123 my $writer = $self->{'writer'};
1125 $seqname ||= #if no seqname passed in, use the name of our annotating seq
1126 $self->{seq
}->accession_number ne 'unknown' && $self->{seq
}->accession_number
1127 || $self->{seq
}->display_id || 'unknown';
1134 $writer->_element('alignment',$alignment) if $alignment;
1135 $writer->endTag('seq_relationship');
1141 Usage : $self->_element($name, $chars, $atts)
1142 Function: an internal method to generate 'generic' XML elements
1145 my $content = 'bar';
1146 my $attributes = { baz => 1 };
1148 $self->_element($name, $content, $attributes);
1150 Args : the element name and content plus a ref to an attribute hash
1155 my ($self, $name, $chars, $atts) = @_;
1156 my $writer = $self->{writer
};
1157 my %atts = $atts ?
%$atts : ();
1159 $writer->startTag($name, %atts);
1160 $writer->characters($chars);
1161 $writer->endTag($name);
1167 Usage : $self->_span($loc)
1168 Function: an internal method to write the 'span' element
1170 Args : a Bio::LocationI-compliant object
1175 my ($self, @loc) = @_;
1176 my ($loc, $start, $end);
1181 elsif ( @loc == 2 ) {
1182 ($start, $end) = @loc;
1186 ($start, $end) = ($loc->start, $loc->end);
1187 ($start, $end) = ($end, $start) if $loc->strand < 0;
1190 ($start, $end) = (1, $self->{seq
}->length);
1193 my $writer = $self->{writer
};
1194 $writer->startTag('span');
1195 $self->_element('start', $start);
1196 $self->_element('end', $end);
1197 $writer->endTag('span');
1203 Usage : $self->_seq($seq, $dna)
1204 Function: an internal method to print the 'sequence' element
1206 Args : and Bio::SeqI-compliant object and a reference to an attribute hash
1211 my ($self, $seq, $atts) = @_;
1213 my $writer = $self->{'writer'};
1217 my $alphabet = $seq->alphabet;
1218 $alphabet ||= $seq->mol_type if $seq->can('mol_type');
1219 $alphabet =~ s/protein/aa/;
1220 $alphabet =~ s/rna/cdna/;
1223 id
=> $atts->{name
},
1224 length => $seq->length,
1229 if ( $atts->{md5checksum
} ) {
1230 push @seq, (md5checksum
=> $atts->{md5checksum
});
1231 delete $atts->{md5checksum
};
1233 $writer->startTag(@seq);
1235 for my $k ( keys %{$atts} ) {
1236 if ( $k =~ /xref/ ) {
1237 $self->_xref($atts->{$k});
1240 $self->_element($k, $atts->{$k});
1244 # add leading spaces and line breaks for
1245 # nicer xml formatting/indentation
1247 my $dna = $seq->seq;
1248 $dna =~ s/(\w{60})/$1\n$sp/g;
1249 $dna = "\n$sp" . $dna . "\n ";
1251 if ( $seq->species && !$self->{has_organism
}) {
1252 my $species = $seq->species->binomial;
1253 $self->_element('organism', $species);
1256 $self->_element('residues', $dna);
1257 $writer->endTag('seq');
1263 Usage : my $name = $self->_find_name($feature)
1264 Function: an internal method to look for a gene name
1266 Args : a Bio::SeqFeatureI-compliant object
1271 my ($self, $feat, $key) = @_;
1274 if ( $key && $feat->has_tag($key) ) {
1275 ($name) = $feat->get_tag_values($key);
1279 # warn "Could not find name '$key'\n";