bp_process_wormbase: move program to new Bio-DB-Ace distribution
[bioperl-live.git] / Bio / SeqIO / seqxml.pm
blob79a9e589e4ebd09cbd29b585702ec64e220e1cc6
1 # BioPerl module for Bio::SeqIO::seqxml
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Dave Messina <dmessina@cpan.org>
7 # Copyright Dave Messina
9 # You may distribute this module under the same terms as perl itself
10 # _history
11 # December 2009 - initial version
12 # July 2 2010 - updated for SeqXML v0.2
13 # November 11 2010 - added schemaLocation
14 # December 9 2010 - SeqXML v0.3
17 # POD documentation - main docs before the code
19 =head1 NAME
21 Bio::SeqIO::seqxml - SeqXML sequence input/output stream
23 =head1 SYNOPSIS
25 # Do not use this module directly. Use it via the Bio::SeqIO class.
27 use Bio::SeqIO;
29 # read a SeqXML file
30 my $seqio = Bio::SeqIO->new(-format => 'seqxml',
31 -file => 'my_seqs.xml');
33 while (my $seq_object = $seqio->next_seq) {
34 print join("\t",
35 $seq_object->display_id,
36 $seq_object->description,
37 $seq_object->seq,
38 ), "\n";
41 # write a SeqXML file
43 # Note that you can (optionally) specify the source
44 # (usually a database) and source version.
45 my $seqwriter = Bio::SeqIO->new(-format => 'seqxml',
46 -file => ">outfile.xml",
47 -source => 'Ensembl',
48 -sourceVersion => '56');
49 $seqwriter->write_seq($seq_object);
51 # once you've written all of your seqs, you may want to do
52 # an explicit close to get the closing </seqXML> tag
53 $seqwriter->close;
55 =head1 DESCRIPTION
57 This object can transform Bio::Seq objects to and from SeqXML format.
58 For more information on the SeqXML standard, visit L<http://www.seqxml.org>.
60 In short, SeqXML is a lightweight sequence format that takes advantage
61 of the validation capabilities of XML while not overburdening you
62 with a strict and complicated schema.
64 This module is based in part (particularly the XML-parsing part) on
65 Bio::TreeIO::phyloxml by Mira Han.
67 =head1 FEEDBACK
69 =head2 Mailing Lists
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to one
73 of the Bioperl mailing lists. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 =head2 Support
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 responsive experts will be able to look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
89 =head2 Reporting Bugs
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
93 web:
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHORS - Dave Messina
99 Email: I<dmessina@cpan.org>
101 =head1 CONTRIBUTORS
104 =head1 APPENDIX
106 The rest of the documentation details each of the object
107 methods. Internal methods are usually preceded with a _
109 =cut
111 # Let the code begin...
113 package Bio::SeqIO::seqxml;
115 use strict;
117 use Bio::Seq;
118 use Bio::Seq::SeqFactory;
119 use Bio::Species;
120 use Bio::Annotation::DBLink;
121 use Bio::Annotation::SimpleValue;
122 use XML::LibXML;
123 use XML::LibXML::Reader;
124 use XML::Writer;
126 use base qw(Bio::SeqIO);
128 # define seqXML header stuff
129 # there's no API for XMLNS XMLNS_XSI; you must set them here.
# Fixed values written into the <seqXML> root element by this module.
use constant {
    SEQXML_VERSION  => 0.3,
    SCHEMA_LOCATION => 'http://www.seqxml.org/0.3/seqxml.xsd',
    XMLNS_XSI       => 'http://www.w3.org/2001/XMLSchema-instance',
};
134 =head2 _initialize
136 Title : _initialize
137 Usage : $self->_initialize(@args)
138 Function: constructor (for internal use only).
140 Besides the usual SeqIO arguments (-file, -fh, etc.),
141 Bio::SeqIO::seqxml accepts three arguments which are used
142 when writing out a seqxml file. They are all optional.
143 Returns : none
144 Args : -source => source string (usually a database name)
145 -sourceVersion => source version. The version number of the source
146 -seqXMLversion => the version of seqXML that will be used
147 Throws : Exception if XML::LibXML::Reader or XML::Writer
148 is not initialized
150 =cut
sub _initialize {
    my ( $self, @args ) = @_;

    # Set up the stream for reading or writing SeqXML.
    #
    # Args   : the usual SeqIO arguments plus the optional -source,
    #          -sourceVersion and -seqXMLversion header values.
    # Throws : if the XML reader (read mode) or XML writer (write mode)
    #          could not be created.

    $self->SUPER::_initialize(@args);
    if ( !defined $self->sequence_factory ) {
        $self->sequence_factory(
            Bio::Seq::SeqFactory->new(
                -verbose => $self->verbose(),
                -type    => 'Bio::Seq',
            )
        );
    }

    # holds version and source data
    $self->{'_seqxml_metadata'} = {};

    # load any passed parameters; each one is stored through the accessor
    # of the same name
    my %params = @args;
    foreach my $field (qw(sourceVersion source seqXMLversion)) {
        my $given = $params{"-$field"};
        $self->$field($given) if defined $given && length $given;
    }

    my $mode = $self->mode;

    # reading in SeqXML
    if ( $mode eq 'r' ) {
        if ( $self->_fh ) {
            $self->{'_reader'} = XML::LibXML::Reader->new(
                IO        => $self->_fh,
                no_blanks => 1,
            );
        }
        if ( !$self->{'_reader'} ) {
            $self->throw("XML::LibXML::Reader not initialized");
        }

        # holds data temporarily during parsing
        $self->{'_current_entry_data'} = {};

        $self->_initialize_seqxml_node_methods();

        # read SeqXML header
        $self->parseHeader();
    }

    # writing out SeqXML
    elsif ( $mode eq 'w' ) {
        if ( $self->_fh ) {
            $self->{'_writer'} = XML::Writer->new(
                OUTPUT      => $self->_fh,
                DATA_MODE   => 1,
                DATA_INDENT => 1,
            );
        }
        if ( !$self->{'_writer'} ) {
            $self->throw("XML::Writer not initialized");
        }

        # Honour a version supplied via -seqXMLversion; fall back to the
        # module default only when none was given.
        my $version = $self->seqXMLversion();
        if ( !( defined $version && length $version ) ) {
            $version = $self->seqXMLversion(SEQXML_VERSION);
        }

        my @header = (
            'seqXMLversion'                 => $version,
            'xmlns:xsi'                     => XMLNS_XSI,
            'xsi:noNamespaceSchemaLocation' =>
              $self->schemaLocation(SCHEMA_LOCATION),
        );

        # source and sourceVersion are optional and independent: emit each
        # attribute only when it actually has a value, so an unset one is
        # never written as an undefined attribute.
        foreach my $field (qw(source sourceVersion)) {
            my $value = $self->$field();
            push @header, $field => $value
              if defined $value && length $value;
        }

        # write SeqXML header
        $self->{'_writer'}->xmlDecl("UTF-8");
        $self->{'_writer'}->startTag( 'seqXML', @header );
    }

    return;
}
237 =head2 next_seq
239 Title : next_seq
240 Usage : $seq = $stream->next_seq()
241 Function: returns the next sequence in the stream
242 Returns : L<Bio::Seq> object, or nothing if no more available
243 Args : none
245 =cut
sub next_seq {
    my ($self) = @_;

    # Advance the XML reader node by node. Every node is handed to
    # processXMLnode, except the closing </entry> tag, which finishes the
    # current sequence and ends the scan.
    my $xml = $self->{'_reader'};
    my $seq;

  NODE:
    while ( $xml->read ) {
        my $closes_entry = $xml->nodeType == XML_READER_TYPE_END_ELEMENT
          && $xml->name eq 'entry';

        if ($closes_entry) {
            $seq = $self->end_element_entry();
            last NODE;
        }

        $self->processXMLnode;
    }

    # stays undefined when the reader ran out of nodes first
    return $seq;
}
267 =head2 write_seq
269 Title : write_seq
270 Usage : $stream->write_seq(@seq)
271 Function: Writes the $seq object into the stream
272 Returns : 1 for success and 0 for error
273 Args : Array of 1 or more L<Bio::PrimarySeqI> objects
275 =cut
sub write_seq {
    my ( $self, @seqs ) = @_;
    my $writer = $self->{'_writer'};

    # Write one <entry> element per sequence object.
    #
    # Args    : one or more sequence objects
    # Returns : 1 once every sequence has been written
    # Throws  : on an undefined object, a missing ID, malformed species
    #           data, a missing sequence, an unknown alphabet, a malformed
    #           dblink, or a property without a tagname

    # SeqXML element name for each supported alphabet
    my %seqtype = (
        'rna'     => 'RNAseq',
        'dna'     => 'DNAseq',
        'protein' => 'AAseq'
    );

    foreach my $seqobj (@seqs) {
        $self->throw("Trying to write with no seq!") unless defined $seqobj;

        if ( !ref $seqobj || !$seqobj->isa('Bio::SeqI') ) {
            $self->warn(
                " $seqobj is not a SeqI compliant module. Attempting to dump, but may fail!"
            );
        }

        # opening tag, ID, and source (if present -- it's optional)
        my $id = $seqobj->display_id;
        $self->throw(" $seqobj has no ID!") unless defined $id;

        my ($source_obj) = $seqobj->get_Annotations('source');
        my @entry_attrs = ( 'id' => $id );
        push @entry_attrs, 'source' => $source_obj->value
          if defined $source_obj;
        $writer->startTag( 'entry', @entry_attrs );

        # species and NCBI taxID
        if ( my $species = $seqobj->species ) {
            my $name  = $species->node_name;
            my $taxid = $species->ncbi_taxid;

            # the definedness test keeps an absent taxid from reaching the
            # pattern match
            if ( $name && defined $taxid && $taxid =~ /[0-9]+/ ) {
                $writer->emptyTag(
                    'species',
                    'name'      => $name,
                    'ncbiTaxID' => $taxid
                );
            }
            else {
                $self->throw("$seqobj has malformed species data");
            }
        }

        # description
        if ( $seqobj->desc ) {
            $writer->dataElement( 'description', $seqobj->desc );
        }

        # sequence
        #  - throws if seq is empty or missing because having a sequence
        #    is a SeqXML requirement
        my $residues = $seqobj->seq;
        $self->throw("sequence entry $id lacks a sequence!")
          unless $residues;

        my $alphabet = $seqobj->alphabet;
        unless ( exists( $seqtype{$alphabet} ) ) {
            $self->throw("invalid sequence alphabet $alphabet!");
        }
        $writer->dataElement( $seqtype{$alphabet}, $residues );

        # Database crossreferences
        foreach my $dblink ( $seqobj->get_Annotations('dblink') ) {
            unless ( $dblink->database && $dblink->primary_id ) {
                $self->throw("dblink $dblink is malformed");
            }

            # 'type' is only written when the link has one
            my @ref_attrs;
            push @ref_attrs, 'type' => $dblink->type
              if defined( $dblink->type );
            push @ref_attrs,
              'source' => $dblink->database,
              'id'     => $dblink->primary_id;
            $writer->emptyTag( 'DBRef', @ref_attrs );
        }

        # properties
        foreach my $annot_obj ( $seqobj->get_Annotations() ) {
            my $tagname = $annot_obj->tagname;
            unless ($tagname) {
                $self->throw("property $annot_obj is missing a tagname");
            }

            next if ( $tagname eq 'dblink' );
            next if ( $tagname eq 'source' );    # handled above

            # SeqXML doesn't support references
            next if ( $tagname eq 'reference' );

            # Test definedness and length rather than truth so that a
            # value of "0" is still written out.
            my $value = $annot_obj->value;
            my @prop_attrs = ( 'name' => $tagname );
            push @prop_attrs, 'value' => $value
              if defined $value && length $value;
            $writer->emptyTag( 'property', @prop_attrs );
        }

        # closing tag
        $writer->endTag('entry');

        # make sure it gets written to the file
        $self->flush if $self->_flush_on_write && defined $self->_fh;
    }

    # Report success only after the whole list has been processed, so that
    # every sequence passed in is written, not just the first.
    return 1;
}
407 =head2 _initialize_seqxml_node_methods
409 Title : _initialize_seqxml_node_methods
410 Usage : $self->_initialize_seqxml_node_methods
411 Function: sets up code ref mapping of each seqXML node type
412 to a method for processing that node type
413 Returns : none
414 Args : none
416 =cut
sub _initialize_seqxml_node_methods {
    my ($self) = @_;

    # Dispatch tables keyed by SeqXML element name: one consulted when an
    # element opens, one when it closes.

    $self->{'_start_elements'} = {
        'seqXML'      => \&element_seqXML,
        'entry'       => \&element_entry,
        'species'     => \&element_species,
        'description' => \&element_description,
        'RNAseq'      => \&element_RNAseq,
        'DNAseq'      => \&element_DNAseq,
        'AAseq'       => \&element_AAseq,
        'DBRef'       => \&element_DBRef,
        'property'    => \&element_property,
    };

    $self->{'_end_elements'} = {
        'seqXML'      => \&end_element_default,
        'entry'       => \&end_element_entry,
        'species'     => \&end_element_default,
        'description' => \&end_element_default,
        'RNAseq'      => \&end_element_RNAseq,
        'DNAseq'      => \&end_element_DNAseq,
        'AAseq'       => \&end_element_AAseq,
        'DBRef'       => \&end_element_default,
        'property'    => \&end_element_default,
    };
}
449 =head2 schemaLocation
451 Title : schemaLocation
452 Usage : $self->schemaLocation
453 Function: gets/sets the schema location in the <seqXML> header
454 Returns : the schema location string
455 Args : To set the schemaLocation, call with a schemaLocation as the argument.
457 =cut
sub schemaLocation {
    my ( $self, $value ) = @_;

    # Combined getter/setter for the schema location kept in the header
    # metadata hash. Returns the stored value (undef if never set).
    my $metadata = $self->{'_seqxml_metadata'};

    # Store any non-empty argument. Definedness and length are tested
    # instead of truth so the string "0" is not silently ignored.
    if ( defined $value && length $value ) {
        $metadata->{'schemaLocation'} = $value;
    }

    return $metadata->{'schemaLocation'};
}
471 =head2 source
473 Title : source
474 Usage : $self->source
475 Function: gets/sets the data source in the <seqXML> header
476 Returns : the data source string
477 Args : To set the source, call with a source string as the argument.
479 =cut
sub source {
    my ( $self, $value ) = @_;

    # Combined getter/setter for the data source kept in the header
    # metadata hash. Returns the stored value (undef if never set).
    my $metadata = $self->{'_seqxml_metadata'};

    # Store any non-empty argument. Definedness and length are tested
    # instead of truth so the string "0" is not silently ignored.
    if ( defined $value && length $value ) {
        $metadata->{'source'} = $value;
    }

    return $metadata->{'source'};
}
493 =head2 sourceVersion
495 Title : sourceVersion
496 Usage : $self->sourceVersion
497 Function: gets/sets the data source version in the <seqXML> header
498 Returns : the data source version string
499 Args : To set the source version, call with a source version string
500 as the argument.
502 =cut
sub sourceVersion {
    my ( $self, $value ) = @_;

    # Combined getter/setter for the data source version kept in the header
    # metadata hash. Returns the stored value (undef if never set).
    my $metadata = $self->{'_seqxml_metadata'};

    # Store any non-empty argument. Definedness and length are tested
    # instead of truth so that a version of "0" can be recorded.
    if ( defined $value && length $value ) {
        $metadata->{'sourceVersion'} = $value;
    }

    return $metadata->{'sourceVersion'};
}
516 =head2 seqXMLversion
518 Title : seqXMLversion
519 Usage : $self->seqXMLversion
520 Function: gets/sets the seqXML version in the <seqXML> header
521 Returns : the seqXML version string.
522 Args : To set the seqXML version, call with a seqXML version string
523 as the argument.
525 =cut
sub seqXMLversion {
    my ( $self, $value ) = @_;

    # Combined getter/setter for the seqXML version kept in the header
    # metadata hash. Returns the stored value (undef if never set).
    my $metadata = $self->{'_seqxml_metadata'};

    # Store any non-empty argument. Definedness and length are tested
    # instead of truth so the string "0" is not silently ignored.
    if ( defined $value && length $value ) {
        $metadata->{'seqXMLversion'} = $value;
    }

    return $metadata->{'seqXMLversion'};
}
539 =head1 Methods for parsing the XML document
541 =cut
543 =head2 processXMLnode
545 Title : processXMLnode
546 Usage : $seqio->processXMLnode
547 Function: reads the XML node and processes according to the node type
548 Returns : none
549 Args : none
550 Throws : Exception on unexpected XML node type, warnings on unexpected
551 XML element names.
553 =cut
sub processXMLnode {
    my ($self)   = @_;
    my $reader   = $self->{'_reader'};
    my $nodetype = $reader->nodeType;

    # Handle the reader's current node according to its type:
    #   element start -> remember its name, run its start handler
    #   text          -> store the text under the remembered element name
    #   element end   -> run its end handler, forget the remembered name
    # Any other node type is an error. Unknown element names only warn.

    if ( $nodetype == XML_READER_TYPE_ELEMENT ) {
        my $name = $reader->name;
        $self->{'_current_element_name'} = $name;

        if ( exists $self->{'_start_elements'}->{$name} ) {
            my $method = $self->{'_start_elements'}->{$name};
            $self->$method();
        }
        else {
            $self->warn("unexpected start element encountered: $name");
        }
    }
    elsif ( $nodetype == XML_READER_TYPE_TEXT ) {

        # store key-value pair of element name and the corresponding text;
        # text seen while no element name is remembered is ignored
        my $name = $self->{'_current_element_name'};
        if ( defined $name ) {
            $self->{'_current_entry_data'}->{$name} = $reader->value;
        }
    }
    elsif ( $nodetype == XML_READER_TYPE_END_ELEMENT ) {
        my $name = $reader->name;

        if ( exists $self->{'_end_elements'}->{$name} ) {
            my $method = $self->{'_end_elements'}->{$name};
            $self->$method();
        }
        else {
            $self->warn("unexpected end element encountered: $name");
        }

        # Forget the element name. undef is used rather than an empty hash
        # reference, which would later be stringified into a bogus key.
        $self->{'_current_element_name'} = undef;
    }
    else {

        # Build the message as ONE string; handing the fragments to throw()
        # as separate arguments does not produce a single message.
        $self->throw( "unexpected node type "
              . $nodetype
              . " encountered (name: "
              . $reader->name
              . ")\n" );
    }

    if ( $self->debug ) {
        printf "%d %d %s %d\n",
          (
            $reader->depth, $reader->nodeType,
            $reader->name,  $reader->isEmptyElement
          );
    }

    return;
}
607 =head2 processAttribute
609 Title : processAttribute
610 Usage : $seqio->processAttribute(\%hash_for_attribute);
611 Function: reads the attributes of the current element into a hash
612 Returns : none
613 Args : hash reference where the attributes will be stored.
615 =cut
sub processAttribute {
    my ( $self, $data ) = @_;
    my $xml = $self->{'_reader'};

    # Copy every attribute of the current element into %$data as
    # name => value, then move the reader back onto the element itself.
    if ( $xml->moveToFirstAttribute ) {
        my $has_more = 1;
        while ($has_more) {
            $data->{ $xml->name() } = $xml->value;
            $has_more = $xml->moveToNextAttribute;
        }
        $xml->moveToElement;
    }
}
631 =head2 parseHeader
633 Title : parseHeader
634 Usage : $self->parseHeader();
635 Function: reads the opening <seqXML> block and grabs the metadata from it,
636 namely the source, sourceVersion, and seqXMLversion.
637 Returns : none
638 Args : none
639 Throws : Exception if it hits an <entry> tag, because that means it's
640 missed the <seqXML> tag and read too far into the file.
642 =cut
sub parseHeader {
    my ($self) = @_;
    my $xml = $self->{'_reader'};

    # Scan forward to the first <seqXML> element and process it. Meeting an
    # <entry> element first means the header was missed, which is fatal.
  NODE:
    while ( $xml->read ) {

        # only element starts are of interest here
        next NODE unless $xml->nodeType == XML_READER_TYPE_ELEMENT;

        my $tag = $xml->name;
        if ( $tag eq 'seqXML' ) {
            $self->element_seqXML();
            last NODE;
        }
        if ( $tag eq 'entry' ) {
            $self->throw("Missed the opening <seqXML> tag. Got $tag instead.");
        }
    }
}
664 =head2 element_seqXML
666 Title : element_seqXML
667 Usage : $self->element_seqXML
668 Function: processes the opening <seqXML> node
669 Returns : none
670 Args : none
672 =cut
sub element_seqXML {
    my ($self) = @_;

    # Every <seqXML> block starts from an empty metadata hash, which is
    # then filled from the element's attributes.
    my %header;
    $self->{'_seqxml_metadata'} = \%header;

    $self->throw("no SeqXML metadata!")
      unless $self->{'_reader'}->hasAttributes();

    $self->processAttribute( \%header );
}
689 =head2 element_entry
691 Title : element_entry
692 Usage : $self->element_entry
693 Function: processes a sequence <entry> node
694 Returns : none
695 Args : none
696 Throws : Exception if sequence ID is not present in <entry> element
698 =cut
sub element_entry {
    my ($self) = @_;

    # An <entry> without any attributes cannot carry an ID, so it is
    # rejected outright; otherwise its attributes go into the entry data.
    $self->throw("no sequence ID!")
      unless $self->{'_reader'}->hasAttributes();

    $self->processAttribute( $self->{'_current_entry_data'} );
}
712 =head2 element_species
714 Title : element_species
715 Usage : $self->element_species
716 Function: processes a <species> node, creating a Bio::Species object
717 Returns : none
718 Args : none
719 Throws : Exception if <species> tag exists but is empty,
720 or if the attributes 'name' or 'ncbiTaxID' are undefined
722 =cut
sub element_species {
    my ($self) = @_;
    my $entry = $self->{'_current_entry_data'};

    # Read the <species> attributes into a scratch hash.
    my %attr;
    if ( $self->{'_reader'}->hasAttributes() ) {
        $self->processAttribute( \%attr );
    }
    else {
        $self->throw("no species information!");
    }

    # Both the name and the NCBI taxonomy ID are mandatory.
    my ( $name, $taxid ) = @attr{ 'name', 'ncbiTaxID' };
    unless ( defined $name && defined $taxid ) {
        $self->throw("<species> attributes name and ncbiTaxID are undefined");
    }

    # Build the species object and attach it to the entry being parsed.
    my $species = Bio::Species->new( -ncbi_taxid => $taxid, );
    $species->node_name($name);
    $entry->{'species'} = $species;
}
753 =head2 element_description
755 Title : element_description
756 Usage : $self->element_description
757 Function: processes a sequence <description> node;
758 a no-op -- description text is read by
759 processXMLnode
760 Returns : none
761 Args : none
763 =cut
sub element_description {
    my ($self) = @_;

    # Deliberate no-op: the description text itself arrives as a text node
    # and is stored by processXMLnode. The bare return makes the sub yield
    # nothing, as documented, instead of the value of the assignment above.
    return;
}
769 =head2 element_RNAseq
771 Title : element_RNAseq
772 Usage : $self->element_RNAseq
773 Function: processes a sequence <RNAseq> node
774 Returns : none
775 Args : none
777 =cut
sub element_RNAseq {
    my ($self) = @_;

    # Mark the entry as RNA and copy whatever is currently stored under
    # the 'RNAseq' key into the generic 'sequence' slot.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'rna';
    $entry->{'sequence'} = $entry->{'RNAseq'};
}
789 =head2 element_DNAseq
791 Title : element_DNAseq
792 Usage : $self->element_DNAseq
793 Function: processes a sequence <DNAseq> node
794 Returns : none
795 Args : none
797 =cut
sub element_DNAseq {
    my ($self) = @_;

    # Mark the entry as DNA and copy whatever is currently stored under
    # the 'DNAseq' key into the generic 'sequence' slot.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'dna';
    $entry->{'sequence'} = $entry->{'DNAseq'};
}
809 =head2 element_AAseq
811 Title : element_AAseq
812 Usage : $self->element_AAseq
813 Function: processes a sequence <AAseq> node
814 Returns : none
815 Args : none
817 =cut
sub element_AAseq {
    my ($self) = @_;

    # Mark the entry as protein and copy whatever is currently stored
    # under the 'AAseq' key into the generic 'sequence' slot.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'protein';
    $entry->{'sequence'} = $entry->{'AAseq'};
}
829 =head2 element_DBRef
831 Title : element_DBRef
832 Usage : $self->element_DBRef
833 Function: processes a sequence <DBRef> node,
834 creating a Bio::Annotation::DBLink object
835 Returns : none
836 Args : none
838 =cut
sub element_DBRef {
    my ($self) = @_;
    my $reader = $self->{'_reader'};
    my $data   = $self->{'_current_entry_data'};

    # Turn a <DBRef> element into a Bio::Annotation::DBLink and queue it
    # on the entry being parsed.
    #
    # Throws : if the element has no attributes, or lacks 'source' or 'id'

    my $DBRef = {};
    if ( $reader->hasAttributes() ) {
        $self->processAttribute($DBRef);
    }
    else {
        $self->throw("no DBRef data!");
    }

    unless ( defined $DBRef->{'source'} && defined $DBRef->{'id'} ) {
        $self->throw("malformed DBRef data!");
    }

    # 'type' is optional here because write_seq emits <DBRef> without a
    # type when the link has none; demanding it would make such output
    # unreadable by this same module.
    my @link_args = (
        -primary_id => $DBRef->{'id'},
        -database   => $DBRef->{'source'},
    );
    push @link_args, -type => $DBRef->{'type'} if defined $DBRef->{'type'};
    push @link_args, -tagname => 'dblink';

    my $annotation_obj = Bio::Annotation::DBLink->new(@link_args);
    push @{ $data->{'DBRefs'} }, $annotation_obj;

    return;
}
872 =head2 element_property
874 Title : element_property
875 Usage : $self->element_property
876 Function: processes a sequence <property> node, creating a
877 Bio::Annotation::SimpleValue object
878 Returns : none
879 Args : none
881 =cut
sub element_property {
    my ($self) = @_;
    my $entry = $self->{'_current_entry_data'};

    # Read the <property> attributes into a scratch hash.
    my %attr;
    if ( $self->{'_reader'}->hasAttributes() ) {
        $self->processAttribute( \%attr );
    }
    else {
        $self->throw("no property data!");
    }

    # A property must be named; its value is optional.
    $self->throw("malformatted property!") unless defined $attr{'name'};

    my $property =
      Bio::Annotation::SimpleValue->new( -tagname => $attr{'name'} );
    $property->value( $attr{'value'} ) if defined $attr{'value'};

    push @{ $entry->{'properties'} }, $property;
}
913 =head2 end_element_RNAseq
915 Title : end_element_RNAseq
916 Usage : $self->end_element_RNAseq
917 Function: processes a sequence <RNAseq> node
918 Returns : none
919 Args : none
921 =cut
sub end_element_RNAseq {
    my ($self) = @_;

    # On the closing tag: record the alphabet and promote the text stored
    # under 'RNAseq' to the entry's 'sequence'.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'rna';
    $entry->{'sequence'} = $entry->{'RNAseq'};
}
932 =head2 end_element_DNAseq
934 Title : end_element_DNAseq
935 Usage : $self->end_element_DNAseq
936 Function: processes a sequence <DNAseq> node
937 Returns : none
938 Args : none
940 =cut
sub end_element_DNAseq {
    my ($self) = @_;

    # On the closing tag: record the alphabet and promote the text stored
    # under 'DNAseq' to the entry's 'sequence'.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'dna';
    $entry->{'sequence'} = $entry->{'DNAseq'};
}
952 =head2 end_element_AAseq
954 Title : end_element_AAseq
955 Usage : $self->end_element_AAseq
956 Function: processes a sequence <AAseq> node
957 Returns : none
958 Args : none
960 =cut
sub end_element_AAseq {
    my ($self) = @_;

    # On the closing tag: record the alphabet and promote the text stored
    # under 'AAseq' to the entry's 'sequence'.
    my $entry = $self->{'_current_entry_data'};
    $entry->{'alphabet'} = 'protein';
    $entry->{'sequence'} = $entry->{'AAseq'};
}
972 =head2 end_element_entry
974 Title : end_element_entry
975 Usage : $self->end_element_entry
976 Function: processes the closing </entry> node, creating the Seq object
977 Returns : a Bio::Seq object
978 Args : none
979 Throws : Exception if sequence, sequence ID, or alphabet are missing
981 =cut
sub end_element_entry {
    my ($self) = @_;
    my $data = $self->{'_current_entry_data'};

    # Build the sequence object for the entry that just closed, from the
    # data collected while parsing it, then clear that data.
    #
    # Returns : the object created by the sequence factory
    # Throws  : if the sequence, the ID, or the alphabet is missing

    # make sure we've got at least a seq, an ID, and an alphabet
    unless ( $data->{'sequence'} ) {
        $self->throw("this entry lacks a sequence");
    }

    # definedness/length rather than truth: "0" is a usable ID
    unless ( defined $data->{'id'} && length $data->{'id'} ) {
        $self->throw("this entry lacks an id");
    }
    unless ( $data->{'alphabet'} ) {
        $self->throw("this entry lacks an alphabet");
    }

    # create new sequence object with minimum necessary parameters
    my $seq_obj = $self->sequence_factory->create(
        -seq        => $data->{'sequence'},
        -alphabet   => $data->{'alphabet'},
        -id         => $data->{'id'},
        -primary_id => $data->{'id'},
    );

    # add additional parameters if available; text fields are tested for
    # definedness and length so that a literal "0" is not dropped
    if ( defined $data->{'description'} && length $data->{'description'} ) {
        $seq_obj->desc( $data->{'description'} );
    }
    if ( $data->{'species'} ) {
        $seq_obj->species( $data->{'species'} );
    }

    # queued cross-references first, then properties
    foreach my $queue (qw(DBRefs properties)) {
        next unless $data->{$queue};
        foreach my $annotation_obj ( @{ $data->{$queue} } ) {
            $seq_obj->add_Annotation($annotation_obj);
        }
    }

    if ( defined $data->{'source'} && length $data->{'source'} ) {
        my $annotation_obj = Bio::Annotation::SimpleValue->new(
            '-tagname' => 'source',
            '-value'   => $data->{'source'},
        );
        $seq_obj->add_Annotation($annotation_obj);
    }

    # empty the temporary data store
    $self->{'_current_entry_data'} = {};

    return $seq_obj;
}
1039 =head2 end_element_default
1041 Title : end_element_default
1042 Usage : $self->end_element_default
1043 Function: processes all other closing tags;
1044 a no-op.
1045 Returns : none
1046 Args : none
1048 =cut
sub end_element_default {
    my ($self) = @_;

    # Deliberate no-op used for closing tags that need no processing. The
    # bare return makes the sub yield nothing, as documented, instead of
    # the value of the assignment above.
    return;
}
1054 =head2 DESTROY
1056 Title : DESTROY
1057 Usage : called automatically by Perl just before object
1058 goes out of scope
1059 Function: performs a write flush
1060 Returns : none
1061 Args : none
1063 =cut
sub DESTROY {
    my ($self) = @_;

    # Push out any buffered output while a filehandle is still attached,
    # then let the parent class finish the teardown.
    if ( $self->_flush_on_write && defined $self->_fh ) {
        $self->flush;
    }

    $self->SUPER::DESTROY;
}
1071 =head2 close
1073 Title : close
1074 Usage : $seqio_obj->close().
1075 Function: writes closing </seqXML> tag.
1077 close() will be called automatically by Perl when your
1078 program exits, but if you want to use the seqXML file
1079 you've written before then, you'll need to do an explicit
1080 close first to get the final </seqXML> tag.
1081 Returns : none
1082 Args : none
1084 =cut
sub close {
    my $self = shift;

    # In write mode, finish the document by closing the root <seqXML>
    # element if it is still open, then hand over to the parent class.
    #
    # The writer is checked for existence first: if it was never created,
    # calling a method on it would die instead of closing the stream.
    my $writer = $self->{'_writer'};
    if (   $self->mode eq 'w'
        && $writer
        && $writer->within_element('seqXML') )
    {
        $writer->endTag("seqXML");
        $writer->end();
    }

    $self->SUPER::close();
}