1 # BioPerl module for Bio::SeqIO::tinyseqHandler
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Donald Jackson, donald.jackson@bms.com
7 # Copyright Bristol-Myers Squibb
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqIO::tinyseq::tinyseqHandler - XML event handlers to support NCBI TinySeq XML parsing
19 Do not use this module directly; use the SeqIO handler system:
21 $stream = Bio::SeqIO->new( -file => $filename, -format => 'tinyseq' );
23 while ( my $seq = $stream->next_seq ) {
29 This object provides event handler methods for parsing sequence files
30 in the NCBI TinySeq XML format. A TinySeq is a lightweight XML file
31 of sequence information on one or more sequences, analgous to FASTA
34 See L<https://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.mod.dtd> for the DTD.
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49 Please direct usage questions or support issues to the mailing list:
51 I<bioperl-l@bioperl.org>
53 rather than to the module maintainer directly. Many experienced and
54 reponsive experts will be able look at the problem and quickly
55 address it. Please include a thorough description of the problem
56 with code and data examples if at all possible.
60 Report bugs to the Bioperl bug tracking system to help us keep track
61 of the bugs and their resolution. Bug reports can be submitted via
64 https://github.com/bioperl/bioperl-live/issues
68 L<Bio::SeqIO>, L<Bio::Seq>.
72 Donald Jackson, E<lt>donald.jackson@bms.comE<gt>
76 The rest of the documentation details each of the object methods.
77 Internal methods are usually preceded with a _
81 package Bio
::SeqIO
::tinyseq
::tinyseqHandler
;
89 use base qw(Bio::Root::Root);
91 # %ATTMAP defines correspondence between TSeq elements, PrimarySeq attributes
92 # Format: element_name => { xml_attname => pseq_attname }
93 %ATTMAP = ( TSeq_sequence
=> { Data
=> '-seq'},
94 TSeq_gi
=> { Data
=> '-primary_id' },
95 TSeq_defline
=> { Data
=> '-desc' },
96 TSeq_sid
=> { Data
=> '-sid' },
97 TSeq_accver
=> { Data
=> '-accver' },
98 TSeq_taxid
=> { Data
=> '-taxid' },
99 TSeq_orgname
=> { Data
=> '-organism' }
105 Usage : $handler = Bio::SeqIO::tinyseq::tinyseqHandler->new()
106 Function : instantiates a tinyseqHandler for use by
108 Returns : Bio::SeqIO::tinyseq::tinyseqHandler object
114 my ($proto, @args) = @_;
115 my $class = ref($proto) || $proto;
117 my $self = bless({}, $class);
122 #######################################
123 # Event handling methods for PerlSax #
124 #######################################
127 my ($self, $doctype) = @_;
128 # make sure we have a tinyseq
129 unless ($doctype->{'SystemId'} eq 'https://www.ncbi.nlm.nih.gov/dtd/NCBI_TSeq.dtd') {
130 $self->throw("This document doesn't use the NCBI TinySeq dtd; it's a ", $doctype->{'SystemId'} );
135 =head2 start_document
137 Title : start_document
139 Function : start_document handler for use by XML::Parser::PerlSAX
148 $self->{'_seqatts'} = [];
149 $self->{'_elements'} = [];
156 Function : end_document handler for use by XML::Parser::PerlSAX
164 return $self->{'_seqatts'};
169 Title : start_element
171 Function : start_element handler for use by XML::Parser::PerlSAX
178 my ($self, $starting) = @_;
180 push(@
{$self->{'_elements'}}, $starting);
187 Function : end_element handler for use by XML::Parser::PerlSAX
194 my ($self, $ending) = @_;
196 # do I have a handler for this element?
197 my $ename = $ending->{'Name'};
198 $self->$ename if ($self->can($ename));
205 Function : characters handler for use by XML::Parser::PerlSAX
212 my ($self, $characters) = @_;
214 my $data = $characters->{'Data'};
216 return unless (defined($data) and $data =~ /\S/);
218 my $current = $self->_current_element;
219 $current->{'Data'} = $data;
223 ###########################################
224 # Element-specific handlers
225 # called at END of element name
226 ##########################################
232 Function : event handler for END of a TSeq element
233 Returns : loh of parsed sequence atts for Bio::SeqIO::tinyseq
243 # map elements onto PrimarySeq keys
244 while (my $element = pop @
{ $self->{'_elements'} }) {
245 my $element_name = $element->{'Name'};
246 last if ($element_name eq 'TSeq');
248 my $conversion = $ATTMAP{$element_name} or next;
250 while(my($element_att, $pseq_att) = each %$conversion) {
251 $seqatts{$pseq_att} = $element->{$element_att};
255 push(@
{ $self->{'_seqatts'} }, \
%seqatts);
259 #############################################
260 # Utility method to return current element info
261 ##############################################
263 =head2 _current_element
265 Title : _current_element
266 Usage : Internal method
267 Function : Utility method to return current element info
268 Returns : XML::Parser::PerlSAX hash for current element
273 sub _current_element
{
275 return $self->{'_elements'}->[-1];