Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / SeqIO / bsml_sax.pm
blob4a48af6bfa4e540db6e8860f4d27f5dc9d9d1cb9
1 # BioPerl module for Bio::SeqIO::bsml_sax
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Jason Stajich
8 =head1 NAME
10 Bio::SeqIO::bsml_sax - BSML sequence input/output stream using SAX
12 =head1 SYNOPSIS
14 It is probably best not to use this object directly, but rather go
15 through the SeqIO handler system. To read a BSML file:
17 $stream = Bio::SeqIO->new( -file => $filename, -format => 'bsml');
19 while ( my $bioSeqObj = $stream->next_seq() ) {
20 # do something with $bioSeqObj
23 To write a Seq object to the current file handle in BSML XML format:
25 $stream->write_seq( -seq => $seqObj);
27 If instead you would like a XML::DOM object containing the BSML, use:
29 my $newXmlObject = $stream->to_bsml( -seq => $seqObj);
31 =head1 DEPENDENCIES
33 In addition to parts of the Bio:: hierarchy, this module uses:
35 XML::SAX
37 =head1 DESCRIPTION
39 This object can transform Bio::Seq objects to and from BSML (XML)
40 flatfiles.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to one
48 of the Bioperl mailing lists. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 the bugs and their resolution. Bug reports can be submitted via the
68 web:
70 https://github.com/bioperl/bioperl-live/issues
72 =head1 AUTHOR - Jason Stajich
74 Email jason-at-bioperl-dot-org
76 =cut
78 package Bio::SeqIO::bsml_sax;
80 use vars qw($Default_Source);
81 use strict;
83 use Bio::SeqFeature::Generic;
84 use Bio::Species;
85 use XML::SAX;
86 use Bio::Seq::SeqFactory;
87 use Bio::Annotation::Collection;
88 use Bio::Annotation::Comment;
89 use Bio::Annotation::Reference;
90 use Bio::Annotation::DBLink;
92 use base qw(Bio::SeqIO XML::SAX::Base);
94 $Default_Source = 'BSML';
96 sub _initialize {
97 my ($self) = shift;
98 $self->SUPER::_initialize(@_);
99 $self->{'_parser'} = XML::SAX::ParserFactory->parser('Handler' => $self);
100 if( ! defined $self->sequence_factory ) {
101 $self->sequence_factory(Bio::Seq::SeqFactory->new
102 (-verbose => $self->verbose(),
103 -type => 'Bio::Seq::RichSeq'));
105 return;
108 =head1 METHODS
110 =cut
112 =head2 next_seq
114 Title : next_seq
115 Usage : my $bioSeqObj = $stream->next_seq
116 Function: Retrieves the next sequence from a SeqIO::bsml stream.
117 Returns : A reference to a Bio::Seq::RichSeq object
118 Args :
120 =cut
122 sub next_seq {
123 my $self = shift;
124 if( @{$self->{'_seendata'}->{'_seqs'} || []} ||
125 eof($self->_fh)) {
126 return shift @{$self->{'_seendata'}->{'_seqs'}};
128 $self->{'_parser'}->parse_file($self->_fh);
129 return shift @{$self->{'_seendata'}->{'_seqs'}};
132 # XML::SAX::Base methods
134 sub start_document {
135 my ($self,$doc) = @_;
136 $self->{'_seendata'} = {'_seqs' => [],
137 '_authors' => [],
138 '_feats' => [] };
139 $self->SUPER::start_document($doc);
142 sub end_document {
143 my ($self,$doc) = @_;
144 $self->SUPER::end_document($doc);
148 sub start_element {
149 my ($self,$ele) = @_;
150 my $name = uc($ele->{'LocalName'});
151 my $attr = $ele->{'Attributes'};
152 my $seqid = defined $self->{'_seendata'}->{'_seqs'}->[-1] ?
153 $self->{'_seendata'}->{'_seqs'}->[-1]->display_id : undef;
154 for my $k ( keys %$attr ) {
155 $attr->{uc $k} = $attr->{$k};
156 delete $attr->{$k};
158 if( $name eq 'BSML' ) {
160 } elsif( $name eq 'DEFINITIONS' ) {
161 } elsif( $name eq 'SEQUENCES' ) {
163 } elsif( $name eq 'SEQUENCE' ) {
164 my ($id,$acc,$title,
165 $desc,$length,$topology,
166 $mol) = map { $attr->{'{}'.$_}->{'Value'} } qw(ID IC-ACCKEY
167 TITLE COMMENT
168 LENGTH
169 TOPOLOGY
170 MOLECULE);
171 push @{$self->{'_seendata'}->{'_seqs'}},
172 $self->sequence_factory->create
174 -display_id => $id,
175 -accession_number => $acc,
176 -description => $desc,
177 -length => $length,
178 -is_circular => ($topology =~ /^linear$/i) ? 0 : 1,
179 -molecule => $mol,
182 } elsif( $name eq 'FEATURE-TABLES' ) {
183 } elsif( $name eq 'ATTRIBUTE' ) {
184 my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
185 my ($name,$content) = map { $attr->{'{}'.$_}->{'Value'} } qw(NAME CONTENT);
186 if($name =~ /^version$/i ) {
187 my ($version);
188 if($content =~ /^[^\.]+\.(\d+)/) {
189 $version = $1;
190 } else { $version = $content }
191 $curseq->seq_version($version);
192 } elsif( $name eq 'organism-species') {
193 my ($genus,$species,$subsp) = split(/\s+/,$content,3);
194 $curseq->species(Bio::Species->new(-sub_species => $subsp,
195 -classification =>
196 [$species,$genus]));
197 } elsif( $name eq 'organism-classification' ) {
198 my (@class) =(split(/\s*;\s*/,$content),$curseq->species->species);
199 $curseq->species->classification([reverse @class]);
200 } elsif( $name eq 'database-xref' ) {
201 my ($db,$id) = split(/:/,$content);
202 $curseq->annotation->add_Annotation('dblink',
203 Bio::Annotation::DBLink->new
204 ( -database => $db,
205 -primary_id=> $id));
206 } elsif( $name eq 'date-created' ||
207 $name eq 'date-last-updated' ) {
208 $curseq->add_date($content);
210 } elsif( $name eq 'FEATURE' ) {
211 my ($id,$class,$type,$title,$display_auto)
212 = map { $attr->{'{}'.$_}->{'Value'} } qw(ID CLASS VALUE-TYPE
213 TITLE DISPLAY-AUTO);
215 push @{$self->{'_seendata'}->{'_feats'}},
216 Bio::SeqFeature::Generic->new
217 ( -seq_id => $self->{'_seendata'}->{'_seqs'}->[-1]->display_id,
218 -source_tag => $Default_Source,
219 -primary_tag => $type,
220 -tag => {'ID' => $id,
223 } elsif( $name eq 'QUALIFIER') {
224 my ($type,$value) = map { $attr->{'{}'.$_}->{'Value'} } qw(VALUE-TYPE
225 VALUE);
226 my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
227 $curfeat->add_tag_value($type,$value);
228 } elsif( $name eq 'INTERVAL-LOC' ) {
229 my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
230 my ($start,$end,$strand) =
231 map { $attr->{'{}'.$_}->{'Value'} } qw(STARTPOS
232 ENDPOS
233 COMPLEMENT);
235 $curfeat->start($start);
236 $curfeat->end($end);
237 $curfeat->strand(-1) if($strand);
238 } elsif( $name eq 'REFERENCE' ) {
239 push @{$self->{'_seendata'}->{'_annot'}},
240 Bio::Annotation::Reference->new();
243 push @{$self->{'_state'}}, $name;
244 $self->SUPER::start_element($ele);
247 sub end_element {
248 my ($self,$ele) = @_;
249 pop @{$self->{'_state'}};
250 my $name = uc $ele->{'LocalName'};
251 my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
252 if( $name eq 'REFERENCE') {
253 my $ref = pop @{$self->{'_seendata'}->{'_annot'}};
254 $curseq->annotation->add_Annotation('reference',$ref);
255 } elsif( $name eq 'FEATURE' ) {
256 my $feat = pop @{$self->{'_seendata'}->{'_feats'}};
257 $curseq->add_SeqFeature($feat);
259 $self->SUPER::end_element($ele);
262 sub characters {
263 my ($self,$data) = @_;
264 if( ! @{$self->{'_state'}} ) {
265 $self->warn("Calling characters with no previous start_element call. Ignoring data");
266 } else {
267 my $curseq = $self->{'_seendata'}->{'_seqs'}->[-1];
268 my $curfeat = $self->{'_seendata'}->{'_feats'}->[-1];
269 my $curannot = $self->{'_seendata'}->{'_annot'}->[-1];
270 my $name = $self->{'_state'}->[-1];
271 if( $name eq 'REFAUTHORS' ) {
272 $curannot->authors($data->{'Data'});
273 } elsif( $name eq 'REFTITLE') {
274 $curannot->title($data->{'Data'});
275 } elsif( $name eq 'REFJOURNAL') {
276 $curannot->location($data->{'Data'});
277 } elsif( $name eq 'SEQ-DATA') {
278 $data->{'Data'} =~ s/\s+//g;
279 $curseq->seq($data->{'Data'});
282 $self->SUPER::characters($data);