2 # BioPerl module for Bio::SeqIO::embldriver
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqIO::embldriver - EMBL sequence input/output stream
20 It is probably best not to use this object directly, but
21 rather go through the SeqIO handler system. Go:
23 $stream = Bio::SeqIO->new(-file => $filename, -format => 'embldriver');
25 while ( (my $seq = $stream->next_seq()) ) {
26 # do something with $seq
31 This object can transform Bio::Seq objects to and from EMBL flat
34 There is a lot of flexibility here about how to dump things which
35 should be documented more fully.
37 There should be a common object that this and Genbank share (probably
38 with Swissprot). Too much of the magic is identical.
40 =head2 Optional functions
46 (output only) shows the dna or not
50 (output only) provides a sorting func which is applied to the FTHelpers
53 =item _id_generation_func()
55 This is a function which is called as
57 print "ID ", $func($annseq), "\n";
59 To generate the ID line. If it is not there, it generates a sensible ID
60 line using a number of tools.
62 If you want to output annotations in EMBL format they need to be
63 stored in a Bio::Annotation::Collection object which is accessible
64 through the Bio::SeqI interface method L<annotation()|annotation>.
66 The following are the names of the keys which are polled from a
67 L<Bio::Annotation::Collection> object.
69 reference - Should contain Bio::Annotation::Reference objects
70 comment - Should contain Bio::Annotation::Comment objects
71 dblink - Should contain Bio::Annotation::DBLink objects
79 User feedback is an integral part of the evolution of this and other
80 Bioperl modules. Send your comments and suggestions preferably to one
81 of the Bioperl mailing lists. Your participation is much appreciated.
83 bioperl-l@bioperl.org - General discussion
84 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
88 Please direct usage questions or support issues to the mailing list:
90 I<bioperl-l@bioperl.org>
92 rather than to the module maintainer directly. Many experienced and
93 responsive experts will be able to look at the problem and quickly
94 address it. Please include a thorough description of the problem
95 with code and data examples if at all possible.
99 Report bugs to the Bioperl bug tracking system to help us keep track
100 of the bugs and their resolution. Bug reports can be submitted via
103 https://github.com/bioperl/bioperl-live/issues
105 =head1 AUTHOR - Ewan Birney
107 Email birney@ebi.ac.uk
111 The rest of the documentation details each of the object
112 methods. Internal methods are usually preceded with a _
116 # Let the code begin...
118 package Bio
::SeqIO
::embldriver
;
120 use vars
qw(%FTQUAL_NO_QUOTE);
122 use Bio::SeqIO::Handler::GenericRichSeqHandler;
125 use base qw(Bio::SeqIO);
127 my %FTQUAL_NO_QUOTE = map {$_ => 1} qw(
131 direction estimated_length
135 rpt_unit_range tag_peptide
136 transl_except transl_table
148 OC
=> 'CLASSIFICATION',
161 AH
=> 'TPA_HEADER', # Third party annotation
162 AS
=> 'TPA_DATA', # Third party annotation
174 OC
=> 'CLASSIFICATION',
175 OH
=> 'HOST', # not currently handled, bundled with organism data for now
185 AS
=> 'ASSEMBLYINFO', # Third party annotation
194 # signals to process what's in the hash prior to next round
195 # these should be changed to map secondary data
196 my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
199 my($self,@args) = @_;
201 $self->SUPER::_initialize
(@args);
202 my $handler = $self->_rearrange([qw(HANDLER)],@args);
203 # hash for functions for decoding keys.
204 $handler ?
$self->seqhandler($handler) :
205 $self->seqhandler(Bio
::SeqIO
::Handler
::GenericRichSeqHandler
->new(
207 -verbose
=> $self->verbose,
208 -builder
=> $self->sequence_builder
211 if( ! defined $self->sequence_factory ) {
212 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
213 (-verbose
=> $self->verbose(),
214 -type
=> 'Bio::Seq::RichSeq'));
221 Usage : $seq = $stream->next_seq()
222 Function: returns the next sequence in the stream
223 Returns : Bio::Seq object
230 my $hobj = $self->seqhandler;
232 my ($featkey, $qual, $annkey, $delim, $seqdata);
236 while(defined(my $line = $self->_readline)) {
237 next PARSER
if $line =~ m{^\s*$};
239 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
240 next PARSER
if ($ann eq 'XX' || $ann eq 'FH');
245 if ($data =~ m{^(\S+)\s+([^\n]+)}) {
246 $hobj->data_handler($seqdata) if $seqdata;
248 ($seqdata->{FEATURE_KEY
}, $data) = ($1, $2);
249 $seqdata->{NAME
} = $ann;
251 } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
252 ($qual, $data) = ($1, $2 ||'');
253 $ct = (exists $seqdata->{$qual}) ?
254 ((ref($seqdata->{$qual})) ?
scalar(@
{ $seqdata->{$qual} }) : 1)
258 $data =~ tr{"}{}d; # we don't care about quotes yet...
259 my $delim = ($FTQUAL_NO_QUOTE{$qual}) ?
'' : ' ';
261 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
265 if (!ref($seqdata->{$qual})) {
266 $seqdata->{$qual} = [$seqdata->{$qual}];
268 (exists $seqdata->{$qual}->[$ct]) ?
269 (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
270 (($seqdata->{$qual}->[$ct]) .= $data);
275 last PARSER
if $ann eq '//';
276 if ($ann ne $lastann) {
277 if (!$SEC{$ann} && $seqdata) {
278 $hobj->data_handler($seqdata);
279 # can't use undef here; it can lead to subtle mem leaks
282 $annkey = (!$SEC{$ann}) ?
'DATA' : # primary data
284 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
287 # toss the data for SQ lines; this needs to be done after the
288 # call to the data handler
290 next PARSER
if $ann eq 'SQ';
291 my $delim = $DELIM{$ann} || ' ';
292 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
293 $delim.$data : $data;
297 # this should only be sequence (fingers crossed!)
299 while (defined ($line = $self->_readline)) {
300 if (index($line, '//') == 0) {
301 $data =~ tr{0-9 \n}{}d;
302 $seqdata->{DATA
} = $data;
303 #$self->debug(Dumper($seqdata));
304 $hobj->data_handler($seqdata);
314 $hobj->data_handler($seqdata) if $seqdata;
316 return $hobj->build_sequence;
323 while(defined(my $line = $self->_readline)) {
324 next if $line =~ m{^\s*$};
326 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
328 $self->debug("Ann: [$ann]\n\tData: [$data]\n");
329 last PARSER
if $ann =~ m{//};
336 Usage : $stream->write_seq($seq)
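337 Function: writes the $seq object (must be seq) to the stream; NOTE: output is not implemented in this driver, which throws an exception instead (use Bio::SeqIO::embl for output)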
338 Returns : 1 for success and 0 for error
339 Args : array of 1 to n Bio::SeqI objects
344 shift->throw("Use Bio::SeqIO::embl for output");
345 # maybe make a Writer class as well????
351 Usage : $stream->seqhandler($handler)
352 Function: Get/Set the Bio::HandlerBaseI object
353 Returns : Bio::HandlerBaseI
354 Args : Bio::HandlerBaseI
359 my ($self, $handler) = @_;
361 $self->throw("Not a Bio::HandlerBaseI") unless
362 ref($handler) && $handler->isa("Bio::HandlerBaseI");
363 $self->{'_seqhandler'} = $handler;
365 return $self->{'_seqhandler'};