2 # BioPerl module for Bio::SeqIO::embldriver
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqIO::embldriver - EMBL sequence input/output stream
20 It is probably best not to use this object directly, but
21 rather go through the SeqIO handler system. Go:
23 $stream = Bio::SeqIO->new(-file => $filename, -format => 'embldriver');
25 while ( (my $seq = $stream->next_seq()) ) {
26 # do something with $seq
31 This object can transform Bio::Seq objects to and from EMBL flat
34 There is a lot of flexibility here about how to dump things which
35 should be documented more fully.
37 There should be a common object that this and Genbank share (probably
38 with Swissprot). Too much of the magic is identical.
40 =head2 Optional functions
46 (output only) shows the dna or not
50 (output only) provides a sorting func which is applied to the FTHelpers
53 =item _id_generation_func()
55 This is a function which is called as
57 print "ID ", $func($annseq), "\n";
59 To generate the ID line. If it is not there, it generates a sensible ID
60 line using a number of tools.
62 If you want to output annotations in EMBL format they need to be
63 stored in a Bio::Annotation::Collection object which is accessible
64 through the Bio::SeqI interface method L<annotation()|annotation>.
66 The following are the names of the keys which are polled from a
67 L<Bio::Annotation::Collection> object.
69 reference - Should contain Bio::Annotation::Reference objects
70 comment - Should contain Bio::Annotation::Comment objects
71 dblink - Should contain Bio::Annotation::DBLink objects
79 User feedback is an integral part of the evolution of this and other
80 Bioperl modules. Send your comments and suggestions preferably to one
81 of the Bioperl mailing lists. Your participation is much appreciated.
83 bioperl-l@bioperl.org - General discussion
84 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
88 Please direct usage questions or support issues to the mailing list:
90 I<bioperl-l@bioperl.org>
92 rather than to the module maintainer directly. Many experienced and
93 responsive experts will be able to look at the problem and quickly
94 address it. Please include a thorough description of the problem
95 with code and data examples if at all possible.
99 Report bugs to the Bioperl bug tracking system to help us keep track
100 of the bugs and their resolution. Bug reports can be submitted via
103 https://github.com/bioperl/bioperl-live/issues
105 =head1 AUTHOR - Ewan Birney
107 Email birney@ebi.ac.uk
111 The rest of the documentation details each of the object
112 methods. Internal methods are usually preceded with a _
116 # Let the code begin...
118 package Bio
::SeqIO
::embldriver
;
119 use vars
qw(%FTQUAL_NO_QUOTE);
121 use Bio::SeqIO::Handler::GenericRichSeqHandler;
124 use base qw(Bio::SeqIO);
126 my %FTQUAL_NO_QUOTE = map {$_ => 1} qw(
129 cons_splice direction
133 transl_except transl_table
145 OC
=> 'CLASSIFICATION',
158 AH
=> 'TPA_HEADER', # Third party annotation
159 AS
=> 'TPA_DATA', # Third party annotation
171 OC
=> 'CLASSIFICATION',
172 OH
=> 'HOST', # not currently handled, bundled with organism data for now
182 AS
=> 'ASSEMBLYINFO', # Third party annotation
191 # signals to process what's in the hash prior to next round
192 # these should be changed to map secondary data
193 my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
196 my($self,@args) = @_;
198 $self->SUPER::_initialize
(@args);
199 my $handler = $self->_rearrange([qw(HANDLER)],@args);
200 # hash for functions for decoding keys.
201 $handler ?
$self->seqhandler($handler) :
202 $self->seqhandler(Bio
::SeqIO
::Handler
::GenericRichSeqHandler
->new(
204 -verbose
=> $self->verbose,
205 -builder
=> $self->sequence_builder
208 if( ! defined $self->sequence_factory ) {
209 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
210 (-verbose
=> $self->verbose(),
211 -type
=> 'Bio::Seq::RichSeq'));
218 Usage : $seq = $stream->next_seq()
219 Function: returns the next sequence in the stream
220 Returns : Bio::Seq object
227 my $hobj = $self->seqhandler;
229 my ($featkey, $qual, $annkey, $delim, $seqdata);
233 while(defined(my $line = $self->_readline)) {
234 next PARSER
if $line =~ m{^\s*$};
236 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
237 next PARSER
if ($ann eq 'XX' || $ann eq 'FH');
242 if ($data =~ m{^(\S+)\s+([^\n]+)}) {
243 $hobj->data_handler($seqdata) if $seqdata;
245 ($seqdata->{FEATURE_KEY
}, $data) = ($1, $2);
246 $seqdata->{NAME
} = $ann;
248 } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
249 ($qual, $data) = ($1, $2 ||'');
250 $ct = (exists $seqdata->{$qual}) ?
251 ((ref($seqdata->{$qual})) ?
scalar(@
{ $seqdata->{$qual} }) : 1)
255 $data =~ tr{"}{}d; # we don't care about quotes yet...
256 my $delim = ($FTQUAL_NO_QUOTE{$qual}) ?
'' : ' ';
258 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
262 if (!ref($seqdata->{$qual})) {
263 $seqdata->{$qual} = [$seqdata->{$qual}];
265 (exists $seqdata->{$qual}->[$ct]) ?
266 (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
267 (($seqdata->{$qual}->[$ct]) .= $data);
272 last PARSER
if $ann eq '//';
273 if ($ann ne $lastann) {
274 if (!$SEC{$ann} && $seqdata) {
275 $hobj->data_handler($seqdata);
276 # can't use undef here; it can lead to subtle mem leaks
279 $annkey = (!$SEC{$ann}) ?
'DATA' : # primary data
281 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
284 # toss the data for SQ lines; this needs to be done after the
285 # call to the data handler
287 next PARSER
if $ann eq 'SQ';
288 my $delim = $DELIM{$ann} || ' ';
289 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
290 $delim.$data : $data;
294 # this should only be sequence (fingers crossed!)
296 while (defined ($line = $self->_readline)) {
297 if (index($line, '//') == 0) {
298 $data =~ tr{0-9 \n}{}d;
299 $seqdata->{DATA
} = $data;
300 #$self->debug(Dumper($seqdata));
301 $hobj->data_handler($seqdata);
311 $hobj->data_handler($seqdata) if $seqdata;
313 return $hobj->build_sequence;
320 while(defined(my $line = $self->_readline)) {
321 next if $line =~ m{^\s*$};
323 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
325 $self->debug("Ann: [$ann]\n\tData: [$data]\n");
326 last PARSER
if $ann =~ m{//};
333 Usage : $stream->write_seq($seq)
334 Function: writes the $seq object (must be seq) to the stream
335 Returns : 1 for success and 0 for error (not implemented in this driver: the method currently throws; use Bio::SeqIO::embl for output)
336 Args : array of 1 to n Bio::SeqI objects
341 shift->throw("Use Bio::SeqIO::embl for output");
342 # maybe make a Writer class as well????
348 Usage : $stream->seqhandler($handler)
349 Function: Get/Set the Bio::HandlerBaseI object
350 Returns : Bio::HandlerBaseI
351 Args : Bio::HandlerBaseI
356 my ($self, $handler) = @_;
358 $self->throw("Not a Bio::HandlerBaseI") unless
359 ref($handler) && $handler->isa("Bio::HandlerBaseI");
360 $self->{'_seqhandler'} = $handler;
362 return $self->{'_seqhandler'};