2 # BioPerl module for Bio::SeqIO::swissdriver
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Bioperl project bioperl-l(at)bioperl.org
8 # Copyright Chris Fields and contributors see AUTHORS section
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SeqIO::swissdriver - SwissProt/UniProt handler-based push parser
20 #It is probably best not to use this object directly, but
21 #rather go through the SeqIO handler:
23 $stream = Bio::SeqIO->new(-file => $filename,
24 -format => 'swissdriver');
26 while ( my $seq = $stream->next_seq() ) {
27 # do something with $seq
32 This object can transform Bio::Seq objects to and from UniProt flat file
33 databases. The key difference between this parser and the tried-and-true
34 Bio::SeqIO::swiss parser is this version separates the parsing and data
35 manipulation into a 'driver' method (next_seq) and separate object handlers
36 which deal with the data passed to it.
40 The main purpose of the driver routine, in this case next_seq(), is to carve out
41 the data into meaningful chunks which are passed along to relevant handlers (see
44 Each chunk of data in the has a NAME tag attached to it, similar to that for XML
45 parsing. This designates the type of data passed (annotation type or seqfeature)
46 and the handler to be called for processing the data.
52 User feedback is an integral part of the evolution of this and other
53 Bioperl modules. Send your comments and suggestions preferably to one
54 of the Bioperl mailing lists. Your participation is much appreciated.
56 bioperl-l@bioperl.org - General discussion
57 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
61 Please direct usage questions or support issues to the mailing list:
63 I<bioperl-l@bioperl.org>
65 rather than to the module maintainer directly. Many experienced and
66 reponsive experts will be able look at the problem and quickly
67 address it. Please include a thorough description of the problem
68 with code and data examples if at all possible.
72 Report bugs to the Bioperl bug tracking system to help us keep track
73 the bugs and their resolution. Bug reports can be submitted via the web:
75 https://github.com/bioperl/bioperl-live/issues
77 =head1 AUTHOR - Bioperl Project
79 bioperl-l at bioperl.org
83 The rest of the documentation details each of the object
84 methods. Internal methods are usually preceded with a _
88 # POD is at the end of the module
90 # Let the code begin...
92 # Let the code begin...
94 package Bio
::SeqIO
::swissdriver
;
95 use vars
qw(%FTQUAL_NO_QUOTE);
97 use Bio::SeqIO::Handler::GenericRichSeqHandler;
100 use base qw(Bio::SeqIO);
102 # signals to process what's in the hash prior to next round, maps ann => names
104 OC
=> 'CLASSIFICATION',
105 OH
=> 'HOST', # not currently handled, bundled with organism data for now
115 AS
=> 'ASSEMBLYINFO', # Third party annotation
119 # add specialized delimiters here for easier postprocessing
127 my($self,@args) = @_;
129 $self->SUPER::_initialize
(@args);
130 my $handler = $self->_rearrange([qw(HANDLER)],@args);
131 # hash for functions for decoding keys.
132 $handler ?
$self->seqhandler($handler) :
133 $self->seqhandler(Bio
::SeqIO
::Handler
::GenericRichSeqHandler
->new(
135 -verbose
=> $self->verbose,
136 -builder
=> $self->sequence_builder
138 if( ! defined $self->sequence_factory ) {
139 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new
140 (-verbose
=> $self->verbose(),
141 -type
=> 'Bio::Seq::RichSeq'));
148 Usage : $seq = $stream->next_seq()
149 Function: returns the next sequence in the stream
150 Returns : Bio::Seq object
157 my $hobj = $self->seqhandler;
159 # these contain values that need to carry over each round
160 my ($featkey, $qual, $annkey, $seqdata, $location);
165 while(defined(my $line = $self->_readline)) {
167 my ($ann, $data) = split(m{\s+}, $line, 2);
171 if ($data =~ m{^(\w+)\s+([\d\?\<]+)\s+([\d\?\>]+)(?:\s+?(\S.*))?}ox) {
172 # has location data and desc
174 $hobj->data_handler($seqdata);
177 ($seqdata->{FEATURE_KEY
}, my $loc1, my $loc2, $data) = ($1, $2, $3, $4);
178 $qual = 'description';
179 $seqdata->{$qual} = $data;
180 $seqdata->{NAME
} = $ann;
181 $seqdata->{LOCATION
} = "$loc1..$loc2" if defined $loc1;
183 } elsif ($data =~ m{^\s+/([^=]+)(?:=(.+))?}ox) {
185 ($qual, $data) = ($1, $2 || '');
186 $ct = ($seqdata->{$qual}) ?
187 ((ref($seqdata->{$qual})) ?
scalar(@
{ $seqdata->{$qual} }) : 1)
192 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
195 if (!ref($seqdata->{$qual})) {
196 $seqdata->{$qual} = [$seqdata->{$qual}];
198 ($seqdata->{$qual}->[$ct]) ?
199 ($seqdata->{$qual}->[$ct] .= ' '.$data) :
200 ($seqdata->{$qual}->[$ct] .= $data);
204 if ($ann ne $lastann) {
205 if (!$SEC{$ann} && $seqdata) {
206 $hobj->data_handler($seqdata);
207 # can't use undef here; it can lead to subtle mem leaks
210 $annkey = (!$SEC{$ann}) ?
'DATA' : # primary data
212 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
214 last PARSER
if $ann eq '//';
215 next PARSER
if $ann eq 'SQ';
216 my $delim = $DELIM{$ann} || ' ';
217 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
218 $delim.$data : $data;
222 # this should only be sequence (fingers crossed!)
224 while (defined ($line = $self->_readline)) {
225 if (index($line, '//') == 0) {
226 $data =~ tr{0-9 \n}{}d;
227 $seqdata->{DATA
} = $data;
228 #$self->debug(Dumper($seqdata));
229 $hobj->data_handler($seqdata);
239 # some files have no // for the last file; this catches the last bit o' data
240 $hobj->data_handler($seqdata) if $seqdata;
241 return $hobj->build_sequence;
247 Usage : $stream->write_seq($seq)
248 Function: writes the $seq object (must be seq) to the stream
249 Returns : 1 for success and 0 for error
250 Args : array of 1 to n Bio::SeqI objects
255 shift->throw("Use Bio::SeqIO::swiss write_seq() for output");
256 # maybe make a Writer class as well????
262 Usage : $stream->seqhandler($handler)
263 Function: Get/Set the Bio::Seq::HandlerBaseI object
264 Returns : Bio::Seq::HandlerBaseI
265 Args : Bio::Seq::HandlerBaseI
270 my ($self, $handler) = @_;
272 $self->throw("Not a Bio::HandlerBaseI") unless
273 ref($handler) && $handler->isa("Bio::HandlerBaseI");
274 $self->{'_seqhandler'} = $handler;
276 return $self->{'_seqhandler'};