2 # BioPerl module for Bio::DB::DBFetch
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
20 #do not use this module directly
24 Allows the dynamic retrieval of entries from databases using the
25 dbfetch script at EBI:
26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
28 In order to make changes transparent we have host type (currently only
29 ebi) and location (defaults to ebi) separated out. This allows later
30 additions of more servers in different geographical locations.
32 This is a superclass which is called by instantiable subclasses with
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 the bugs and their resolution. Bug reports can be submitted via the
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Heikki Lehvaslaiho
67 Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
76 # Let the code begin...
78 package Bio
::DB
::DBFetch
;
81 use vars
qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
85 use HTTP::Request::Common;
87 use base qw(Bio::DB::WebDBSeqI);
89 # the new way to make modules a little more lightweight
93 $DEFAULTSERVERTYPE = 'dbfetch';
94 $DEFAULTLOCATION = 'ebi';
98 =head1 Routines from Bio::DB::WebDBSeqI
103 Usage : my $url = $self->get_request
104 Function: returns a HTTP::Request object
106 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
111 my ($self, @qualifiers) = @_;
112 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
115 $self->throw("Must specify a value for UIDs to fetch")
116 unless defined $uids;
118 my $format_string = '';
119 $format ||= $self->default_format;
120 ($format, $tmp) = $self->request_format($format);
121 $format_string = "&format=$format";
122 my $url = $self->location_url();
124 if( ref($uids) =~ /ARRAY/i ) {
125 $uid = join (',', @
$uids);
126 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
127 if scalar @
$uids >50;
132 return GET
$url. $format_string. '&id='. $uid;
136 =head2 postprocess_data
138 Title : postprocess_data
139 Usage : $self->postprocess_data ( 'type' => 'string',
140 'location' => \$datastr);
141 Function: process downloaded data before loading into a Bio::SeqIO
143 Args : hash with two keys - 'type' can be 'string' or 'file'
144 - 'location' either file location or string
145 reference containing data
149 # remove occasional blank lines at top of web output
150 sub postprocess_data
{
151 my ($self, %args) = @_;
152 if ($args{type
} eq 'string') {
153 ${$args{location
}} =~ s/^\s+//; # get rid of leading whitespace
155 elsif ($args{type
} eq 'file') {
157 open $F,"<", $args{location
} or $self->throw("Cannot open $args{location}: $!");
163 open $F,">", $args{location
} or $self->throw("Cannot write to $args{location}: $!");
169 =head2 default_format
171 Title : default_format
172 Usage : my $format = $self->default_format
173 Function: Returns default sequence format for this module
181 return $self->{'_default_format'};
184 =head1 Bio::DB::DBFetch specific routines
186 =head2 get_Stream_by_id
188 Title : get_Stream_by_id
189 Usage : $seq = $db->get_Stream_by_id($ref);
190 Function: Retrieves Seq objects from the server 'en masse', rather than one
191 at a time. For large numbers of sequences, this is far superior
192 than get_Stream_by_[id/acc]().
194 Returns : a Bio::SeqIO stream object
195 Args : $ref : either an array reference, a filename, or a filehandle
196 from which to get the list of unique ids/accession numbers.
198 NOTE: for backward compatibility, this method is also called
203 sub get_Stream_by_id
{
204 my ($self, $ids) = @_;
205 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
208 =head2 get_Seq_by_version
210 Title : get_Seq_by_version
211 Usage : $seq = $db->get_Seq_by_version('X77802.1');
212 Function: Gets a Bio::Seq object by accession number
213 Returns : A Bio::Seq object
214 Args : version number (as a string)
215 Throws : "version does not exist" exception
219 sub get_Seq_by_version
{
220 my ($self,$seqid) = @_;
221 my $seqio = $self->get_Stream_by_acc([$seqid]);
222 $self->throw("version does not exist") if( !defined $seqio );
223 return $seqio->next_seq();
226 =head2 request_format
228 Title : request_format
229 Usage : my ($req_format, $ioformat) = $self->request_format;
230 $self->request_format("genbank");
231 $self->request_format("fasta");
232 Function: Get/Set sequence format retrieval. The get-form will normally not
233 be used outside of this and derived modules.
234 Returns : Array of two strings, the first representing the format for
235 retrieval, and the second specifying the corresponding SeqIO format.
236 Args : $format = sequence format
241 my ($self, $value) = @_;
242 if( defined $value ) {
244 $self->{'_format'} = $value;
245 return ($value, $value);
247 $value = $self->{'_format'};
248 if( $value and defined $self->formatmap->{$value} ) {
249 return ($value, $self->formatmap->{$value});
251 # Try to fall back to a default.
252 return ($self->default_format, $self->default_format );
260 Usage : my $servertype = $self->servertype
261 $self->servertype($servertype);
262 Function: Get/Set server type
264 Args : server type string [optional]
269 my ($self, $servertype) = @_;
270 if( defined $servertype && $servertype ne '') {
271 $self->throw("You gave an invalid server type ($servertype)".
272 " - available types are ".
273 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
274 $self->{'_servertype'} = $servertype;
276 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
277 return $self->{'_servertype'};
283 Usage : my $location = $self->hostlocation()
284 $self->hostlocation($location)
285 Function: Set/Get Hostlocation
286 Returns : string representing hostlocation
287 Args : string specifying hostlocation [optional]
292 my ($self, $location ) = @_;
293 my $servertype = $self->servertype;
294 $self->throw("Must have a valid servertype defined not $servertype")
295 unless defined $servertype;
296 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
297 if( defined $location && $location ne '' ) {
298 $location = lc $location;
299 if( ! $hosts{$location} ) {
300 $self->throw("Must specify a known host, not $location,".
301 " possible values (".
302 join(",", sort keys %hosts ). ")");
304 $self->{'_hostlocation'} = $location;
306 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
307 return $self->{'_hostlocation'};
313 Usage : my $url = $self->location_url()
314 Function: Get host url
315 Returns : string representing url
322 my $servertype = $self->servertype();
323 my $location = $self->hostlocation();
324 if( ! defined $location || !defined $servertype ) {
325 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
327 return sprintf($self->hosts->{$servertype}->{'baseurl'},
328 $self->hosts->{$servertype}->{'hosts'}->{$location});
331 =head1 Bio::DB::DBFetch routines
333 These methods allow subclasses to pass parameters.
339 Function: get/set for host hash
346 my ($self, $value) = @_;
347 if (defined $value) {
348 $self->{'_hosts'} = $value;
350 unless (exists $self->{'_hosts'}) {
353 return $self->{'_hosts'};
361 Function: get/set for format hash
368 my ($self, $value) = @_;
369 if (defined $value) {
370 $self->{'_formatmap'} = $value;
372 unless (exists $self->{'_formatmap'}) {
375 return $self->{'_formatmap'};