maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / DB / DBFetch.pm
blob8ebf0fc3dd620ee95337a2e7d32cd7bff01aaecf
2 # BioPerl module for Bio::DB::DBFetch
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
8 # Copyright Heikki Lehvaslaiho
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
18 =head1 SYNOPSIS
20 #do not use this module directly
22 =head1 DESCRIPTION
24 Allows the dynamic retrieval of entries from databases using the
25 dbfetch script at EBI:
26 L<http:E<sol>E<sol>www.ebi.ac.ukE<sol>cgi-binE<sol>dbfetch>.
28 In order to make changes transparent we have host type (currently only
29 ebi) and location (defaults to ebi) separated out. This allows later
30 additions of more servers in different geographical locations.
32 This is a superclass which is called by instantiable subclasses with
33 correct parameters.
35 =head1 FEEDBACK
37 =head2 Mailing Lists
39 User feedback is an integral part of the evolution of this and other
40 Bioperl modules. Send your comments and suggestions preferably to one
41 of the Bioperl mailing lists. Your participation is much appreciated.
43 bioperl-l@bioperl.org - General discussion
44 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
46 =head2 Support
48 Please direct usage questions or support issues to the mailing list:
50 I<bioperl-l@bioperl.org>
52 rather than to the module maintainer directly. Many experienced and
53 reponsive experts will be able look at the problem and quickly
54 address it. Please include a thorough description of the problem
55 with code and data examples if at all possible.
57 =head2 Reporting Bugs
59 Report bugs to the Bioperl bug tracking system to help us keep track
60 the bugs and their resolution. Bug reports can be submitted via the
61 web:
63 https://github.com/bioperl/bioperl-live/issues
65 =head1 AUTHOR - Heikki Lehvaslaiho
67 Email Heikki Lehvaslaiho E<lt>heikki-at-bioperl-dot-orgE<gt>
69 =head1 APPENDIX
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with a _
74 =cut
76 # Let the code begin...
78 package Bio::DB::DBFetch;
80 use strict;
81 use vars qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
82 $DEFAULTSERVERTYPE);
84 $MODVERSION = '0.1';
85 use HTTP::Request::Common;
87 use base qw(Bio::DB::WebDBSeqI);
89 # the new way to make modules a little more lightweight
91 BEGIN {
92 # global vars
93 $DEFAULTSERVERTYPE = 'dbfetch';
94 $DEFAULTLOCATION = 'ebi';
98 =head1 Routines from Bio::DB::WebDBSeqI
100 =head2 get_request
102 Title : get_request
103 Usage : my $url = $self->get_request
104 Function: returns a HTTP::Request object
105 Returns :
106 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
108 =cut
110 sub get_request {
111 my ($self, @qualifiers) = @_;
112 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
113 @qualifiers);
115 $self->throw("Must specify a value for UIDs to fetch")
116 unless defined $uids;
117 my $tmp;
118 my $format_string = '';
119 $format ||= $self->default_format;
120 ($format, $tmp) = $self->request_format($format);
121 $format_string = "&format=$format";
122 my $url = $self->location_url();
123 my $uid;
124 if( ref($uids) =~ /ARRAY/i ) {
125 $uid = join (',', @$uids);
126 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
127 if scalar @$uids >50;
128 } else {
129 $uid = $uids;
132 return GET $url. $format_string. '&id='. $uid;
136 =head2 postprocess_data
138 Title : postprocess_data
139 Usage : $self->postprocess_data ( 'type' => 'string',
140 'location' => \$datastr);
141 Function: process downloaded data before loading into a Bio::SeqIO
142 Returns : void
143 Args : hash with two keys - 'type' can be 'string' or 'file'
144 - 'location' either file location or string
145 reference containing data
147 =cut
149 # remove occasional blank lines at top of web output
150 sub postprocess_data {
151 my ($self, %args) = @_;
152 if ($args{type} eq 'string') {
153 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
155 elsif ($args{type} eq 'file') {
156 my $F;
157 open $F,"<", $args{location} or $self->throw("Cannot open $args{location}: $!");
158 my @data = <$F>;
159 for (@data) {
160 last unless /^\s+$/;
161 shift @data;
163 open $F,">", $args{location} or $self->throw("Cannot write to $args{location}: $!");
164 print $F @data;
165 close $F;
169 =head2 default_format
171 Title : default_format
172 Usage : my $format = $self->default_format
173 Function: Returns default sequence format for this module
174 Returns : string
175 Args : none
177 =cut
179 sub default_format {
180 my ($self) = @_;
181 return $self->{'_default_format'};
184 =head1 Bio::DB::DBFetch specific routines
186 =head2 get_Stream_by_id
188 Title : get_Stream_by_id
189 Usage : $seq = $db->get_Stream_by_id($ref);
190 Function: Retrieves Seq objects from the server 'en masse', rather than one
191 at a time. For large numbers of sequences, this is far superior
192 than get_Stream_by_[id/acc]().
193 Example :
194 Returns : a Bio::SeqIO stream object
195 Args : $ref : either an array reference, a filename, or a filehandle
196 from which to get the list of unique ids/accession numbers.
198 NOTE: for backward compatibility, this method is also called
199 get_Stream_by_batch.
201 =cut
203 sub get_Stream_by_id {
204 my ($self, $ids) = @_;
205 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
208 =head2 get_Seq_by_version
210 Title : get_Seq_by_version
211 Usage : $seq = $db->get_Seq_by_version('X77802.1');
212 Function: Gets a Bio::Seq object by accession number
213 Returns : A Bio::Seq object
214 Args : version number (as a string)
215 Throws : "version does not exist" exception
217 =cut
219 sub get_Seq_by_version {
220 my ($self,$seqid) = @_;
221 my $seqio = $self->get_Stream_by_acc([$seqid]);
222 $self->throw("version does not exist") if( !defined $seqio );
223 return $seqio->next_seq();
226 =head2 request_format
228 Title : request_format
229 Usage : my ($req_format, $ioformat) = $self->request_format;
230 $self->request_format("genbank");
231 $self->request_format("fasta");
232 Function: Get/Set sequence format retrieval. The get-form will normally not
233 be used outside of this and derived modules.
234 Returns : Array of two strings, the first representing the format for
235 retrieval, and the second specifying the corresponding SeqIO format.
236 Args : $format = sequence format
238 =cut
240 sub request_format {
241 my ($self, $value) = @_;
242 if( defined $value ) {
243 $value = lc $value;
244 $self->{'_format'} = $value;
245 return ($value, $value);
247 $value = $self->{'_format'};
248 if( $value and defined $self->formatmap->{$value} ) {
249 return ($value, $self->formatmap->{$value});
250 } else {
251 # Try to fall back to a default.
252 return ($self->default_format, $self->default_format );
257 =head2 servertype
259 Title : servertype
260 Usage : my $servertype = $self->servertype
261 $self->servertype($servertype);
262 Function: Get/Set server type
263 Returns : string
264 Args : server type string [optional]
266 =cut
268 sub servertype {
269 my ($self, $servertype) = @_;
270 if( defined $servertype && $servertype ne '') {
271 $self->throw("You gave an invalid server type ($servertype)".
272 " - available types are ".
273 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
274 $self->{'_servertype'} = $servertype;
276 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
277 return $self->{'_servertype'};
280 =head2 hostlocation
282 Title : hostlocation
283 Usage : my $location = $self->hostlocation()
284 $self->hostlocation($location)
285 Function: Set/Get Hostlocation
286 Returns : string representing hostlocation
287 Args : string specifying hostlocation [optional]
289 =cut
291 sub hostlocation {
292 my ($self, $location ) = @_;
293 my $servertype = $self->servertype;
294 $self->throw("Must have a valid servertype defined not $servertype")
295 unless defined $servertype;
296 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
297 if( defined $location && $location ne '' ) {
298 $location = lc $location;
299 if( ! $hosts{$location} ) {
300 $self->throw("Must specify a known host, not $location,".
301 " possible values (".
302 join(",", sort keys %hosts ). ")");
304 $self->{'_hostlocation'} = $location;
306 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
307 return $self->{'_hostlocation'};
310 =head2 location_url
312 Title : location
313 Usage : my $url = $self->location_url()
314 Function: Get host url
315 Returns : string representing url
316 Args : none
318 =cut
320 sub location_url {
321 my ($self) = @_;
322 my $servertype = $self->servertype();
323 my $location = $self->hostlocation();
324 if( ! defined $location || !defined $servertype ) {
325 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
327 return sprintf($self->hosts->{$servertype}->{'baseurl'},
328 $self->hosts->{$servertype}->{'hosts'}->{$location});
331 =head1 Bio::DB::DBFetch routines
333 These methods allow subclasses to pass parameters.
335 =head2 hosts
337 Title : hosts
338 Usage :
339 Function: get/set for host hash
340 Returns :
341 Args : optional hash
343 =cut
345 sub hosts {
346 my ($self, $value) = @_;
347 if (defined $value) {
348 $self->{'_hosts'} = $value;
350 unless (exists $self->{'_hosts'}) {
351 return ('');
352 } else {
353 return $self->{'_hosts'};
357 =head2 formatmap
359 Title : formatmap
360 Usage :
361 Function: get/set for format hash
362 Returns :
363 Args : optional hash
365 =cut
367 sub formatmap {
368 my ($self, $value) = @_;
369 if (defined $value) {
370 $self->{'_formatmap'} = $value;
372 unless (exists $self->{'_formatmap'}) {
373 return ('');
374 } else {
375 return $self->{'_formatmap'};
381 __END__