Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / DB / WebDBSeqI.pm
blobeff5189d02f568a6abb5eb0cdefc09f2c34b835e
2 # BioPerl module for Bio::DB::WebDBSeqI
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases
18 for retrieving sequences
20 =head1 SYNOPSIS
22 # get a WebDBSeqI object somehow
23 # assuming it is a nucleotide db
24 my $seq = $db->get_Seq_by_id('ROA1_HUMAN')
26 =head1 DESCRIPTION
28 Provides core set of functionality for connecting to a web based
29 database for retrieving sequences.
31 Users wishing to add another Web Based Sequence Dabatase will need to
32 extend this class (see L<Bio::DB::SwissProt> or L<Bio::DB::NCBIHelper> for
33 examples) and implement the get_request method which returns a
34 HTTP::Request for the specified uids (accessions, ids, etc depending
35 on what query types the database accepts).
37 =head1 FEEDBACK
39 =head2 Mailing Lists
41 User feedback is an integral part of the
42 evolution of this and other Bioperl modules. Send
43 your comments and suggestions preferably to one
44 of the Bioperl mailing lists. Your participation
45 is much appreciated.
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to
64 help us keep track the bugs and their resolution.
65 Bug reports can be submitted via the web.
67 https://github.com/bioperl/bioperl-live/issues
69 =head1 AUTHOR - Jason Stajich
71 Email E<lt> jason@bioperl.org E<gt>
73 =head1 APPENDIX
75 The rest of the documentation details each of the
76 object methods. Internal methods are usually
77 preceded with a _
79 =cut
81 # Let the code begin...
83 package Bio::DB::WebDBSeqI;
84 use strict;
85 use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE
86 $DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES);
88 use Bio::SeqIO;
89 use Bio::Root::IO;
90 use LWP::UserAgent;
91 use POSIX 'setsid';
92 use HTTP::Request::Common;
93 use HTTP::Response;
94 use File::Spec;
95 use IO::Pipe;
96 use IO::String;
97 use Bio::Root::Root;
99 use base qw(Bio::DB::RandomAccessI);
101 BEGIN {
102 $MODVERSION = '0.8';
103 %RETRIEVAL_TYPES = ('io_string' => 1,
104 'tempfile' => 1,
105 'pipeline' => 1,
107 $DEFAULT_RETRIEVAL_TYPE = 'pipeline';
108 $DEFAULTFORMAT = 'fasta';
109 $LAST_INVOCATION_TIME = 0;
112 sub new {
113 my ($class, @args) = @_;
114 my $self = $class->SUPER::new(@args);
115 my ($baseaddress, $params, $ret_type, $format,$delay,$db) =
116 $self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)],
117 @args);
119 $ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type);
120 $baseaddress && $self->url_base_address($baseaddress);
121 $params && $self->url_params($params);
122 $db && $self->db($db);
123 $ret_type && $self->retrieval_type($ret_type);
124 $delay = $self->delay_policy unless defined $delay;
125 $self->delay($delay);
128 # insure we always have a default format set for retrieval
129 # even though this will be immedietly overwritten by most sub classes
130 $format = $self->default_format unless ( defined $format &&
131 $format ne '' );
133 $self->request_format($format);
134 my $ua = LWP::UserAgent->new(env_proxy => 1);
135 $ua->agent(ref($self) ."/$MODVERSION");
136 $self->ua($ua);
137 $self->{'_authentication'} = [];
138 return $self;
141 # from Bio::DB::RandomAccessI
143 =head2 get_Seq_by_id
145 Title : get_Seq_by_id
146 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
147 Function: Gets a Bio::Seq object by its name
148 Returns : a Bio::Seq object
149 Args : the id (as a string) of a sequence
150 Throws : "id does not exist" exception
153 =cut
155 sub get_Seq_by_id {
156 my ($self,$seqid) = @_;
157 $self->_sleep;
158 my $seqio = $self->get_Stream_by_id([$seqid]);
159 $self->throw("id does not exist") if( !defined $seqio ) ;
160 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
161 $self->warn("When complexity is set to 0, use get_Stream_by_id\n".
162 "Returning Bio::SeqIO object");
163 return $seqio;
165 my @seqs;
166 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
168 # Since $seqio will not be used anymore, explicitly close its filehandle
169 # or it will cause trouble later on cleanup
170 $seqio->close;
172 $self->throw("id '$seqid' does not exist") unless @seqs;
173 if( wantarray ) { return @seqs } else { return shift @seqs }
176 =head2 get_Seq_by_acc
178 Title : get_Seq_by_acc
179 Usage : $seq = $db->get_Seq_by_acc('X77802');
180 Function: Gets a Bio::Seq object by accession number
181 Returns : A Bio::Seq object
182 Args : accession number (as a string)
183 Throws : "acc does not exist" exception
185 =cut
187 sub get_Seq_by_acc {
188 my ($self,$seqid) = @_;
189 $self->_sleep;
190 my $seqio = $self->get_Stream_by_acc($seqid);
191 $self->throw("acc '$seqid' does not exist") if( ! defined $seqio );
192 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
193 $self->warn("When complexity is set to 0, use get_Stream_by_acc\n".
194 "Returning Bio::SeqIO object");
195 return $seqio;
197 my @seqs;
198 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
199 $self->throw("acc $seqid does not exist") unless @seqs;
200 if( wantarray ) { return @seqs } else { return shift @seqs }
204 =head2 get_Seq_by_gi
206 Title : get_Seq_by_gi
207 Usage : $seq = $db->get_Seq_by_gi('405830');
208 Function: Gets a Bio::Seq object by gi number
209 Returns : A Bio::Seq object
210 Args : gi number (as a string)
211 Throws : "gi does not exist" exception
213 =cut
215 sub get_Seq_by_gi {
216 my ($self,$seqid) = @_;
217 $self->_sleep;
218 my $seqio = $self->get_Stream_by_gi($seqid);
219 $self->throw("gi does not exist") if( !defined $seqio );
220 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
221 $self->warn("When complexity is set to 0, use get_Stream_by_gi\n".
222 "Returning Bio::SeqIO object");
223 return $seqio;
225 my @seqs;
226 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
227 $self->throw("gi does not exist") unless @seqs;
228 if( wantarray ) { return @seqs } else { return shift @seqs }
231 =head2 get_Seq_by_version
233 Title : get_Seq_by_version
234 Usage : $seq = $db->get_Seq_by_version('X77802.1');
235 Function: Gets a Bio::Seq object by sequence version
236 Returns : A Bio::Seq object
237 Args : accession.version (as a string)
238 Throws : "acc.version does not exist" exception
240 =cut
242 sub get_Seq_by_version {
243 my ($self,$seqid) = @_;
244 $self->_sleep;
245 my $seqio = $self->get_Stream_by_version($seqid);
246 $self->throw("accession.version does not exist") if( !defined $seqio );
247 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
248 $self->warn("When complexity is set to 0, use get_Stream_by_version\n".
249 "Returning Bio::SeqIO object");
250 return $seqio;
252 my @seqs;
253 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
254 $self->throw("accession.version does not exist") unless @seqs;
255 if( wantarray ) { return @seqs } else { return shift @seqs }
258 # implementing class must define these
260 =head2 get_request
262 Title : get_request
263 Usage : my $url = $self->get_request
264 Function: returns a HTTP::Request object
265 Returns :
266 Args : %qualifiers = a hash of qualifiers (ids, format, etc)
268 =cut
270 sub get_request {
271 my ($self) = @_;
272 my $msg = "Implementing class must define method get_request in class WebDBSeqI";
273 $self->throw($msg);
276 # class methods
278 =head2 get_Stream_by_id
280 Title : get_Stream_by_id
281 Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
282 Function: Gets a series of Seq objects by unique identifiers
283 Returns : a Bio::SeqIO stream object
284 Args : $ref : a reference to an array of unique identifiers for
285 the desired sequence entries
288 =cut
290 sub get_Stream_by_id {
291 my ($self, $ids) = @_;
292 my ($webfmt,$localfmt) = $self->request_format;
293 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single',
294 '-format' => $webfmt);
297 *get_Stream_by_batch = sub {
298 my $self = shift;
299 $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
300 $self->get_Stream_by_id(@_)
304 =head2 get_Stream_by_acc
306 Title : get_Stream_by_acc
307 Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
308 Function: Gets a series of Seq objects by accession numbers
309 Returns : a Bio::SeqIO stream object
310 Args : $ref : a reference to an array of accession numbers for
311 the desired sequence entries
312 Note : For GenBank, this just calls the same code for get_Stream_by_id()
314 =cut
316 sub get_Stream_by_acc {
317 my ($self, $ids ) = @_;
318 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
322 =head2 get_Stream_by_gi
324 Title : get_Stream_by_gi
325 Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]);
326 Function: Gets a series of Seq objects by gi numbers
327 Returns : a Bio::SeqIO stream object
328 Args : $ref : a reference to an array of gi numbers for
329 the desired sequence entries
330 Note : For GenBank, this just calls the same code for get_Stream_by_id()
332 =cut
334 sub get_Stream_by_gi {
335 my ($self, $ids ) = @_;
336 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'gi');
339 =head2 get_Stream_by_version
341 Title : get_Stream_by_version
342 Usage : $seq = $db->get_Stream_by_version([$version1, $version2]);
343 Function: Gets a series of Seq objects by accession.versions
344 Returns : a Bio::SeqIO stream object
345 Args : $ref : a reference to an array of accession.version strings for
346 the desired sequence entries
347 Note : For GenBank, this is implemented in NCBIHelper
349 =cut
351 sub get_Stream_by_version {
352 my ($self, $ids ) = @_;
353 # $self->throw("Implementing class should define this method!");
354 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'version'); # how it should work
357 =head2 get_Stream_by_query
359 Title : get_Stream_by_query
360 Usage : $stream = $db->get_Stream_by_query($query);
361 Function: Gets a series of Seq objects by way of a query string or oject
362 Returns : a Bio::SeqIO stream object
363 Args : $query : A string that uses the appropriate query language
364 for the database or a Bio::DB::QueryI object. It is suggested
365 that you create the Bio::DB::Query object first and interrogate
366 it for the entry count before you fetch a potentially large stream.
368 =cut
370 sub get_Stream_by_query {
371 my ($self, $query ) = @_;
372 return $self->get_seq_stream('-query' => $query, '-mode'=>'query');
375 =head2 default_format
377 Title : default_format
378 Usage : my $format = $self->default_format
379 Function: Returns default sequence format for this module
380 Returns : string
381 Args : none
383 =cut
385 sub default_format {
386 return $DEFAULTFORMAT;
389 # sorry, but this is hacked in because of BioFetch problems...
390 sub db {
391 my $self = shift;
392 my $d = $self->{_db};
393 $self->{_db} = shift if @_;
397 =head2 request_format
399 Title : request_format
400 Usage : my ($req_format, $ioformat) = $self->request_format;
401 $self->request_format("genbank");
402 $self->request_format("fasta");
403 Function: Get/Set sequence format retrieval. The get-form will normally not
404 be used outside of this and derived modules.
405 Returns : Array of two strings, the first representing the format for
406 retrieval, and the second specifying the corresponding SeqIO format.
407 Args : $format = sequence format
409 =cut
411 sub request_format {
412 my ($self, $value) = @_;
414 if( defined $value ) {
415 $self->{'_format'} = [ $value, $value];
417 return @{$self->{'_format'}};
420 =head2 get_seq_stream
422 Title : get_seq_stream
423 Usage : my $seqio = $self->get_seq_stream(%qualifiers)
424 Function: builds a url and queries a web db
425 Returns : a Bio::SeqIO stream capable of producing sequence
426 Args : %qualifiers = a hash qualifiers that the implementing class
427 will process to make a url suitable for web querying
429 =cut
431 sub get_seq_stream {
432 my ($self, %qualifiers) = @_;
433 my ($rformat, $ioformat) = $self->request_format();
434 my $seen = 0;
435 foreach my $key ( keys %qualifiers ) {
436 if( $key =~ /format/i ) {
437 $rformat = $qualifiers{$key};
438 $seen = 1;
441 $qualifiers{'-format'} = $rformat if( !$seen);
442 ($rformat, $ioformat) = $self->request_format($rformat);
443 # These parameters are implemented for Bio::DB::GenBank objects only
444 if($self->isa('Bio::DB::GenBank')) {
445 $self->seq_start() && ($qualifiers{'-seq_start'} = $self->seq_start());
446 $self->seq_stop() && ($qualifiers{'-seq_stop'} = $self->seq_stop());
447 $self->strand() && ($qualifiers{'-strand'} = $self->strand());
448 defined $self->complexity() && ($qualifiers{'-complexity'} = $self->complexity());
450 my $request = $self->get_request(%qualifiers);
451 $request->proxy_authorization_basic($self->authentication)
452 if ( $self->authentication);
453 $self->debug("request is ". $request->as_string(). "\n");
455 # workaround for MSWin systems
456 $self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/;
458 if ($self->retrieval_type =~ /pipeline/) {
459 # Try to create a stream using POSIX fork-and-pipe facility.
460 # this is a *big* win when fetching thousands of sequences from
461 # a web database because we can return the first entry while
462 # transmission is still in progress.
463 # Also, no need to keep sequence in memory or in a temporary file.
464 # If this fails (Windows, MacOS 9), we fall back to non-pipelined access.
466 # fork and pipe: _stream_request()=><STREAM>
467 my ($result,$stream) = $self->_open_pipe();
469 if (defined $result) {
470 $DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugger
471 if (!$result) { # in child process
472 $self->_stream_request($request,$stream);
473 POSIX::_exit(0); #prevent END blocks from executing in this forked child
475 else {
476 return Bio::SeqIO->new('-verbose' => $self->verbose,
477 '-format' => $ioformat,
478 '-fh' => $stream);
481 else {
482 $self->retrieval_type('io_string');
486 if ($self->retrieval_type =~ /temp/i) {
487 my $dir = $self->io->tempdir( CLEANUP => 1);
488 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
489 close $fh;
490 my $resp = $self->_request($request, $tmpfile);
491 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
492 $self->throw("WebDBSeqI Error - check query sequences!\n");
494 $self->postprocess_data('type' => 'file',
495 'location' => $tmpfile);
496 # this may get reset when requesting batch mode
497 ($rformat,$ioformat) = $self->request_format();
498 if( $self->verbose > 0 ) {
499 open my $ERR, '<', $tmpfile or $self->throw("Could not read file '$tmpfile': $!");
500 while(<$ERR>) { $self->debug($_);}
501 close $ERR;
504 return Bio::SeqIO->new('-verbose' => $self->verbose,
505 '-format' => $ioformat,
506 '-file' => $tmpfile);
509 if ($self->retrieval_type =~ /io_string/i ) {
510 my $resp = $self->_request($request);
511 my $content = $resp->content_ref;
512 $self->debug( "content is $$content\n");
513 if (!$resp->is_success() || length($$content) == 0) {
514 $self->throw("WebDBSeqI Error - check query sequences!\n");
516 ($rformat,$ioformat) = $self->request_format();
517 $self->postprocess_data('type'=> 'string',
518 'location' => $content);
519 $self->debug( "str is $$content\n");
520 return Bio::SeqIO->new('-verbose' => $self->verbose,
521 '-format' => $ioformat,
522 '-fh' => new IO::String($$content));
525 # if we got here, we don't know how to handle the retrieval type
526 $self->throw("retrieval type " . $self->retrieval_type .
527 " unsupported\n");
530 =head2 url_base_address
532 Title : url_base_address
533 Usage : my $address = $self->url_base_address or
534 $self->url_base_address($address)
535 Function: Get/Set the base URL for the Web Database
536 Returns : Base URL for the Web Database
537 Args : $address - URL for the WebDatabase
539 =cut
541 sub url_base_address {
542 my $self = shift;
543 my $d = $self->{'_baseaddress'};
544 $self->{'_baseaddress'} = shift if @_;
549 =head2 proxy
551 Title : proxy
552 Usage : $httpproxy = $db->proxy('http') or
553 $db->proxy(['http','ftp'], 'http://myproxy' )
554 Function: Get/Set a proxy for use of proxy
555 Returns : a string indicating the proxy
556 Args : $protocol : an array ref of the protocol(s) to set/get
557 $proxyurl : url of the proxy to use for the specified protocol
558 $username : username (if proxy requires authentication)
559 $password : password (if proxy requires authentication)
561 =cut
563 sub proxy {
564 my ($self,$protocol,$proxy,$username,$password) = @_;
565 return if ( !defined $self->ua || !defined $protocol
566 || !defined $proxy );
567 $self->authentication($username, $password)
568 if ($username && $password);
569 return $self->ua->proxy($protocol,$proxy);
572 =head2 authentication
574 Title : authentication
575 Usage : $db->authentication($user,$pass)
576 Function: Get/Set authentication credentials
577 Returns : Array of user/pass
578 Args : Array or user/pass
581 =cut
583 sub authentication{
584 my ($self,$u,$p) = @_;
586 if( defined $u && defined $p ) {
587 $self->{'_authentication'} = [ $u,$p];
589 return @{$self->{'_authentication'}};
593 =head2 retrieval_type
595 Title : retrieval_type
596 Usage : $self->retrieval_type($type);
597 my $type = $self->retrieval_type
598 Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile)
599 Returns : string representing retrieval type
600 Args : $value - the value to store
602 This setting affects how the data stream from the remote web server is
603 processed and passed to the Bio::SeqIO layer. Three types of retrieval
604 types are currently allowed:
606 pipeline Perform a fork in an attempt to begin streaming
607 while the data is still downloading from the remote
608 server. Disk, memory and speed efficient, but will
609 not work on Windows or MacOS 9 platforms.
611 io_string Store downloaded database entry(s) in memory. Can be
612 problematic for batch downloads because entire set
613 of entries must fit in memory. Alll entries must be
614 downloaded before processing can begin.
616 tempfile Store downloaded database entry(s) in a temporary file.
617 All entries must be downloaded before processing can
618 begin.
620 The default is pipeline, with automatic fallback to io_string if
621 pipelining is not available.
623 =cut
625 sub retrieval_type {
626 my ($self, $value) = @_;
627 if( defined $value ) {
628 $value = lc $value;
629 if( ! $RETRIEVAL_TYPES{$value} ) {
630 $self->warn("invalid retrieval type $value must be one of (" .
631 join(",", keys %RETRIEVAL_TYPES), ")");
632 $value = $DEFAULT_RETRIEVAL_TYPE;
634 $self->{'_retrieval_type'} = $value;
636 return $self->{'_retrieval_type'};
639 =head2 url_params
641 Title : url_params
642 Usage : my $params = $self->url_params or
643 $self->url_params($params)
644 Function: Get/Set the URL parameters for the Web Database
645 Returns : url parameters for Web Database
646 Args : $params - parameters to be appended to the URL for the WebDatabase
648 =cut
650 sub url_params {
651 my ($self, $value) = @_;
652 if( defined $value ) {
653 $self->{'_urlparams'} = $value;
657 =head2 ua
659 Title : ua
660 Usage : my $ua = $self->ua or
661 $self->ua($ua)
662 Function: Get/Set a LWP::UserAgent for use
663 Returns : reference to LWP::UserAgent Object
664 Args : $ua - must be a LWP::UserAgent
666 =cut
668 sub ua {
669 my ($self, $ua) = @_;
670 if( defined $ua && $ua->isa("LWP::UserAgent") ) {
671 $self->{'_ua'} = $ua;
673 return $self->{'_ua'};
676 =head2 postprocess_data
678 Title : postprocess_data
679 Usage : $self->postprocess_data ( 'type' => 'string',
680 'location' => \$datastr);
681 Function: process downloaded data before loading into a Bio::SeqIO
682 Returns : void
683 Args : hash with two keys - 'type' can be 'string' or 'file'
684 - 'location' either file location or string
685 reference containing data
687 =cut
689 sub postprocess_data {
690 my ( $self, %args) = @_;
691 return;
694 # private methods
695 sub _request {
696 my ($self, $url,$tmpfile) = @_;
697 my ($resp);
698 if( defined $tmpfile && $tmpfile ne '' ) {
699 $resp = $self->ua->request($url, $tmpfile);
700 } else {
701 $resp = $self->ua->request($url);
704 if( $resp->is_error ) {
705 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
707 return $resp;
710 #mod_perl-safe replacement for the open(BLEH,'-|') call. if running
711 #under mod_perl, detects it and closes the child's STDIN and STDOUT
712 #handles
713 sub _open_pipe {
714 my ($self) = @_;
715 # is mod_perl running? Which API?
716 my $mp = $self->mod_perl_api;
717 if($mp and ! our $loaded_apache_sp) {
718 my $load_api = ($mp == 1) ? 'use Apache::SubProcess': 'use Apache2::SubProcess';
719 eval $load_api;
720 $@ and $self->throw("$@\n$load_api module required for running under mod_perl");
721 $loaded_apache_sp = 1;
724 my $pipe = IO::Pipe->new();
726 local $SIG{CHLD} = 'IGNORE';
727 defined(my $pid = fork)
728 or $self->throw("Couldn't fork: $!");
730 unless($pid) {
731 #CHILD
732 $pipe->writer();
734 #if we're running under mod_perl, clean up some things after this fork
735 if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) {
736 $r->cleanup_for_exec;
737 #don't read or write the mod_perl parent's tied filehandles
738 close STDIN; close STDOUT;
739 setsid() or $self->throw('Could not detach from parent');
741 } else {
742 #PARENT
743 $pipe->reader();
745 return ( $pid, $pipe );
748 # send web request to specified filehandle, or stdout, for streaming purposes
749 sub _stream_request {
750 my $self = shift;
751 my $request = shift;
752 my $dest_fh = shift || \*STDOUT;
754 # fork so as to pipe output of fetch process through to
755 # postprocess_data method call.
756 my ($child,$fetch) = $self->_open_pipe();
758 if ($child) {
759 #PARENT
760 local ($/) = "//\n"; # assume genbank/swiss format
761 $| = 1;
762 my $records = 0;
763 while (my $record = <$fetch>) {
764 $records++;
765 $self->postprocess_data('type' => 'string',
766 'location' => \$record);
767 print $dest_fh $record;
769 $/ = "\n"; # reset to be safe;
770 close $dest_fh; #must explicitly close here, because the hard
771 #exits don't cloes them for us
773 else {
774 #CHILD
775 $| = 1;
776 my $resp = $self->ua->request($request,
777 sub { print $fetch $_[0] }
779 if( $resp->is_error ) {
780 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
782 close $fetch; #must explicitly close here, because the hard exists
783 #don't close them for us
784 POSIX::_exit(0);
788 sub io {
789 my ($self,$io) = @_;
791 if(defined($io) || (! exists($self->{'_io'}))) {
792 $io = Bio::Root::IO->new() unless $io;
793 $self->{'_io'} = $io;
795 return $self->{'_io'};
799 =head2 delay
801 Title : delay
802 Usage : $secs = $self->delay([$secs])
803 Function: get/set number of seconds to delay between fetches
804 Returns : number of seconds to delay
805 Args : new value
807 NOTE: the default is to use the value specified by delay_policy().
808 This can be overridden by calling this method, or by passing the
809 -delay argument to new().
811 =cut
813 sub delay {
814 my $self = shift;
815 my $d = $self->{'_delay'};
816 $self->{'_delay'} = shift if @_;
820 =head2 delay_policy
822 Title : delay_policy
823 Usage : $secs = $self->delay_policy
824 Function: return number of seconds to delay between calls to remote db
825 Returns : number of seconds to delay
826 Args : none
828 NOTE: The default delay policy is 0s. Override in subclasses to
829 implement delays. The timer has only second resolution, so the delay
830 will actually be +/- 1s.
832 =cut
834 sub delay_policy {
835 my $self = shift;
836 return 0;
839 =head2 _sleep
841 Title : _sleep
842 Usage : $self->_sleep
843 Function: sleep for a number of seconds indicated by the delay policy
844 Returns : none
845 Args : none
847 NOTE: This method keeps track of the last time it was called and only
848 imposes a sleep if it was called more recently than the delay_policy()
849 allows.
851 =cut
853 sub _sleep {
854 my $self = shift;
855 my $last_invocation = $LAST_INVOCATION_TIME;
856 if (time - $LAST_INVOCATION_TIME < $self->delay) {
857 my $delay = $self->delay - (time - $LAST_INVOCATION_TIME);
858 warn "sleeping for $delay seconds\n" if $self->verbose > 0;
859 sleep $delay;
861 $LAST_INVOCATION_TIME = time;
864 =head2 mod_perl_api
866 Title : mod_perl_api
867 Usage : $version = self->mod_perl_api
868 Function: Returns API version of mod_perl being used based on set env. variables
869 Returns : mod_perl API version; if mod_perl isn't loaded, returns 0
870 Args : none
872 =cut
874 sub mod_perl_api {
875 my $self = shift;
876 my $v = $ENV{MOD_PERL} ?
877 ( exists $ENV{MOD_PERL_API_VERSION} && $ENV{MOD_PERL_API_VERSION} >= 2 ) ?
880 : 0;
881 return $v;