2 # BioPerl module for fallback HTTP get operations.
4 # Module is proxy-aware
6 # Please direct questions and support issues to <bioperl-l@bioperl.org>
8 # Cared for by Chris Dagdigian <dag@sonsorol.org>
9 # but all of the good stuff was written by
12 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
18 Bio::Root::HTTPget - module for fallback HTTP get operations when
23 use Bio::Root::HTTPget;
24 my $web = Bio::Root::HTTPget->new();
26 my $response = $web->get('http://localhost');
27 $response = $web->get('http://localhost/images');
29 $response = eval { $web->get('http://fred:secret@localhost/ladies_only/')
32 $response = eval { $web->get('http://jeff:secret@localhost/ladies_only/')
35 $response = $web->get('http://localhost/images/navauthors.gif');
36 $response = $web->get(-url=>'http://www.google.com',
37 -proxy=>'http://www.modperl.com');
41 This is basically an last-chance module for doing network HTTP get
42 requests in situations where more advanced external CPAN modules such
43 as LWP:: are not installed.
45 The particular reason this module was developed was so that the Open
46 Bio Database Access code can fallback to fetching the default registry
47 files from http://open-bio.org/registry/ without having to depend on
48 external dependencies like Bundle::LWP for network HTTP access.
50 The core of this module was written by Lincoln Stein. It can handle proxies
51 and HTTP-based proxy authentication.
57 User feedback is an integral part of the evolution of this
58 and other Bioperl modules. Send your comments and suggestions preferably
59 to one of the Bioperl mailing lists.
60 Your participation is much appreciated.
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
67 Report bugs to the Bioperl bug tracking system to help us keep track
68 the bugs and their resolution. Bug reports can be submitted via the
71 https://github.com/bioperl/bioperl-live/issues
73 =head1 AUTHOR - Lincoln Stein
75 # Please direct questions and support issues to I<bioperl-l@bioperl.org>
77 Cared for by Chris Dagdigian <dag@sonsorol.org>
81 The rest of the documentation details each of the object
82 methods. Internal methods are usually preceded with a _
87 # Let the code begin...
89 package Bio
::Root
::HTTPget
;
93 use IO
::Socket
qw(:DEFAULT :crlf);
95 use base
qw(Bio::Root::Root);
98 # default attributes, in case used as a class/sub call
104 Usage : my $resp = get(-url => $url);
107 Args : -url => URL to HTTPGet
108 -proxy => proxy to use
109 -user => username for proxy or authentication
110 -pass => password for proxy or authentication
117 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
121 my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
122 __PACKAGE__
->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
123 my $dest = $proxy || $url;
125 my ($host,$port,$path,$user,$pass)
126 = _http_parse_url
($dest) or __PACKAGE__
->throw("invalid URL $url");
127 $auth_user ||= $user;
128 $auth_pass ||= $pass;
131 $proxy = $self->proxy;
133 unless ($auth_user) {
134 ($auth_user, $auth_pass) = $self->authentication;
137 $path = $url if $proxy;
139 # set up the connection
140 my $socket = _http_connect
($host,$port) or __PACKAGE__
->throw("can't connect: $@");
143 print $socket "GET $path HTTP/1.0$CRLF";
144 print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
145 # Support virtual hosts
146 print $socket "HOST: $host$CRLF";
148 if ($auth_user && $auth_pass) { # authentication information
149 my $token = _encode_base64
("$auth_user:$auth_pass");
150 print $socket "Authorization: Basic $token$CRLF";
152 print $socket "$CRLF";
157 local $/ = "$CRLF$CRLF";
158 $response = <$socket>;
161 my ($status_line,@other_lines) = split $CRLF,$response;
162 my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
163 or __PACKAGE__
->throw("invalid response from web server: got $response");
165 my %headers = map {/^(\S+): (.+)/} @other_lines;
166 if ($stat_code == 302 || $stat_code == 301) { # redirect
167 my $location = $headers{Location
} or
168 __PACKAGE__
->throw("invalid redirect: no Location header");
169 return get
(-url
=> $location, -proxy
=> $proxy, -timeout
=> $timeout, -user
=> $auth_user, -pass
=> $auth_pass); # recursive call
172 elsif ($stat_code == 401) { # auth required
173 my $auth_required = $headers{'WWW-Authenticate'};
174 $auth_required =~ /^Basic realm="([^\"]+)"/
175 or __PACKAGE__
->throw("server requires unknown type of".
176 " authentication: $auth_required");
177 __PACKAGE__
->throw("request failed: $status_line, realm = $1");
180 elsif ($stat_code != 200) {
181 __PACKAGE__
->throw("request failed: $status_line");
186 my $bytes = read($socket,$response,2048,length $response);
187 last unless $bytes > 0;
206 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
209 my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
210 __PACKAGE__
->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
211 my $dest = $proxy || $url;
213 my ($host,$port,$path,$user,$pass)
214 = _http_parse_url
($dest) or __PACKAGE__
->throw("invalid URL $url");
215 $auth_user ||= $user;
216 $auth_pass ||= $pass;
217 $path = $url if $proxy;
219 # set up the connection
220 my $socket = _http_connect
($host,$port) or __PACKAGE__
->throw("can't connect: $@");
223 print $socket "GET $path HTTP/1.0$CRLF";
224 print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
225 # Support virtual hosts
226 print $socket "HOST: $host$CRLF";
228 if ($auth_user && $auth_pass) { # authentication information
229 my $token = _encode_base64
("$auth_user:$auth_pass");
230 print $socket "Authorization: Basic $token$CRLF";
232 print $socket "$CRLF";
237 local $/ = "$CRLF$CRLF";
238 $response = <$socket>;
241 my ($status_line,@other_lines) = split $CRLF,$response;
242 my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
243 or __PACKAGE__
->throw("invalid response from web server: got $response");
245 my %headers = map {/^(\S+): (.+)/} @other_lines;
246 if ($stat_code == 302 || $stat_code == 301) { # redirect
247 my $location = $headers{Location
} or
248 __PACKAGE__
->throw("invalid redirect: no Location header");
249 return getFH
(-url
=> $location, -proxy
=> $proxy, -timeout
=> $timeout, -user
=> $auth_user, -pass
=> $auth_pass); # recursive call
252 elsif ($stat_code == 401) { # auth required
253 my $auth_required = $headers{'WWW-Authenticate'};
254 $auth_required =~ /^Basic realm="([^\"]+)"/
255 or __PACKAGE__
->throw("server requires unknown type of ".
256 "authentication: $auth_required");
257 __PACKAGE__
->throw("request failed: $status_line, realm = $1");
260 elsif ($stat_code != 200) {
261 __PACKAGE__
->throw("request failed: $status_line");
264 # Now that we are reasonably sure the socket and request
265 # are OK we pass the socket back as a filehandle so it can
266 # be processed by the caller...
273 =head2 _http_parse_url
284 sub _http_parse_url
{
286 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
290 my ($user,$pass,$hostent,$path) =
291 $url =~ m!^http://(?:([^:]+):([^:]+)@)?([^/]+)(/?[^\#]*)! or return;
293 my ($host,$port) = split(':',$hostent);
294 return ($host,$port||80,$path,$user,$pass);
309 my ($host,$port,$timeout) = @_;
310 my $sock = IO
::Socket
::INET
->new(Proto
=> 'tcp',
320 =head2 _encode_base64
333 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
338 $eol = "\n" unless defined $eol;
339 pos($_[0]) = 0; # ensure start at the beginning
341 $res = join '', map( pack('u',$_)=~ /^.(\S*)/, ($_[0]=~/(.{1,45})/gs));
343 $res =~ tr
|` -_|AA-Za-z0-9+/|; # `# help emacs
344 # fix padding at the end
345 my $padding = (3 - length($_[0]) % 3) % 3;
346 $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
347 # break encoded string into lines of no more than 76 characters each
349 $res =~ s/(.{1,76})/$1$eol/g;
358 Usage : $httpproxy = $db->proxy('http') or
359 $db->proxy(['http','ftp'], 'http://myproxy' )
360 Function: Get/Set a proxy for use of proxy. Defaults to environment variable
361 http_proxy if present.
362 Returns : a string indicating the proxy
363 Args : $protocol : string for the protocol to set/get
364 $proxyurl : url of the proxy to use for the specified protocol
365 $username : username (if proxy requires authentication)
366 $password : password (if proxy requires authentication)
372 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
375 my ($protocol,$proxy,$username,$password) = @_;
376 my $atts = ref($self) ?
$self : \
%attributes;
377 $protocol ||= 'http';
379 if (defined $ENV{http_proxy
}) {
380 $proxy = $ENV{http_proxy
};
381 if ($proxy =~ /\@/) {
382 ($username, $password, $proxy) = $proxy =~ m{http://(\S+):(\S+)\@(\S+)};
383 $proxy = 'http://'.$proxy;
387 if (defined $proxy) {
388 # default to class method call
389 __PACKAGE__
->authentication($username, $password)
390 if ($username && $password);
391 $atts->{'_proxy'}->{$protocol} = $proxy;
393 return $atts->{'_proxy'}->{$protocol};
399 Usage : my $old_prozy = $db->clear_proxy('http')
400 Function: Unsets (clears) the proxy for the protocol indicated
401 Returns : a string indicating the old proxy value
402 Args : $protocol : string for the protocol to clear
408 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
412 my $atts = ref($self) ?
$self : \
%attributes;
413 $protocol ||= 'http';
414 delete $atts->{'_proxy'}->{$protocol};
417 =head2 authentication
419 Title : authentication
420 Usage : $db->authentication($user,$pass)
421 Function: Get/Set authentication credentials
422 Returns : Array of user/pass
423 Args : Array or user/pass
430 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
433 my $atts = ref($self) ?
$self : \
%attributes;
436 my $atts = ref($self) ?
$self : \
%attributes;
438 $atts->{'_authentication'} = [ $u,$p];
440 return @
{$atts->{'_authentication'} || []};