t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / Root / HTTPget.pm
blobf9e027ae7f14ab3d2c714a18f91006d3b540e7c9
2 # BioPerl module for fallback HTTP get operations.
4 # Module is proxy-aware
6 # Please direct questions and support issues to <bioperl-l@bioperl.org>
8 # Cared for by Chris Dagdigian <dag@sonsorol.org>
9 # but all of the good stuff was written by
10 # Lincoln Stein.
12 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
16 =head1 NAME
18 Bio::Root::HTTPget - module for fallback HTTP get operations when
19 LWP:: is unavailable
21 =head1 SYNOPSIS
23 use Bio::Root::HTTPget;
24 my $web = Bio::Root::HTTPget->new();
26 my $response = $web->get('http://localhost');
27 $response = $web->get('http://localhost/images');
29 $response = eval { $web->get('http://fred:secret@localhost/ladies_only/')
30 } or warn $@;
32 $response = eval { $web->get('http://jeff:secret@localhost/ladies_only/')
33 } or warn $@;
35 $response = $web->get('http://localhost/images/navauthors.gif');
36 $response = $web->get(-url=>'http://www.google.com',
37 -proxy=>'http://www.modperl.com');
39 =head1 DESCRIPTION
41 This is basically an last-chance module for doing network HTTP get
42 requests in situations where more advanced external CPAN modules such
43 as LWP:: are not installed.
45 The particular reason this module was developed was so that the Open
46 Bio Database Access code can fallback to fetching the default registry
47 files from http://open-bio.org/registry/ without having to depend on
48 external dependencies like Bundle::LWP for network HTTP access.
50 The core of this module was written by Lincoln Stein. It can handle proxies
51 and HTTP-based proxy authentication.
53 =head1 FEEDBACK
55 =head2 Mailing Lists
57 User feedback is an integral part of the evolution of this
58 and other Bioperl modules. Send your comments and suggestions preferably
59 to one of the Bioperl mailing lists.
60 Your participation is much appreciated.
62 bioperl-l@bioperl.org - General discussion
63 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 =head2 Reporting Bugs
67 Report bugs to the Bioperl bug tracking system to help us keep track
68 the bugs and their resolution. Bug reports can be submitted via the
69 web:
71 https://github.com/bioperl/bioperl-live/issues
73 =head1 AUTHOR - Lincoln Stein
75 # Please direct questions and support issues to I<bioperl-l@bioperl.org>
77 Cared for by Chris Dagdigian <dag@sonsorol.org>
79 =head1 APPENDIX
81 The rest of the documentation details each of the object
82 methods. Internal methods are usually preceded with a _
84 =cut
87 # Let the code begin...
89 package Bio::Root::HTTPget;
91 use strict;
92 use warnings;
93 use IO::Socket qw(:DEFAULT :crlf);
95 use base qw(Bio::Root::Root);
98 # default attributes, in case used as a class/sub call
99 my %attributes;
101 =head2 get
103 Title : get
104 Usage : my $resp = get(-url => $url);
105 Function:
106 Returns : string
107 Args : -url => URL to HTTPGet
108 -proxy => proxy to use
109 -user => username for proxy or authentication
110 -pass => password for proxy or authentication
111 -timeout => timeout
113 =cut
115 sub get {
116 my $self;
117 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
118 $self = shift;
121 my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
122 __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
123 my $dest = $proxy || $url;
125 my ($host,$port,$path,$user,$pass)
126 = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
127 $auth_user ||= $user;
128 $auth_pass ||= $pass;
129 if ($self) {
130 unless ($proxy) {
131 $proxy = $self->proxy;
133 unless ($auth_user) {
134 ($auth_user, $auth_pass) = $self->authentication;
137 $path = $url if $proxy;
139 # set up the connection
140 my $socket = _http_connect($host,$port) or __PACKAGE__->throw("can't connect: $@");
142 # the request
143 print $socket "GET $path HTTP/1.0$CRLF";
144 print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
145 # Support virtual hosts
146 print $socket "HOST: $host$CRLF";
148 if ($auth_user && $auth_pass) { # authentication information
149 my $token = _encode_base64("$auth_user:$auth_pass");
150 print $socket "Authorization: Basic $token$CRLF";
152 print $socket "$CRLF";
154 # read the response
155 my $response;
157 local $/ = "$CRLF$CRLF";
158 $response = <$socket>;
161 my ($status_line,@other_lines) = split $CRLF,$response;
162 my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
163 or __PACKAGE__->throw("invalid response from web server: got $response");
165 my %headers = map {/^(\S+): (.+)/} @other_lines;
166 if ($stat_code == 302 || $stat_code == 301) { # redirect
167 my $location = $headers{Location} or
168 __PACKAGE__->throw("invalid redirect: no Location header");
169 return get(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass); # recursive call
172 elsif ($stat_code == 401) { # auth required
173 my $auth_required = $headers{'WWW-Authenticate'};
174 $auth_required =~ /^Basic realm="([^\"]+)"/
175 or __PACKAGE__->throw("server requires unknown type of".
176 " authentication: $auth_required");
177 __PACKAGE__->throw("request failed: $status_line, realm = $1");
180 elsif ($stat_code != 200) {
181 __PACKAGE__->throw("request failed: $status_line");
184 $response = '';
185 while (1) {
186 my $bytes = read($socket,$response,2048,length $response);
187 last unless $bytes > 0;
190 $response;
193 =head2 getFH
195 Title : getFH
196 Usage :
197 Function:
198 Example :
199 Returns : string
200 Args :
202 =cut
204 sub getFH {
205 my $self;
206 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
207 $self = shift;
209 my ($url,$proxy,$timeout,$auth_user,$auth_pass) =
210 __PACKAGE__->_rearrange([qw(URL PROXY TIMEOUT USER PASS)],@_);
211 my $dest = $proxy || $url;
213 my ($host,$port,$path,$user,$pass)
214 = _http_parse_url($dest) or __PACKAGE__->throw("invalid URL $url");
215 $auth_user ||= $user;
216 $auth_pass ||= $pass;
217 $path = $url if $proxy;
219 # set up the connection
220 my $socket = _http_connect($host,$port) or __PACKAGE__->throw("can't connect: $@");
222 # the request
223 print $socket "GET $path HTTP/1.0$CRLF";
224 print $socket "User-Agent: Bioperl fallback fetcher/1.0$CRLF";
225 # Support virtual hosts
226 print $socket "HOST: $host$CRLF";
228 if ($auth_user && $auth_pass) { # authentication information
229 my $token = _encode_base64("$auth_user:$auth_pass");
230 print $socket "Authorization: Basic $token$CRLF";
232 print $socket "$CRLF";
234 # read the response
235 my $response;
237 local $/ = "$CRLF$CRLF";
238 $response = <$socket>;
241 my ($status_line,@other_lines) = split $CRLF,$response;
242 my ($stat_code,$stat_msg) = $status_line =~ m!^HTTP/1\.[01] (\d+) (.+)!
243 or __PACKAGE__->throw("invalid response from web server: got $response");
245 my %headers = map {/^(\S+): (.+)/} @other_lines;
246 if ($stat_code == 302 || $stat_code == 301) { # redirect
247 my $location = $headers{Location} or
248 __PACKAGE__->throw("invalid redirect: no Location header");
249 return getFH(-url => $location, -proxy => $proxy, -timeout => $timeout, -user => $auth_user, -pass => $auth_pass); # recursive call
252 elsif ($stat_code == 401) { # auth required
253 my $auth_required = $headers{'WWW-Authenticate'};
254 $auth_required =~ /^Basic realm="([^\"]+)"/
255 or __PACKAGE__->throw("server requires unknown type of ".
256 "authentication: $auth_required");
257 __PACKAGE__->throw("request failed: $status_line, realm = $1");
260 elsif ($stat_code != 200) {
261 __PACKAGE__->throw("request failed: $status_line");
264 # Now that we are reasonably sure the socket and request
265 # are OK we pass the socket back as a filehandle so it can
266 # be processed by the caller...
268 $socket;
273 =head2 _http_parse_url
275 Title :
276 Usage :
277 Function:
278 Example :
279 Returns :
280 Args :
282 =cut
284 sub _http_parse_url {
285 my $self;
286 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
287 $self = shift;
289 my $url = shift;
290 my ($user,$pass,$hostent,$path) =
291 $url =~ m!^http://(?:([^:]+):([^:]+)@)?([^/]+)(/?[^\#]*)! or return;
292 $path ||= '/';
293 my ($host,$port) = split(':',$hostent);
294 return ($host,$port||80,$path,$user,$pass);
297 =head2 _http_connect
299 Title :
300 Usage :
301 Function:
302 Example :
303 Returns :
304 Args :
306 =cut
308 sub _http_connect {
309 my ($host,$port,$timeout) = @_;
310 my $sock = IO::Socket::INET->new(Proto => 'tcp',
311 Type => SOCK_STREAM,
312 PeerHost => $host,
313 PeerPort => $port,
314 Timeout => $timeout,
316 $sock;
320 =head2 _encode_base64
322 Title :
323 Usage :
324 Function:
325 Example :
326 Returns :
327 Args :
329 =cut
331 sub _encode_base64 {
332 my $self;
333 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
334 $self = shift;
336 my $res = "";
337 my $eol = $_[1];
338 $eol = "\n" unless defined $eol;
339 pos($_[0]) = 0; # ensure start at the beginning
341 $res = join '', map( pack('u',$_)=~ /^.(\S*)/, ($_[0]=~/(.{1,45})/gs));
343 $res =~ tr|` -_|AA-Za-z0-9+/|; # `# help emacs
344 # fix padding at the end
345 my $padding = (3 - length($_[0]) % 3) % 3;
346 $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
347 # break encoded string into lines of no more than 76 characters each
348 if (length $eol) {
349 $res =~ s/(.{1,76})/$1$eol/g;
351 return $res;
355 =head2 proxy
357 Title : proxy
358 Usage : $httpproxy = $db->proxy('http') or
359 $db->proxy(['http','ftp'], 'http://myproxy' )
360 Function: Get/Set a proxy for use of proxy. Defaults to environment variable
361 http_proxy if present.
362 Returns : a string indicating the proxy
363 Args : $protocol : string for the protocol to set/get
364 $proxyurl : url of the proxy to use for the specified protocol
365 $username : username (if proxy requires authentication)
366 $password : password (if proxy requires authentication)
368 =cut
370 sub proxy {
371 my $self;
372 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
373 $self = shift;
375 my ($protocol,$proxy,$username,$password) = @_;
376 my $atts = ref($self) ? $self : \%attributes;
377 $protocol ||= 'http';
378 if (!$proxy) {
379 if (defined $ENV{http_proxy}) {
380 $proxy = $ENV{http_proxy};
381 if ($proxy =~ /\@/) {
382 ($username, $password, $proxy) = $proxy =~ m{http://(\S+):(\S+)\@(\S+)};
383 $proxy = 'http://'.$proxy;
387 if (defined $proxy) {
388 # default to class method call
389 __PACKAGE__->authentication($username, $password)
390 if ($username && $password);
391 $atts->{'_proxy'}->{$protocol} = $proxy;
393 return $atts->{'_proxy'}->{$protocol};
396 =head2 clear_proxy
398 Title : clear_proxy
399 Usage : my $old_prozy = $db->clear_proxy('http')
400 Function: Unsets (clears) the proxy for the protocol indicated
401 Returns : a string indicating the old proxy value
402 Args : $protocol : string for the protocol to clear
404 =cut
406 sub clear_proxy {
407 my $self;
408 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
409 $self = shift;
411 my ($protocol) = @_;
412 my $atts = ref($self) ? $self : \%attributes;
413 $protocol ||= 'http';
414 delete $atts->{'_proxy'}->{$protocol};
417 =head2 authentication
419 Title : authentication
420 Usage : $db->authentication($user,$pass)
421 Function: Get/Set authentication credentials
422 Returns : Array of user/pass
423 Args : Array or user/pass
426 =cut
428 sub authentication {
429 my $self;
430 if($_[0] && (ref($_[0]) or $_[0] =~ /^Bio::/)) {
431 $self = shift;
433 my $atts = ref($self) ? $self : \%attributes;
434 if (@_) {
435 my ($u,$p) = @_;
436 my $atts = ref($self) ? $self : \%attributes;
438 $atts->{'_authentication'} = [ $u,$p];
440 return @{$atts->{'_authentication'} || []};