1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as original
18 urllib. pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 will install the default handlers. accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. if one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. the
36 state can be as simple as the URL. it can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
66 urllib2.install_opener(opener)
68 f = urllib2.urlopen('http://www.python.org/')
74 # If an authentication error handler tries to perform
75 # authentication but fails for some reason, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows that the problem was, e.g., that it didn't know
78 # the hash algo that was requested in the challenge, it would be good to
79 # pass that information along to the client, too.
83 # documentation (getting there)
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
106 from cStringIO
import StringIO
108 from StringIO
import StringIO
116 # not sure how many of these need to be gotten rid of
117 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
118 addinfourl
, splitport
, splitgophertype
, splitquery
, \
119 splitattr
, ftpwrapper
, noheaders
121 # support for proxies via environment variables
122 from urllib
import getproxies
124 # support for FileHandler
125 from urllib
import localhost
, url2pathname
127 __version__
= "2.0a1"
130 def urlopen(url
, data
=None):
133 _opener
= build_opener()
134 return _opener
.open(url
, data
)
136 def install_opener(opener
):
140 # do these error classes make sense?
141 # make sure all of the IOError stuff is overridden. we just want to be
144 class URLError(IOError):
145 # URLError is a sub-type of IOError, but it doesn't share any of
146 # the implementation. need to override __init__ and __str__
147 def __init__(self
, reason
):
151 return '<urlopen error %s>' % self
.reason
153 class HTTPError(URLError
, addinfourl
):
154 """Raised when HTTP error occurs, but also acts like non-error return"""
155 __super_init
= addinfourl
.__init
__
157 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
158 self
.__super
_init
(fp
, hdrs
, url
)
167 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
170 # XXX is this safe? what if user catches exception, then
171 # extracts fp and discards exception?
175 class GopherError(URLError
):
181 def __init__(self
, url
, data
=None, headers
={}):
182 # unwrap('<URL:type://host/path>') --> 'type://host/path'
183 self
.__original
= unwrap(url
)
185 # self.__r_type is what's left after doing the splittype
190 self
.headers
.update(headers
)
192 def __getattr__(self
, attr
):
193 # XXX this is a fallback mechanism to guard against these
194 # methods getting called in a non-standard order. this may be
195 # too complicated and/or unnecessary.
196 # XXX should the __r_XXX attributes be public?
197 if attr
[:12] == '_Request__r_':
199 if hasattr(Request
, 'get_' + name
):
200 getattr(self
, 'get_' + name
)()
201 return getattr(self
, attr
)
202 raise AttributeError, attr
204 def add_data(self
, data
):
208 return self
.data
is not None
213 def get_full_url(self
):
214 return self
.__original
217 if self
.type is None:
218 self
.type, self
.__r
_type
= splittype(self
.__original
)
219 if self
.type is None:
220 raise ValueError, "unknown url type: %s" % self
.__original
224 if self
.host
is None:
225 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
227 self
.host
= unquote(self
.host
)
230 def get_selector(self
):
233 def set_proxy(self
, host
, type):
234 self
.host
, self
.type = host
, type
235 self
.__r
_host
= self
.__original
237 def add_header(self
, key
, val
):
238 # useful for something like authentication
239 self
.headers
[key
] = val
241 class OpenerDirector
:
243 server_version
= "Python-urllib/%s" % __version__
244 self
.addheaders
= [('User-agent', server_version
)]
245 # manage the individual handlers
247 self
.handle_open
= {}
248 self
.handle_error
= {}
250 def add_handler(self
, handler
):
252 for meth
in get_methods(handler
):
253 if meth
[-5:] == '_open':
255 if self
.handle_open
.has_key(protocol
):
256 self
.handle_open
[protocol
].append(handler
)
258 self
.handle_open
[protocol
] = [handler
]
262 j
= meth
[i
+1:].find('_') + i
+ 1
263 if j
!= -1 and meth
[i
+1:j
] == 'error':
270 dict = self
.handle_error
.get(proto
, {})
271 if dict.has_key(kind
):
272 dict[kind
].append(handler
)
274 dict[kind
] = [handler
]
275 self
.handle_error
[proto
] = dict
279 self
.handlers
.append(handler
)
280 handler
.add_parent(self
)
286 for handler
in self
.handlers
:
290 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
291 # XXX raise an exception if no one else should try to handle
292 # this url. return None if you can't but someone else could.
293 handlers
= chain
.get(kind
, ())
294 for handler
in handlers
:
295 func
= getattr(handler
, meth_name
)
298 if result
is not None:
301 def open(self
, fullurl
, data
=None):
302 # accept a URL or a Request object
303 if isinstance(fullurl
, types
.StringType
):
304 req
= Request(fullurl
, data
)
309 assert isinstance(req
, Request
) # really only care about interface
311 result
= self
._call
_chain
(self
.handle_open
, 'default',
316 type_
= req
.get_type()
317 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
322 return self
._call
_chain
(self
.handle_open
, 'unknown',
325 def error(self
, proto
, *args
):
326 if proto
in ['http', 'https']:
327 # XXX http[s] protocols are special cased
328 dict = self
.handle_error
['http'] # https is not different then http
329 proto
= args
[2] # YUCK!
330 meth_name
= 'http_error_%d' % proto
334 dict = self
.handle_error
335 meth_name
= proto
+ '_error'
337 args
= (dict, proto
, meth_name
) + args
338 result
= self
._call
_chain
(*args
)
343 args
= (dict, 'default', 'http_error_default') + orig_args
344 return self
._call
_chain
(*args
)
346 def is_callable(obj
):
347 # not quite like builtin callable (which I didn't know existed),
348 # not entirely sure it needs to be different
349 if type(obj
) in (types
.BuiltinFunctionType
,
350 types
.BuiltinMethodType
, types
.LambdaType
,
353 if isinstance(obj
, types
.InstanceType
):
354 return hasattr(obj
, '__call__')
357 def get_methods(inst
):
360 classes
.append(inst
.__class
__)
364 classes
= classes
+ list(klass
.__bases
__)
365 for name
in dir(klass
):
366 attr
= getattr(klass
, name
)
367 if isinstance(attr
, types
.UnboundMethodType
):
369 for name
in dir(inst
):
370 if is_callable(getattr(inst
, name
)):
372 return methods
.keys()
374 # XXX probably also want an abstract factory that knows things like
375 # the fact that a ProxyHandler needs to get inserted first.
376 # would also know when it makes sense to skip a superclass in favor of
377 # a subclass and when it might make sense to include both
379 def build_opener(*handlers
):
380 """Create an opener object from a list of handlers.
382 The opener will use several default handlers, including support
383 for HTTP and FTP. If there is a ProxyHandler, it must be at the
384 front of the list of handlers. (Yuck.)
386 If any of the handlers passed as arguments are subclasses of the
387 default handlers, the default handlers will not be used.
390 opener
= OpenerDirector()
391 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
392 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
393 FTPHandler
, FileHandler
]
394 if hasattr(httplib
, 'HTTPS'):
395 default_classes
.append(HTTPSHandler
)
397 for klass
in default_classes
:
398 for check
in handlers
:
399 if isinstance(check
, types
.ClassType
):
400 if issubclass(check
, klass
):
402 elif isinstance(check
, types
.InstanceType
):
403 if isinstance(check
, klass
):
406 default_classes
.remove(klass
)
408 for klass
in default_classes
:
409 opener
.add_handler(klass())
412 if isinstance(h
, types
.ClassType
):
414 opener
.add_handler(h
)
418 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default method raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise an HTTPError describing the failed request.

        The exception is built from the request's full URL together with
        the status code, message, headers and response file object that
        were passed in.
        """
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
427 class HTTPRedirectHandler(BaseHandler
):
428 # Implementation note: To avoid the server sending us into an
429 # infinite loop, the request object needs to track what URLs we
430 # have already seen. Do this by adding a handler-specific
431 # attribute to the Request object.
432 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
433 if headers
.has_key('location'):
434 newurl
= headers
['location']
435 elif headers
.has_key('uri'):
436 newurl
= headers
['uri']
439 newurl
= urlparse
.urljoin(req
.get_full_url(), newurl
)
441 # XXX Probably want to forget about the state of the current
442 # request, although that might interact poorly with other
443 # handlers that also use handler-specific request attributes
444 new
= Request(newurl
, req
.get_data())
445 new
.error_302_dict
= {}
446 if hasattr(req
, 'error_302_dict'):
447 if len(req
.error_302_dict
)>10 or \
448 req
.error_302_dict
.has_key(newurl
):
449 raise HTTPError(req
.get_full_url(), code
,
450 self
.inf_msg
+ msg
, headers
, fp
)
451 new
.error_302_dict
.update(req
.error_302_dict
)
452 new
.error_302_dict
[newurl
] = newurl
454 # Don't close the fp until we are sure that we won't use it
459 return self
.parent
.open(new
)
461 http_error_301
= http_error_302
463 inf_msg
= "The HTTP server returned a redirect error that would" \
464 "lead to an infinite loop.\n" \
465 "The last 302 error message was:\n"
467 class ProxyHandler(BaseHandler
):
468 def __init__(self
, proxies
=None):
470 proxies
= getproxies()
471 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
472 self
.proxies
= proxies
473 for type, url
in proxies
.items():
474 setattr(self
, '%s_open' % type,
475 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
476 meth(r
, proxy
, type))
478 def proxy_open(self
, req
, proxy
, type):
479 orig_type
= req
.get_type()
480 type, r_type
= splittype(proxy
)
481 host
, XXX
= splithost(r_type
)
483 user_pass
, host
= host
.split('@', 1)
484 user_pass
= base64
.encodestring(unquote(user_pass
)).strip()
485 req
.add_header('Proxy-Authorization', 'Basic '+user_pass
)
487 req
.set_proxy(host
, type)
488 if orig_type
== type:
489 # let other handlers take care of it
490 # XXX this only makes sense if the proxy is before the
494 # need to start over, because the other handlers don't
495 # grok the proxy's URL type
496 return self
.parent
.open(req
)
498 # feature suggested by Duncan Booth
499 # XXX custom is not a good name
501 # either pass a function to the constructor or override handle
502 def __init__(self
, proto
, func
=None, proxy_addr
=None):
505 self
.addr
= proxy_addr
507 def handle(self
, req
):
508 if self
.func
and self
.func(req
):
514 class CustomProxyHandler(BaseHandler
):
515 def __init__(self
, *proxies
):
518 def proxy_open(self
, req
):
519 proto
= req
.get_type()
521 proxies
= self
.proxies
[proto
]
526 req
.set_proxy(p
.get_proxy())
527 return self
.parent
.open(req
)
530 def do_proxy(self
, p
, req
):
531 return self
.parent
.open(req
)
533 def add_proxy(self
, cpo
):
534 if self
.proxies
.has_key(cpo
.proto
):
535 self
.proxies
[cpo
.proto
].append(cpo
)
537 self
.proxies
[cpo
.proto
] = [cpo
]
539 class HTTPPasswordMgr
:
543 def add_password(self
, realm
, uri
, user
, passwd
):
544 # uri could be a single URI or a sequence
545 if isinstance(uri
, types
.StringType
):
547 uri
= tuple(map(self
.reduce_uri
, uri
))
548 if not self
.passwd
.has_key(realm
):
549 self
.passwd
[realm
] = {}
550 self
.passwd
[realm
][uri
] = (user
, passwd
)
552 def find_user_password(self
, realm
, authuri
):
553 domains
= self
.passwd
.get(realm
, {})
554 authuri
= self
.reduce_uri(authuri
)
555 for uris
, authinfo
in domains
.items():
557 if self
.is_suburi(uri
, authuri
):
561 def reduce_uri(self
, uri
):
562 """Accept netloc or URI and extract only the netloc and path"""
563 parts
= urlparse
.urlparse(uri
)
565 return parts
[1], parts
[2] or '/'
569 def is_suburi(self
, base
, test
):
570 """Check if test is below base in a URI tree
572 Both args must be URIs in reduced form.
576 if base
[0] != test
[0]:
578 common
= posixpath
.commonprefix((base
[1], test
[1]))
579 if len(common
) == len(base
[1]):
584 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr
):
586 def find_user_password(self
, realm
, authuri
):
587 user
, password
= HTTPPasswordMgr
.find_user_password(self
,realm
,authuri
)
589 return user
, password
590 return HTTPPasswordMgr
.find_user_password(self
, None, authuri
)
593 class AbstractBasicAuthHandler
:
595 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')
597 # XXX there can actually be multiple auth-schemes in a
598 # www-authenticate header. should probably be a lot more careful
599 # in parsing them to extract multiple alternatives
601 def __init__(self
, password_mgr
=None):
602 if password_mgr
is None:
603 password_mgr
= HTTPPasswordMgr()
604 self
.passwd
= password_mgr
605 self
.add_password
= self
.passwd
.add_password
606 self
.__current
_realm
= None
607 # if __current_realm is not None, then the server must have
608 # refused our name/password and is asking for authorization
609 # again. must be careful to set it to None on successful
612 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
613 # XXX could be multiple headers
614 authreq
= headers
.get(authreq
, None)
616 mo
= AbstractBasicAuthHandler
.rx
.match(authreq
)
618 scheme
, realm
= mo
.groups()
619 if scheme
.lower() == 'basic':
620 return self
.retry_http_basic_auth(host
, req
, realm
)
622 def retry_http_basic_auth(self
, host
, req
, realm
):
623 if self
.__current
_realm
is None:
624 self
.__current
_realm
= realm
626 self
.__current
_realm
= realm
628 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
630 raw
= "%s:%s" % (user
, pw
)
631 auth
= base64
.encodestring(raw
).strip()
632 req
.add_header(self
.header
, 'Basic %s' % auth
)
633 resp
= self
.parent
.open(req
)
634 self
.__current
_realm
= None
637 self
.__current
_realm
= None
640 class HTTPBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
642 header
= 'Authorization'
644 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
645 host
= urlparse
.urlparse(req
.get_full_url())[1]
646 return self
.http_error_auth_reqed('www-authenticate',
650 class ProxyBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
652 header
= 'Proxy-Authorization'
654 def http_error_407(self
, req
, fp
, code
, msg
, headers
):
655 host
= req
.get_host()
656 return self
.http_error_auth_reqed('proxy-authenticate',
660 class AbstractDigestAuthHandler
:
662 def __init__(self
, passwd
=None):
664 passwd
= HTTPPasswordMgr()
666 self
.add_password
= self
.passwd
.add_password
667 self
.__current
_realm
= None
669 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
670 authreq
= headers
.get(self
.header
, None)
672 kind
= authreq
.split()[0]
674 return self
.retry_http_digest_auth(req
, authreq
)
676 def retry_http_digest_auth(self
, req
, auth
):
677 token
, challenge
= auth
.split(' ', 1)
678 chal
= parse_keqv_list(parse_http_list(challenge
))
679 auth
= self
.get_authorization(req
, chal
)
681 req
.add_header(self
.header
, 'Digest %s' % auth
)
682 resp
= self
.parent
.open(req
)
683 self
.__current
_realm
= None
686 def get_authorization(self
, req
, chal
):
688 realm
= chal
['realm']
689 nonce
= chal
['nonce']
690 algorithm
= chal
.get('algorithm', 'MD5')
691 # mod_digest doesn't send an opaque, even though it isn't
692 # supposed to be optional
693 opaque
= chal
.get('opaque', None)
697 if self
.__current
_realm
is None:
698 self
.__current
_realm
= realm
700 self
.__current
_realm
= realm
703 H
, KD
= self
.get_algorithm_impls(algorithm
)
707 user
, pw
= self
.passwd
.find_user_password(realm
,
712 # XXX not implemented yet
714 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
718 A1
= "%s:%s:%s" % (user
, realm
, pw
)
719 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
720 # XXX selector: what about proxies and full urls
722 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
723 # XXX should the partial digests be encoded too?
725 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
726 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
729 base
= base
+ ', opaque="%s"' % opaque
731 base
= base
+ ', digest="%s"' % entdig
732 if algorithm
!= 'MD5':
733 base
= base
+ ', algorithm="%s"' % algorithm
736 def get_algorithm_impls(self
, algorithm
):
737 # lambdas assume digest modules are imported at the top level
738 if algorithm
== 'MD5':
739 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
740 elif algorithm
== 'SHA':
741 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
743 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
746 def get_entity_digest(self
, data
, chal
):
747 # XXX not implemented yet
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # request header that the digest retry logic fills in
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry req with digest credentials after a 401 response.

        The host is taken from the network-location part of the
        request's full URL.  The result of http_error_auth_reqed is
        returned so that a successful retry is passed back to the caller
        instead of being discarded; returning nothing here would make
        the retried response invisible to whoever invoked the handler.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication handler for HTTP 407 responses.

    Credentials are sent in the Proxy-Authorization request header.
    """

    # request header that the digest retry logic fills in
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry req with digest credentials after a 407 response.

        The result of http_error_auth_reqed is returned so that a
        successful retry is passed back to the caller instead of being
        discarded; returning nothing here would make the retried
        response invisible to whoever invoked the handler.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
774 def encode_digest(digest
):
777 n
= (ord(c
) >> 4) & 0xf
778 hexrep
.append(hex(n
)[-1])
780 hexrep
.append(hex(n
)[-1])
781 return ''.join(hexrep
)
784 class AbstractHTTPHandler(BaseHandler
):
786 def do_open(self
, http_class
, req
):
787 host
= req
.get_host()
789 raise URLError('no host given')
792 h
= http_class(host
) # will parse host:port
794 data
= req
.get_data()
795 h
.putrequest('POST', req
.get_selector())
796 if not req
.headers
.has_key('Content-type'):
797 h
.putheader('Content-type',
798 'application/x-www-form-urlencoded')
799 if not req
.headers
.has_key('Content-length'):
800 h
.putheader('Content-length', '%d' % len(data
))
802 h
.putrequest('GET', req
.get_selector())
803 except socket
.error
, err
:
806 h
.putheader('Host', host
)
807 for args
in self
.parent
.addheaders
:
809 for k
, v
in req
.headers
.items():
815 code
, msg
, hdrs
= h
.getreply()
818 return addinfourl(fp
, hdrs
, req
.get_full_url())
820 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http requests, built on AbstractHTTPHandler.do_open."""

    def http_open(self, req):
        """Open req through do_open, using httplib.HTTP for the connection."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
# HTTPSHandler is only defined when httplib exposes an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for https requests, built on AbstractHTTPHandler.do_open."""

        def https_open(self, req):
            """Open req through do_open, using httplib.HTTPS for the connection."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open method always raises URLError."""

    def unknown_open(self, req):
        """Raise URLError reporting the URL type of req."""
        url_type = req.get_type()
        message = 'unknown url type: %s' % url_type
        raise URLError(message)
841 def parse_keqv_list(l
):
842 """Parse list of key=value strings where keys are not duplicated."""
845 k
, v
= elt
.split('=', 1)
846 if v
[0] == '"' and v
[-1] == '"':
851 def parse_http_list(s
):
852 """Parse lists as described by RFC 2068 Section 2.
854 In particular, parse comma-separated lists where the elements of
855 the list may include quoted-strings. A quoted-string could
858 # XXX this function could probably use more testing
870 list.append(s
[start
:])
874 raise ValueError, "unbalanced quotes"
876 list.append(s
[start
:i
+c
])
881 list.append(s
[start
:i
+c
])
889 list.append(s
[start
:i
+c
])
895 return map(lambda x
: x
.strip(), list)
897 class FileHandler(BaseHandler
):
898 # Use local file or FTP depending on form of URL
899 def file_open(self
, req
):
900 url
= req
.get_selector()
901 if url
[:2] == '//' and url
[2:3] != '/':
903 return self
.parent
.open(req
)
905 return self
.open_local_file(req
)
907 # names for the localhost
910 if FileHandler
.names
is None:
911 FileHandler
.names
= (socket
.gethostbyname('localhost'),
912 socket
.gethostbyname(socket
.gethostname()))
913 return FileHandler
.names
915 # not entirely sure what the rules are here
916 def open_local_file(self
, req
):
917 mtype
= mimetypes
.guess_type(req
.get_selector())[0]
918 headers
= mimetools
.Message(StringIO('Content-Type: %s\n' \
919 % (mtype
or 'text/plain')))
920 host
= req
.get_host()
921 file = req
.get_selector()
923 host
, port
= splitport(host
)
925 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
926 return addinfourl(open(url2pathname(file), 'rb'),
927 headers
, 'file:'+file)
928 raise URLError('file not on local host')
930 class FTPHandler(BaseHandler
):
931 def ftp_open(self
, req
):
932 host
= req
.get_host()
934 raise IOError, ('ftp error', 'no host given')
935 # XXX handle custom username & password
937 host
= socket
.gethostbyname(host
)
938 except socket
.error
, msg
:
940 host
, port
= splitport(host
)
942 port
= ftplib
.FTP_PORT
943 path
, attrs
= splitattr(req
.get_selector())
945 dirs
= path
.split('/')
946 dirs
, file = dirs
[:-1], dirs
[-1]
947 if dirs
and not dirs
[0]:
949 user
= passwd
= '' # XXX
951 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
952 type = file and 'I' or 'D'
954 attr
, value
= splitattr(attr
)
955 if attr
.lower() == 'type' and \
956 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
958 fp
, retrlen
= fw
.retrfile(file, type)
959 if retrlen
is not None and retrlen
>= 0:
960 sf
= StringIO('Content-Length: %d\n' % retrlen
)
961 headers
= mimetools
.Message(sf
)
963 headers
= noheaders()
964 return addinfourl(fp
, headers
, req
.get_full_url())
965 except ftplib
.all_errors
, msg
:
966 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
968 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
969 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
970 ## fw.ftp.set_debuglevel(1)
973 class CacheFTPHandler(FTPHandler
):
974 # XXX would be nice to have pluggable cache strategies
975 # XXX this stuff is definitely not thread safe
983 def setTimeout(self
, t
):
986 def setMaxConns(self
, m
):
989 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
990 key
= user
, passwd
, host
, port
991 if self
.cache
.has_key(key
):
992 self
.timeout
[key
] = time
.time() + self
.delay
994 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
995 self
.timeout
[key
] = time
.time() + self
.delay
997 return self
.cache
[key
]
999 def check_cache(self
):
1000 # first check for old ones
1002 if self
.soonest
<= t
:
1003 for k
, v
in self
.timeout
.items():
1005 self
.cache
[k
].close()
1008 self
.soonest
= min(self
.timeout
.values())
1010 # then check the size
1011 if len(self
.cache
) == self
.max_conns
:
1012 for k
, v
in self
.timeout
.items():
1013 if v
== self
.soonest
:
1017 self
.soonest
= min(self
.timeout
.values())
1019 class GopherHandler(BaseHandler
):
1020 def gopher_open(self
, req
):
1021 host
= req
.get_host()
1023 raise GopherError('no host given')
1024 host
= unquote(host
)
1025 selector
= req
.get_selector()
1026 type, selector
= splitgophertype(selector
)
1027 selector
, query
= splitquery(selector
)
1028 selector
= unquote(selector
)
1030 query
= unquote(query
)
1031 fp
= gopherlib
.send_query(selector
, query
, host
)
1033 fp
= gopherlib
.send_selector(selector
, host
)
1034 return addinfourl(fp
, noheaders(), req
.get_full_url())
1036 #bleck! don't use this yet
1037 class OpenerFactory
:
1039 default_handlers
= [UnknownHandler
, HTTPHandler
,
1040 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
1041 FTPHandler
, FileHandler
]
1042 proxy_handlers
= [ProxyHandler
]
1044 replacement_handlers
= []
1046 def add_proxy_handler(self
, ph
):
1047 self
.proxy_handlers
= self
.proxy_handlers
+ [ph
]
1049 def add_handler(self
, h
):
1050 self
.handlers
= self
.handlers
+ [h
]
1052 def replace_handler(self
, h
):
1055 def build_opener(self
):
1056 opener
= OpenerDirector()
1057 for ph
in self
.proxy_handlers
:
1058 if isinstance(ph
, types
.ClassType
):
1060 opener
.add_handler(ph
)
1062 if __name__
== "__main__":
1063 # XXX some of the test code depends on machine configurations that
1064 # are internal to CNRI. Need to set up a public server with the
1065 # right authentication configuration for test purposes.
1066 if socket
.gethostname() == 'bitdiddle':
1067 localhost
= 'bitdiddle.cnri.reston.va.us'
1068 elif socket
.gethostname() == 'bitdiddle.concentric.net':
1069 localhost
= 'localhost'
1073 # Thanks to Fred for finding these!
1074 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1075 'gopher://gopher.vt.edu:10010/10/33',
1078 'file://nonsensename/etc/passwd',
1079 'ftp://www.python.org/pub/tmp/httplib.py',
1080 'ftp://www.python.org/pub/tmp/imageop.c',
1081 'ftp://www.python.org/pub/tmp/blat',
1082 'http://www.espn.com/', # redirect
1083 'http://www.python.org/Spanish/Inquistion/',
1084 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1085 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1086 'http://www.python.org/',
1087 'ftp://prep.ai.mit.edu/welcome.msg',
1088 'ftp://www.python.org/pub/tmp/figure.prn',
1089 'ftp://www.python.org/pub/tmp/interp.pl',
1090 'http://checkproxy.cnri.reston.va.us/test/test.html',
1093 if localhost
is not None:
1095 'file://%s/etc/passwd' % localhost
,
1096 'http://%s/simple/' % localhost
,
1097 'http://%s/digest/' % localhost
,
1098 'http://%s/not/found.h' % localhost
,
1101 bauth
= HTTPBasicAuthHandler()
1102 bauth
.add_password('basic_test_realm', localhost
, 'jhylton',
1104 dauth
= HTTPDigestAuthHandler()
1105 dauth
.add_password('digest_test_realm', localhost
, 'jhylton',
1109 cfh
= CacheFTPHandler()
1112 # XXX try out some custom proxy objects too!
1114 host
= req
.get_host()
1116 if host
[-18:] == '.cnri.reston.va.us':
1118 p
= CustomProxy('http', at_cnri
, 'proxy.cnri.reston.va.us')
1119 ph
= CustomProxyHandler(p
)
1121 #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1124 if isinstance(url
, types
.TupleType
):
1130 f
= urlopen(url
, req
)
1131 except IOError, err
:
1132 print "IOError:", err
1133 except socket
.error
, err
:
1134 print "socket.error:", err
1138 print "read %d bytes" % len(buf
)