1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as original
18 urllib. Pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 It will install the default handlers. It accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. If one of the arguments is a subclass of a default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. The
36 state can be as simple as the URL. It can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
66 urllib2.install_opener(opener)
68 f = urllib2.urlopen('http://www.python.org/')
74 # If an authentication error handler tries to perform
75 # authentication but fails for some reason, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows that the problem was, e.g., that it didn't know
78 # the hash algorithm requested in the challenge, it would be good to
79 # pass that information along to the client, too.
83 # documentation (getting there)
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
107 from cStringIO
import StringIO
109 from StringIO
import StringIO
117 # not sure how many of these need to be gotten rid of
118 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
119 addinfourl
, splitport
, splitgophertype
, splitquery
, \
120 splitattr
, ftpwrapper
, noheaders
122 # support for proxies via environment variables
123 from urllib
import getproxies
125 # support for FileHandler
126 from urllib
import localhost
, url2pathname
128 __version__
= "2.0a1"
131 def urlopen(url
, data
=None):
134 _opener
= build_opener()
135 return _opener
.open(url
, data
)
137 def install_opener(opener
):
141 # do these error classes make sense?
142 # make sure all of the IOError stuff is overridden. we just want to be
145 class URLError(IOError):
146 # URLError is a sub-type of IOError, but it doesn't share any of
147 # the implementation. need to override __init__ and __str__
148 def __init__(self
, reason
):
152 return '<urlopen error %s>' % self
.reason
154 class HTTPError(URLError
, addinfourl
):
155 """Raised when HTTP error occurs, but also acts like non-error return"""
156 __super_init
= addinfourl
.__init
__
158 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
159 self
.__super
_init
(fp
, hdrs
, url
)
168 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
171 # XXX is this safe? what if user catches exception, then
172 # extracts fp and discards exception?
176 class GopherError(URLError
):
182 def __init__(self
, url
, data
=None, headers
={}):
183 # unwrap('<URL:type://host/path>') --> 'type://host/path'
184 self
.__original
= unwrap(url
)
186 # self.__r_type is what's left after doing the splittype
191 self
.headers
.update(headers
)
193 def __getattr__(self
, attr
):
194 # XXX this is a fallback mechanism to guard against these
195 # methods getting called in a non-standard order. this may be
196 # too complicated and/or unnecessary.
197 # XXX should the __r_XXX attributes be public?
198 if attr
[:12] == '_Request__r_':
200 if hasattr(Request
, 'get_' + name
):
201 getattr(self
, 'get_' + name
)()
202 return getattr(self
, attr
)
203 raise AttributeError, attr
205 def add_data(self
, data
):
209 return self
.data
is not None
214 def get_full_url(self
):
215 return self
.__original
218 if self
.type is None:
219 self
.type, self
.__r
_type
= splittype(self
.__original
)
220 if self
.type is None:
221 raise ValueError, "unknown url type: %s" % self
.__original
225 if self
.host
is None:
226 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
228 self
.host
= unquote(self
.host
)
231 def get_selector(self
):
234 def set_proxy(self
, host
, type):
235 self
.host
, self
.type = host
, type
236 self
.__r
_host
= self
.__original
238 def add_header(self
, key
, val
):
239 # useful for something like authentication
240 self
.headers
[key
] = val
242 class OpenerDirector
:
244 server_version
= "Python-urllib/%s" % __version__
245 self
.addheaders
= [('User-agent', server_version
)]
246 # manage the individual handlers
248 self
.handle_open
= {}
249 self
.handle_error
= {}
251 def add_handler(self
, handler
):
253 for meth
in get_methods(handler
):
254 if meth
[-5:] == '_open':
256 if self
.handle_open
.has_key(protocol
):
257 self
.handle_open
[protocol
].append(handler
)
259 self
.handle_open
[protocol
] = [handler
]
263 j
= meth
[i
+1:].find('_') + i
+ 1
264 if j
!= -1 and meth
[i
+1:j
] == 'error':
271 dict = self
.handle_error
.get(proto
, {})
272 if dict.has_key(kind
):
273 dict[kind
].append(handler
)
275 dict[kind
] = [handler
]
276 self
.handle_error
[proto
] = dict
280 self
.handlers
.append(handler
)
281 handler
.add_parent(self
)
287 for handler
in self
.handlers
:
291 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
292 # XXX raise an exception if no one else should try to handle
293 # this url. return None if you can't but someone else could.
294 handlers
= chain
.get(kind
, ())
295 for handler
in handlers
:
296 func
= getattr(handler
, meth_name
)
299 if result
is not None:
302 def open(self
, fullurl
, data
=None):
303 # accept a URL or a Request object
304 if type(fullurl
) == types
.StringType
:
305 req
= Request(fullurl
, data
)
310 assert isinstance(req
, Request
) # really only care about interface
312 result
= self
._call
_chain
(self
.handle_open
, 'default',
317 type_
= req
.get_type()
318 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
323 return self
._call
_chain
(self
.handle_open
, 'unknown',
326 def error(self
, proto
, *args
):
327 if proto
in ['http', 'https']:
328 # XXX http[s] protocols are special cased
329 dict = self
.handle_error
['http'] # https is not different then http
330 proto
= args
[2] # YUCK!
331 meth_name
= 'http_error_%d' % proto
335 dict = self
.handle_error
336 meth_name
= proto
+ '_error'
338 args
= (dict, proto
, meth_name
) + args
339 result
= self
._call
_chain
(*args
)
344 args
= (dict, 'default', 'http_error_default') + orig_args
345 return self
._call
_chain
(*args
)
347 def is_callable(obj
):
348 # not quite like builtin callable (which I didn't know existed),
349 # not entirely sure it needs to be different
350 if type(obj
) in (types
.BuiltinFunctionType
,
351 types
.BuiltinMethodType
, types
.LambdaType
,
354 if type(obj
) == types
.InstanceType
:
355 return hasattr(obj
, '__call__')
358 def get_methods(inst
):
361 classes
.append(inst
.__class
__)
365 classes
= classes
+ list(klass
.__bases
__)
366 for name
in dir(klass
):
367 attr
= getattr(klass
, name
)
368 if type(attr
) == types
.UnboundMethodType
:
370 for name
in dir(inst
):
371 if is_callable(getattr(inst
, name
)):
373 return methods
.keys()
375 # XXX probably also want an abstract factory that knows things like
376 # the fact that a ProxyHandler needs to get inserted first.
377 # would also know when it makes sense to skip a superclass in favor of
378 # a subclass and when it might make sense to include both
380 def build_opener(*handlers
):
381 """Create an opener object from a list of handlers.
383 The opener will use several default handlers, including support
384 for HTTP and FTP. If there is a ProxyHandler, it must be at the
385 front of the list of handlers. (Yuck.)
387 If any of the handlers passed as arguments are subclasses of the
388 default handlers, the default handlers will not be used.
391 opener
= OpenerDirector()
392 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
393 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
394 FTPHandler
, FileHandler
]
395 if hasattr(httplib
, 'HTTPS'):
396 default_classes
.append(HTTPSHandler
)
398 for klass
in default_classes
:
399 for check
in handlers
:
400 if type(check
) == types
.ClassType
:
401 if issubclass(check
, klass
):
403 elif type(check
) == types
.InstanceType
:
404 if isinstance(check
, klass
):
407 default_classes
.remove(klass
)
409 for klass
in default_classes
:
410 opener
.add_handler(klass())
413 if type(h
) == types
.ClassType
:
415 opener
.add_handler(h
)
419 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler supplying the `http_error_default` hook.

    The hook converts an HTTP error response into a raised HTTPError.
    """

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError describing the failed request.

        req  -- the request object; its full URL is recorded on the error
        fp   -- file-like object for the error response
        code -- HTTP status code
        msg  -- status message
        hdrs -- response headers

        Always raises HTTPError; never returns.
        """
        failed_url = req.get_full_url()
        # HTTPError takes (url, code, msg, hdrs, fp), not the hook's order.
        raise HTTPError(failed_url, code, msg, hdrs, fp)
428 class HTTPRedirectHandler(BaseHandler
):
429 # Implementation note: To avoid the server sending us into an
430 # infinite loop, the request object needs to track what URLs we
431 # have already seen. Do this by adding a handler-specific
432 # attribute to the Request object.
433 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
434 if headers
.has_key('location'):
435 newurl
= headers
['location']
436 elif headers
.has_key('uri'):
437 newurl
= headers
['uri']
443 newurl
= urlparse
.urljoin(req
.get_full_url(), newurl
)
445 # XXX Probably want to forget about the state of the current
446 # request, although that might interact poorly with other
447 # handlers that also use handler-specific request attributes
448 new
= Request(newurl
, req
.get_data())
449 new
.error_302_dict
= {}
450 if hasattr(req
, 'error_302_dict'):
451 if len(req
.error_302_dict
)>10 or \
452 req
.error_302_dict
.has_key(newurl
):
453 raise HTTPError(req
.get_full_url(), code
,
454 self
.inf_msg
+ msg
, headers
)
455 new
.error_302_dict
.update(req
.error_302_dict
)
456 new
.error_302_dict
[newurl
] = newurl
457 return self
.parent
.open(new
)
459 http_error_301
= http_error_302
461 inf_msg
= "The HTTP server returned a redirect error that would" \
462 "lead to an infinite loop.\n" \
463 "The last 302 error message was:\n"
465 class ProxyHandler(BaseHandler
):
466 def __init__(self
, proxies
=None):
468 proxies
= getproxies()
469 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
470 self
.proxies
= proxies
471 for type, url
in proxies
.items():
472 setattr(self
, '%s_open' % type,
473 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
474 meth(r
, proxy
, type))
476 def proxy_open(self
, req
, proxy
, type):
477 orig_type
= req
.get_type()
478 type, r_type
= splittype(proxy
)
479 host
, XXX
= splithost(r_type
)
481 user_pass
, host
= host
.split('@', 1)
482 user_pass
= base64
.encodestring(unquote(user_pass
)).strip()
483 req
.add_header('Proxy-Authorization', 'Basic '+user_pass
)
485 req
.set_proxy(host
, type)
486 if orig_type
== type:
487 # let other handlers take care of it
488 # XXX this only makes sense if the proxy is before the
492 # need to start over, because the other handlers don't
493 # grok the proxy's URL type
494 return self
.parent
.open(req
)
496 # feature suggested by Duncan Booth
497 # XXX custom is not a good name
499 # either pass a function to the constructor or override handle
500 def __init__(self
, proto
, func
=None, proxy_addr
=None):
503 self
.addr
= proxy_addr
505 def handle(self
, req
):
506 if self
.func
and self
.func(req
):
512 class CustomProxyHandler(BaseHandler
):
513 def __init__(self
, *proxies
):
516 def proxy_open(self
, req
):
517 proto
= req
.get_type()
519 proxies
= self
.proxies
[proto
]
524 req
.set_proxy(p
.get_proxy())
525 return self
.parent
.open(req
)
528 def do_proxy(self
, p
, req
):
530 return self
.parent
.open(req
)
532 def add_proxy(self
, cpo
):
533 if self
.proxies
.has_key(cpo
.proto
):
534 self
.proxies
[cpo
.proto
].append(cpo
)
536 self
.proxies
[cpo
.proto
] = [cpo
]
538 class HTTPPasswordMgr
:
542 def add_password(self
, realm
, uri
, user
, passwd
):
543 # uri could be a single URI or a sequence
544 if type(uri
) == types
.StringType
:
546 uri
= tuple(map(self
.reduce_uri
, uri
))
547 if not self
.passwd
.has_key(realm
):
548 self
.passwd
[realm
] = {}
549 self
.passwd
[realm
][uri
] = (user
, passwd
)
551 def find_user_password(self
, realm
, authuri
):
552 domains
= self
.passwd
.get(realm
, {})
553 authuri
= self
.reduce_uri(authuri
)
554 for uris
, authinfo
in domains
.items():
556 if self
.is_suburi(uri
, authuri
):
560 def reduce_uri(self
, uri
):
561 """Accept netloc or URI and extract only the netloc and path"""
562 parts
= urlparse
.urlparse(uri
)
564 return parts
[1], parts
[2] or '/'
568 def is_suburi(self
, base
, test
):
569 """Check if test is below base in a URI tree
571 Both args must be URIs in reduced form.
575 if base
[0] != test
[0]:
577 common
= posixpath
.commonprefix((base
[1], test
[1]))
578 if len(common
) == len(base
[1]):
583 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr
):
585 def find_user_password(self
, realm
, authuri
):
586 user
, password
= HTTPPasswordMgr
.find_user_password(self
,realm
,authuri
)
588 return user
, password
589 return HTTPPasswordMgr
.find_user_password(self
, None, authuri
)
592 class AbstractBasicAuthHandler
:
594 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')
596 # XXX there can actually be multiple auth-schemes in a
597 # www-authenticate header. should probably be a lot more careful
598 # in parsing them to extract multiple alternatives
600 def __init__(self
, password_mgr
=None):
601 if password_mgr
is None:
602 password_mgr
= HTTPPasswordMgr()
603 self
.passwd
= password_mgr
604 self
.add_password
= self
.passwd
.add_password
605 self
.__current
_realm
= None
606 # if __current_realm is not None, then the server must have
607 # refused our name/password and is asking for authorization
608 # again. must be careful to set it to None on successful
611 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
612 # XXX could be multiple headers
613 authreq
= headers
.get(authreq
, None)
615 mo
= AbstractBasicAuthHandler
.rx
.match(authreq
)
617 scheme
, realm
= mo
.groups()
618 if scheme
.lower() == 'basic':
619 return self
.retry_http_basic_auth(host
, req
, realm
)
621 def retry_http_basic_auth(self
, host
, req
, realm
):
622 if self
.__current
_realm
is None:
623 self
.__current
_realm
= realm
625 self
.__current
_realm
= realm
627 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
629 raw
= "%s:%s" % (user
, pw
)
630 auth
= base64
.encodestring(raw
).strip()
631 req
.add_header(self
.header
, 'Basic %s' % auth
)
632 resp
= self
.parent
.open(req
)
633 self
.__current
_realm
= None
636 self
.__current
_realm
= None
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against the origin server (401 responses)."""

    # request header in which the retried request carries the credentials
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 response by delegating to http_error_auth_reqed().

        The network-location part of the request URL is used as the host
        for the password lookup, and the challenge is read from the
        'www-authenticate' response header.  Returns whatever
        http_error_auth_reqed() returns.
        """
        full_url = req.get_full_url()
        netloc = urlparse.urlparse(full_url)[1]
        return self.http_error_auth_reqed('www-authenticate',
                                          netloc, req, headers)
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against a proxy (407 responses)."""

    # request header in which the retried request carries the credentials
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 response by delegating to http_error_auth_reqed().

        The request's host is used for the password lookup, and the
        challenge is read from the 'proxy-authenticate' response header.
        Returns whatever http_error_auth_reqed() returns.
        """
        proxy_host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate',
                                          proxy_host, req, headers)
659 class AbstractDigestAuthHandler
:
def __init__(self, passwd=None):
    """Initialise the handler with a password manager.

    passwd -- object providing add_password(); when omitted (None) a
              new HTTPPasswordMgr is created.
    """
    if passwd is None:
        # The class name was previously misspelled 'HTTPPassowrdMgr',
        # which raised NameError whenever the default was used.
        passwd = HTTPPasswordMgr()
    self.passwd = passwd
    # expose the manager's add_password directly on the handler
    self.add_password = self.passwd.add_password
    # realm of the authentication attempt currently in progress, if any
    self.__current_realm = None
668 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
669 authreq
= headers
.get(self
.header
, None)
671 kind
= authreq
.split()[0]
673 return self
.retry_http_digest_auth(req
, authreq
)
675 def retry_http_digest_auth(self
, req
, auth
):
676 token
, challenge
= auth
.split(' ', 1)
677 chal
= parse_keqv_list(parse_http_list(challenge
))
678 auth
= self
.get_authorization(req
, chal
)
680 req
.add_header(self
.header
, 'Digest %s' % auth
)
681 resp
= self
.parent
.open(req
)
682 self
.__current
_realm
= None
685 def get_authorization(self
, req
, chal
):
687 realm
= chal
['realm']
688 nonce
= chal
['nonce']
689 algorithm
= chal
.get('algorithm', 'MD5')
690 # mod_digest doesn't send an opaque, even though it isn't
691 # supposed to be optional
692 opaque
= chal
.get('opaque', None)
696 if self
.__current
_realm
is None:
697 self
.__current
_realm
= realm
699 self
.__current
_realm
= realm
702 H
, KD
= self
.get_algorithm_impls(algorithm
)
706 user
, pw
= self
.passwd
.find_user_password(realm
,
711 # XXX not implemented yet
713 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
717 A1
= "%s:%s:%s" % (user
, realm
, pw
)
718 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
719 # XXX selector: what about proxies and full urls
721 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
722 # XXX should the partial digests be encoded too?
724 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
725 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
728 base
= base
+ ', opaque="%s"' % opaque
730 base
= base
+ ', digest="%s"' % entdig
731 if algorithm
!= 'MD5':
732 base
= base
+ ', algorithm="%s"' % algorithm
735 def get_algorithm_impls(self
, algorithm
):
736 # lambdas assume digest modules are imported at the top level
737 if algorithm
== 'MD5':
738 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
739 elif algorithm
== 'SHA':
740 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
742 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
745 def get_entity_digest(self
, data
, chal
):
746 # XXX not implemented yet
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # request header in which the retried request carries the credentials
    header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        """Handle a 401 response by retrying with digest credentials.

        The network-location part of the request URL is passed as the
        host.  The result of http_error_auth_reqed() is returned so the
        retried response reaches the opener's handler chain.  Previously
        it was discarded, which made this handler always yield None,
        unlike HTTPBasicAuthHandler.http_error_401.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (407 responses)."""

    # request header in which the retried request carries the credentials
    header = 'Proxy-Authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        """Handle a 407 response by retrying with digest credentials.

        The request's host is passed as the host.  The result of
        http_error_auth_reqed() is returned so the retried response
        reaches the opener's handler chain.  Previously it was
        discarded, which made this handler always yield None, unlike
        ProxyBasicAuthHandler.http_error_407.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
def encode_digest(digest):
    """Return *digest* as a lowercase hexadecimal string.

    digest -- string of raw digest characters

    Each input character contributes exactly two hex digits, high
    nibble first, so the result is twice as long as the input.
    """
    # '%02x' emits both nibbles of a character in one step, replacing
    # the manual shift/mask and hex(n)[-1] per nibble.
    return ''.join(['%02x' % ord(c) for c in digest])
783 class AbstractHTTPHandler(BaseHandler
):
785 def do_open(self
, http_class
, req
):
786 host
= req
.get_host()
788 raise URLError('no host given')
791 h
= http_class(host
) # will parse host:port
793 data
= req
.get_data()
794 h
.putrequest('POST', req
.get_selector())
795 if not req
.headers
.has_key('Content-type'):
796 h
.putheader('Content-type',
797 'application/x-www-form-urlencoded')
798 if not req
.headers
.has_key('Content-length'):
799 h
.putheader('Content-length', '%d' % len(data
))
801 h
.putrequest('GET', req
.get_selector())
802 except socket
.error
, err
:
805 h
.putheader('Host', host
)
806 for args
in self
.parent
.addheaders
:
808 for k
, v
in req
.headers
.items():
814 code
, msg
, hdrs
= h
.getreply()
817 return addinfourl(fp
, hdrs
, req
.get_full_url())
819 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class HTTPHandler(AbstractHTTPHandler):
    """Handler for the `http_open` hook, using httplib.HTTP connections."""

    def http_open(self, req):
        """Open *req* by handing it to do_open() with httplib.HTTP.

        Returns whatever do_open() returns.
        """
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
# HTTPSHandler is defined only when this httplib build exposes HTTPS.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for the `https_open` hook, using httplib.HTTPS."""

        def https_open(self, req):
            """Open *req* by handing it to do_open() with httplib.HTTPS.

            Returns whatever do_open() returns.
            """
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
class UnknownHandler(BaseHandler):
    """Handler whose `unknown_open` hook rejects the request."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type.

        Always raises; never returns.
        """
        # named 'scheme' rather than 'type' to avoid shadowing the builtin
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
840 def parse_keqv_list(l
):
841 """Parse list of key=value strings where keys are not duplicated."""
844 k
, v
= elt
.split('=', 1)
845 if v
[0] == '"' and v
[-1] == '"':
850 def parse_http_list(s
):
851 """Parse lists as described by RFC 2068 Section 2.
853 In particular, parse comma-separated lists where the elements of
854 the list may include quoted-strings. A quoted-string could
857 # XXX this function could probably use more testing
869 list.append(s
[start
:])
873 raise ValueError, "unbalanced quotes"
875 list.append(s
[start
:i
+c
])
880 list.append(s
[start
:i
+c
])
888 list.append(s
[start
:i
+c
])
894 return map(lambda x
: x
.strip(), list)
896 class FileHandler(BaseHandler
):
897 # Use local file or FTP depending on form of URL
898 def file_open(self
, req
):
899 url
= req
.get_selector()
900 if url
[:2] == '//' and url
[2:3] != '/':
902 return self
.parent
.open(req
)
904 return self
.open_local_file(req
)
906 # names for the localhost
909 if FileHandler
.names
is None:
910 FileHandler
.names
= (socket
.gethostbyname('localhost'),
911 socket
.gethostbyname(socket
.gethostname()))
912 return FileHandler
.names
914 # not entirely sure what the rules are here
915 def open_local_file(self
, req
):
916 mtype
= mimetypes
.guess_type(req
.get_selector())[0]
917 headers
= mimetools
.Message(StringIO('Content-Type: %s\n' \
918 % (mtype
or 'text/plain')))
919 host
= req
.get_host()
920 file = req
.get_selector()
922 host
, port
= splitport(host
)
924 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
925 return addinfourl(open(url2pathname(file), 'rb'),
926 headers
, 'file:'+file)
927 raise URLError('file not on local host')
929 class FTPHandler(BaseHandler
):
930 def ftp_open(self
, req
):
931 host
= req
.get_host()
933 raise IOError, ('ftp error', 'no host given')
934 # XXX handle custom username & password
936 host
= socket
.gethostbyname(host
)
937 except socket
.error
, msg
:
939 host
, port
= splitport(host
)
941 port
= ftplib
.FTP_PORT
942 path
, attrs
= splitattr(req
.get_selector())
944 dirs
= path
.split('/')
945 dirs
, file = dirs
[:-1], dirs
[-1]
946 if dirs
and not dirs
[0]:
948 user
= passwd
= '' # XXX
950 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
951 type = file and 'I' or 'D'
953 attr
, value
= splitattr(attr
)
954 if attr
.lower() == 'type' and \
955 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
957 fp
, retrlen
= fw
.retrfile(file, type)
958 if retrlen
is not None and retrlen
>= 0:
959 sf
= StringIO('Content-Length: %d\n' % retrlen
)
960 headers
= mimetools
.Message(sf
)
962 headers
= noheaders()
963 return addinfourl(fp
, headers
, req
.get_full_url())
964 except ftplib
.all_errors
, msg
:
965 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
967 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
968 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
969 ## fw.ftp.set_debuglevel(1)
972 class CacheFTPHandler(FTPHandler
):
973 # XXX would be nice to have pluggable cache strategies
974 # XXX this stuff is definitely not thread safe
982 def setTimeout(self
, t
):
985 def setMaxConns(self
, m
):
988 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
989 key
= user
, passwd
, host
, port
990 if self
.cache
.has_key(key
):
991 self
.timeout
[key
] = time
.time() + self
.delay
993 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
994 self
.timeout
[key
] = time
.time() + self
.delay
996 return self
.cache
[key
]
998 def check_cache(self
):
999 # first check for old ones
1001 if self
.soonest
<= t
:
1002 for k
, v
in self
.timeout
.items():
1004 self
.cache
[k
].close()
1007 self
.soonest
= min(self
.timeout
.values())
1009 # then check the size
1010 if len(self
.cache
) == self
.max_conns
:
1011 for k
, v
in self
.timeout
.items():
1012 if v
== self
.soonest
:
1016 self
.soonest
= min(self
.timeout
.values())
1018 class GopherHandler(BaseHandler
):
1019 def gopher_open(self
, req
):
1020 host
= req
.get_host()
1022 raise GopherError('no host given')
1023 host
= unquote(host
)
1024 selector
= req
.get_selector()
1025 type, selector
= splitgophertype(selector
)
1026 selector
, query
= splitquery(selector
)
1027 selector
= unquote(selector
)
1029 query
= unquote(query
)
1030 fp
= gopherlib
.send_query(selector
, query
, host
)
1032 fp
= gopherlib
.send_selector(selector
, host
)
1033 return addinfourl(fp
, noheaders(), req
.get_full_url())
1035 #bleck! don't use this yet
1036 class OpenerFactory
:
1038 default_handlers
= [UnknownHandler
, HTTPHandler
,
1039 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
1040 FTPHandler
, FileHandler
]
1041 proxy_handlers
= [ProxyHandler
]
1043 replacement_handlers
= []
1045 def add_proxy_handler(self
, ph
):
1046 self
.proxy_handlers
= self
.proxy_handlers
+ [ph
]
1048 def add_handler(self
, h
):
1049 self
.handlers
= self
.handlers
+ [h
]
1051 def replace_handler(self
, h
):
1054 def build_opener(self
):
1055 opener
= OpenerDirectory()
1056 for ph
in self
.proxy_handlers
:
1057 if type(ph
) == types
.ClassType
:
1059 opener
.add_handler(ph
)
1061 if __name__
== "__main__":
1062 # XXX some of the test code depends on machine configurations that
1063 # are internal to CNRI. Need to set up a public server with the
1064 # right authentication configuration for test purposes.
1065 if socket
.gethostname() == 'bitdiddle':
1066 localhost
= 'bitdiddle.cnri.reston.va.us'
1067 elif socket
.gethostname() == 'bitdiddle.concentric.net':
1068 localhost
= 'localhost'
1072 # Thanks to Fred for finding these!
1073 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1074 'gopher://gopher.vt.edu:10010/10/33',
1077 'file://nonsensename/etc/passwd',
1078 'ftp://www.python.org/pub/tmp/httplib.py',
1079 'ftp://www.python.org/pub/tmp/imageop.c',
1080 'ftp://www.python.org/pub/tmp/blat',
1081 'http://www.espn.com/', # redirect
1082 'http://www.python.org/Spanish/Inquistion/',
1083 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1084 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1085 'http://www.python.org/',
1086 'ftp://prep.ai.mit.edu/welcome.msg',
1087 'ftp://www.python.org/pub/tmp/figure.prn',
1088 'ftp://www.python.org/pub/tmp/interp.pl',
1089 'http://checkproxy.cnri.reston.va.us/test/test.html',
1092 if localhost
is not None:
1094 'file://%s/etc/passwd' % localhost
,
1095 'http://%s/simple/' % localhost
,
1096 'http://%s/digest/' % localhost
,
1097 'http://%s/not/found.h' % localhost
,
1100 bauth
= HTTPBasicAuthHandler()
1101 bauth
.add_password('basic_test_realm', localhost
, 'jhylton',
1103 dauth
= HTTPDigestAuthHandler()
1104 dauth
.add_password('digest_test_realm', localhost
, 'jhylton',
1108 cfh
= CacheFTPHandler()
1111 # XXX try out some custom proxy objects too!
1113 host
= req
.get_host()
1115 if host
[-18:] == '.cnri.reston.va.us':
1117 p
= CustomProxy('http', at_cnri
, 'proxy.cnri.reston.va.us')
1118 ph
= CustomProxyHandler(p
)
1120 #install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1123 if type(url
) == types
.TupleType
:
1129 f
= urlopen(url
, req
)
1130 except IOError, err
:
1131 print "IOError:", err
1132 except socket
.error
, err
:
1133 print "socket.error:", err
1137 print "read %d bytes" % len(buf
)