1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as original
18 urllib. pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 will install the default handlers. accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. if one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. the
36 state can be as simple as the URL. it can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 # build a new opener that adds authentication and caching FTP handlers
61 opener = urllib2.build_opener(authinfo, urllib2.CacheFTPHandler)
64 urllib2.install_opener(opener)
66 f = urllib2.urlopen('http://www.python.org/')
72 # If an authentication error handler tries to perform
73 # authentication but fails for some reason, how should the error be
74 # signalled? The client needs to know the HTTP error code. But if
75 # the handler knows that the problem was, e.g., that it didn't know
76 # the hash algo that was requested in the challenge, it would be good to
77 # pass that information along to the client, too.
81 # documentation (getting there)
83 # abstract factory for opener
84 # ftp errors aren't handled cleanly
85 # gopher can return a socket.error
86 # check digest against correct (i.e. non-apache) implementation
106 from cStringIO
import StringIO
108 from StringIO
import StringIO
116 # not sure how many of these need to be gotten rid of
117 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
118 addinfourl
, splitport
, splitgophertype
, splitquery
, \
119 splitattr
, ftpwrapper
, noheaders
121 # support for proxies via environment variables
122 from urllib
import getproxies
124 # support for FileHandler
125 from urllib
import localhost
, thishost
, url2pathname
, pathname2url
127 # support for GopherHandler
128 from urllib
import splitgophertype
, splitquery
130 __version__
= "2.0a1"
133 def urlopen(url
, data
=None):
136 _opener
= build_opener()
137 return _opener
.open(url
, data
)
139 def install_opener(opener
):
143 # do these error classes make sense?
144 # make sure all of the IOError stuff is overridden. we just want to be
147 class URLError(IOError):
148 # URLError is a sub-type of IOError, but it doesn't share any of
149 # the implementation. need to override __init__ and __str__
150 def __init__(self
, reason
):
154 return '<urlopen error %s>' % self
.reason
156 class HTTPError(URLError
, addinfourl
):
157 """Raised when HTTP error occurs, but also acts like non-error return"""
159 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
160 addinfourl
.__init
__(self
, fp
, hdrs
, url
)
169 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
172 # XXX is this safe? what if user catches exception, then
173 # extracts fp and discards exception?
176 class GopherError(URLError
):
180 def __init__(self
, url
, data
=None, headers
={}):
181 # unwrap('<URL:type://host/path>') --> 'type://host/path'
182 self
.__original
= unwrap(url
)
184 # self.__r_type is what's left after doing the splittype
189 self
.headers
.update(headers
)
191 def __getattr__(self
, attr
):
192 # XXX this is a fallback mechanism to guard against these
193 # methods getting called in a non-standard order. this may be
194 # too complicated and/or unnecessary.
195 # XXX should the __r_XXX attributes be public?
196 if attr
[:12] == '_Request__r_':
198 if hasattr(Request
, 'get_' + name
):
199 getattr(self
, 'get_' + name
)()
200 return getattr(self
, attr
)
201 raise AttributeError, attr
203 def add_data(self
, data
):
207 return self
.data
is not None
def get_full_url(self):
    """Return the URL this request was created with.

    The value is the one stored by the constructor, i.e. the result of
    passing the caller's url through unwrap().
    """
    return self.__original
216 if self
.type is None:
217 self
.type, self
.__r
_type
= splittype(self
.__original
)
221 if self
.host
is None:
222 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
224 self
.host
= unquote(self
.host
)
227 def get_selector(self
):
230 def set_proxy(self
, proxy
):
232 # XXX this code is based on urllib, but it doesn't seem
233 # correct. specifically, if the proxy has a port number then
234 # splittype will return the hostname as the type and the port
235 # will be included with everything else
236 self
.type, self
.__r
_type
= splittype(self
.__proxy
)
237 self
.host
, XXX
= splithost(self
.__r
_type
)
238 self
.host
= unquote(self
.host
)
239 self
.__r
_host
= self
.__original
def add_header(self, key, val):
    """Set request header *key* to *val*.

    Useful for something like authentication, where a handler adds an
    'Authorization' header to the request before it is opened again.
    Assigning an existing key overwrites the previous value.
    """
    self.headers[key] = val
245 class OpenerDirector
:
247 server_version
= "Python-urllib/%s" % __version__
248 self
.addheaders
= [('User-agent', server_version
)]
249 # manage the individual handlers
251 self
.handle_open
= {}
252 self
.handle_error
= {}
254 def add_handler(self
, handler
):
256 for meth
in get_methods(handler
):
257 if meth
[-5:] == '_open':
259 if self
.handle_open
.has_key(protocol
):
260 self
.handle_open
[protocol
].append(handler
)
262 self
.handle_open
[protocol
] = [handler
]
265 i
= string
.find(meth
, '_')
266 j
= string
.find(meth
[i
+1:], '_') + i
+ 1
267 if j
!= -1 and meth
[i
+1:j
] == 'error':
271 kind
= string
.atoi(kind
)
274 dict = self
.handle_error
.get(proto
, {})
275 if dict.has_key(kind
):
276 dict[kind
].append(handler
)
278 dict[kind
] = [handler
]
279 self
.handle_error
[proto
] = dict
283 self
.handlers
.append(handler
)
284 handler
.add_parent(self
)
290 for handler
in self
.handlers
:
294 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
295 # XXX raise an exception if no one else should try to handle
296 # this url. return None if you can't but someone else could.
297 handlers
= chain
.get(kind
, ())
298 for handler
in handlers
:
299 func
= getattr(handler
, meth_name
)
300 result
= apply(func
, args
)
301 if result
is not None:
304 def open(self
, fullurl
, data
=None):
305 # accept a URL or a Request object
306 if type(fullurl
) == types
.StringType
:
307 req
= Request(fullurl
, data
)
312 assert isinstance(req
, Request
) # really only care about interface
314 result
= self
._call
_chain
(self
.handle_open
, 'default',
319 type_
= req
.get_type()
320 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
325 return self
._call
_chain
(self
.handle_open
, 'unknown',
328 def error(self
, proto
, *args
):
330 # XXX http protocol is special cased
331 dict = self
.handle_error
[proto
]
332 proto
= args
[2] # YUCK!
333 meth_name
= 'http_error_%d' % proto
337 dict = self
.handle_error
338 meth_name
= proto
+ '_error'
340 args
= (dict, proto
, meth_name
) + args
341 result
= apply(self
._call
_chain
, args
)
346 args
= (dict, 'default', 'http_error_default') + orig_args
347 return apply(self
._call
_chain
, args
)
349 def is_callable(obj
):
350 # not quite like builtin callable (which I didn't know existed),
351 # not entirely sure it needs to be different
352 if type(obj
) in (types
.BuiltinFunctionType
,
353 types
.BuiltinMethodType
, types
.LambdaType
,
356 if type(obj
) == types
.InstanceType
:
357 return hasattr(obj
, '__call__')
360 def get_methods(inst
):
363 classes
.append(inst
.__class
__)
367 classes
= classes
+ list(klass
.__bases
__)
368 for name
in dir(klass
):
369 attr
= getattr(klass
, name
)
370 if type(attr
) == types
.UnboundMethodType
:
372 for name
in dir(inst
):
373 if is_callable(getattr(inst
, name
)):
375 return methods
.keys()
377 # XXX probably also want an abstract factory that knows things like
378 # the fact that a ProxyHandler needs to get inserted first.
379 # would also know when it makes sense to skip a superclass in favor of
380 # a subclass and when it might make sense to include both
382 def build_opener(*handlers
):
383 """Create an opener object from a list of handlers.
385 The opener will use several default handlers, including support
386 for HTTP and FTP. If there is a ProxyHandler, it must be at the
387 front of the list of handlers. (Yuck.)
389 If any of the handlers passed as arguments are subclasses of the
390 default handlers, the default handlers will not be used.
393 opener
= OpenerDirector()
394 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
395 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
396 FTPHandler
, FileHandler
]
398 for klass
in default_classes
:
399 for check
in handlers
:
400 if type(check
) == types
.ClassType
:
401 if issubclass(check
, klass
):
403 elif type(check
) == types
.InstanceType
:
404 if isinstance(check
, klass
):
407 default_classes
.remove(klass
)
409 for klass
in default_classes
:
410 opener
.add_handler(klass())
413 if type(h
) == types
.ClassType
:
415 opener
.add_handler(h
)
419 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback HTTP error handler: turn the error response into HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError for an HTTP error response.

        req is the request that failed, fp the response file object,
        code and msg the status code and reason, and hdrs the response
        headers.  The exception carries fp, so the caller can still read
        the response from it.
        """
        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
428 class HTTPRedirectHandler(BaseHandler
):
429 # Implementation note: To avoid the server sending us into an
430 # infinite loop, the request object needs to track what URLs we
431 # have already seen. Do this by adding a handler-specific
432 # attribute to the Request object.
433 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
434 if headers
.has_key('location'):
435 newurl
= headers
['location']
436 elif headers
.has_key('uri'):
437 newurl
= headers
['uri']
443 # XXX Probably want to forget about the state of the current
444 # request, although that might interact poorly with other
445 # handlers that also use handler-specific request attributes
446 new
= Request(newurl
, req
.get_data())
447 new
.error_302_dict
= {}
448 if hasattr(req
, 'error_302_dict'):
449 if req
.error_302_dict
.has_key(newurl
):
450 raise HTTPError(req
.get_full_url(), code
,
451 self
.inf_msg
+ msg
, headers
)
452 new
.error_302_dict
.update(req
.error_302_dict
)
453 new
.error_302_dict
[newurl
] = newurl
454 return self
.parent
.open(new
)
456 http_error_301
= http_error_302
458 inf_msg
= "The HTTP server returned a redirect error that would" \
459 "lead to an infinite loop.\n" \
460 "The last 302 error message was:\n"
462 class ProxyHandler(BaseHandler
):
463 def __init__(self
, proxies
=None):
465 proxies
= getproxies()
466 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
467 self
.proxies
= proxies
468 for type, url
in proxies
.items():
469 setattr(self
, '%s_open' % type,
470 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
471 meth(r
, proxy
, type))
473 def proxy_open(self
, req
, proxy
, type):
474 orig_type
= req
.get_type()
476 if orig_type
== type:
477 # let other handlers take care of it
478 # XXX this only makes sense if the proxy is before the
482 # need to start over, because the other handlers don't
483 # grok the proxy's URL type
484 return self
.parent
.open(req
)
486 # feature suggested by Duncan Booth
487 # XXX custom is not a good name
489 # either pass a function to the constructor or override handle
490 def __init__(self
, proto
, func
=None, proxy_addr
=None):
493 self
.addr
= proxy_addr
495 def handle(self
, req
):
496 if self
.func
and self
.func(req
):
502 class CustomProxyHandler(BaseHandler
):
503 def __init__(self
, *proxies
):
506 def proxy_open(self
, req
):
507 proto
= req
.get_type()
509 proxies
= self
.proxies
[proto
]
514 req
.set_proxy(p
.get_proxy())
515 return self
.parent
.open(req
)
518 def do_proxy(self
, p
, req
):
520 return self
.parent
.open(req
)
522 def add_proxy(self
, cpo
):
523 if self
.proxies
.has_key(cpo
.proto
):
524 self
.proxies
[cpo
.proto
].append(cpo
)
526 self
.proxies
[cpo
.proto
] = [cpo
]
528 class HTTPPasswordMgr
:
532 def add_password(self
, realm
, uri
, user
, passwd
):
533 # uri could be a single URI or a sequence
534 if type(uri
) == types
.StringType
:
536 uri
= tuple(map(self
.reduce_uri
, uri
))
537 if not self
.passwd
.has_key(realm
):
538 self
.passwd
[realm
] = {}
539 self
.passwd
[realm
][uri
] = (user
, passwd
)
541 def find_user_password(self
, realm
, authuri
):
542 domains
= self
.passwd
.get(realm
, {})
543 authuri
= self
.reduce_uri(authuri
)
544 for uris
, authinfo
in domains
.items():
546 if self
.is_suburi(uri
, authuri
):
550 def reduce_uri(self
, uri
):
551 """Accept netloc or URI and extract only the netloc and path"""
552 parts
= urlparse
.urlparse(uri
)
554 return parts
[1], parts
[2] or '/'
558 def is_suburi(self
, base
, test
):
559 """Check if test is below base in a URI tree
561 Both args must be URIs in reduced form.
565 if base
[0] != test
[0]:
567 common
= os
.path
.commonprefix((base
[1], test
[1]))
568 if len(common
) == len(base
[1]):
573 class HTTPBasicAuthHandler(BaseHandler
):
574 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')
576 # XXX there can actually be multiple auth-schemes in a
577 # www-authenticate header. should probably be a lot more careful
578 # in parsing them to extract multiple alternatives
581 self
.passwd
= HTTPPasswordMgr()
582 self
.add_password
= self
.passwd
.add_password
583 self
.__current
_realm
= None
584 # if __current_realm is not None, then the server must have
585 # refused our name/password and is asking for authorization
586 # again. must be careful to set it to None on successful
589 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
590 # XXX could be mult. headers
591 authreq
= headers
.get('www-authenticate', None)
593 mo
= HTTPBasicAuthHandler
.rx
.match(authreq
)
595 scheme
, realm
= mo
.groups()
596 if string
.lower(scheme
) == 'basic':
597 return self
.retry_http_basic_auth(req
, realm
)
599 def retry_http_basic_auth(self
, req
, realm
):
600 if self
.__current
_realm
is None:
601 self
.__current
_realm
= realm
603 self
.__current
_realm
= realm
605 # XXX host isn't really the correct URI?
606 host
= req
.get_host()
607 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
609 raw
= "%s:%s" % (user
, pw
)
610 auth
= string
.strip(base64
.encodestring(raw
))
611 req
.add_header('Authorization', 'Basic %s' % auth
)
612 resp
= self
.parent
.open(req
)
613 self
.__current
_realm
= None
616 self
.__current
_realm
= None
619 class HTTPDigestAuthHandler(BaseHandler
):
620 """An authentication protocol defined by RFC 2069
622 Digest authentication improves on basic authentication because it
623 does not transmit passwords in the clear.
627 self
.passwd
= HTTPPasswordMgr()
628 self
.add_password
= self
.passwd
.add_password
629 self
.__current
_realm
= None
631 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
632 # XXX could be mult. headers
633 authreq
= headers
.get('www-authenticate', None)
635 kind
= string
.split(authreq
)[0]
637 return self
.retry_http_digest_auth(req
, authreq
)
639 def retry_http_digest_auth(self
, req
, auth
):
640 token
, challenge
= string
.split(auth
, ' ', 1)
641 chal
= parse_keqv_list(parse_http_list(challenge
))
642 auth
= self
.get_authorization(req
, chal
)
644 req
.add_header('Authorization', 'Digest %s' % auth
)
645 resp
= self
.parent
.open(req
)
646 self
.__current
_realm
= None
649 def get_authorization(self
, req
, chal
):
651 realm
= chal
['realm']
652 nonce
= chal
['nonce']
653 algorithm
= chal
.get('algorithm', 'MD5')
654 # mod_digest doesn't send an opaque, even though it isn't
655 # supposed to be optional
656 opaque
= chal
.get('opaque', None)
660 if self
.__current
_realm
is None:
661 self
.__current
_realm
= realm
663 self
.__current
_realm
= realm
666 H
, KD
= self
.get_algorithm_impls(algorithm
)
670 user
, pw
= self
.passwd
.find_user_password(realm
,
675 # XXX not implemented yet
677 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
681 A1
= "%s:%s:%s" % (user
, realm
, pw
)
682 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
683 # XXX selector: what about proxies and full urls
685 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
686 # XXX should the partial digests be encoded too?
688 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
689 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
692 base
= base
+ ', opaque="%s"' % opaque
694 base
= base
+ ', digest="%s"' % entdig
695 if algorithm
!= 'MD5':
696 base
= base
+ ', algorithm="%s"' % algorithm
699 def get_algorithm_impls(self
, algorithm
):
700 # lambdas assume digest modules are imported at the top level
701 if algorithm
== 'MD5':
702 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
703 elif algorithm
== 'SHA':
704 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
706 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
709 def get_entity_digest(self
, data
, chal
):
710 # XXX not implemented yet
713 def encode_digest(digest
):
716 n
= (ord(c
) >> 4) & 0xf
717 hexrep
.append(hex(n
)[-1])
719 hexrep
.append(hex(n
)[-1])
720 return string
.join(hexrep
, '')
723 class HTTPHandler(BaseHandler
):
724 def http_open(self
, req
):
725 # XXX devise a new mechanism to specify user/password
726 host
= req
.get_host()
728 raise URLError('no host given')
730 h
= httplib
.HTTP(host
) # will parse host:port
731 ## h.set_debuglevel(1)
733 data
= req
.get_data()
734 h
.putrequest('POST', req
.get_selector())
735 h
.putheader('Content-type', 'application/x-www-form-urlencoded')
736 h
.putheader('Content-length', '%d' % len(data
))
738 h
.putrequest('GET', req
.get_selector())
739 # XXX proxies would have different host here
740 h
.putheader('Host', host
)
741 for args
in self
.parent
.addheaders
:
742 apply(h
.putheader
, args
)
743 for k
, v
in req
.headers
.items():
747 h
.send(data
+ '\r\n')
749 code
, msg
, hdrs
= h
.getreply()
752 return addinfourl(fp
, hdrs
, req
.get_full_url())
754 # want to make sure the socket is closed, even if error
755 # handling doesn't return immediately. the socket won't
756 # actually be closed until fp is also closed.
760 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class UnknownHandler(BaseHandler):
    """Handler for the 'unknown' open chain: always fails with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the URL type of *req*."""
        # Local renamed from ``type`` so it no longer shadows the builtin.
        url_type = req.get_type()
        raise URLError('unknown url type: %s' % url_type)
767 def parse_keqv_list(l
):
768 """Parse list of key=value strings where keys are not duplicated."""
771 k
, v
= string
.split(elt
, '=', 1)
772 if v
[0] == '"' and v
[-1] == '"':
777 def parse_http_list(s
):
778 """Parse lists as described by RFC 2068 Section 2.
780 In particular, parse comma-separated lists where the elements of
781 the list may include quoted-strings. A quoted-string could
784 # XXX this function could probably use more testing
793 c
= string
.find(cur
, ',')
794 q
= string
.find(cur
, '"')
796 list.append(s
[start
:])
800 raise ValueError, "unbalanced quotes"
802 list.append(s
[start
:i
+c
])
807 list.append(s
[start
:i
+c
])
815 list.append(s
[start
:i
+c
])
821 return map(string
.strip
, list)
823 class FileHandler(BaseHandler
):
824 # Use local file or FTP depending on form of URL
825 def file_open(self
, req
):
826 url
= req
.get_selector()
827 if url
[:2] == '//' and url
[2:3] != '/':
829 return self
.parent
.open(req
)
831 return self
.open_local_file(req
)
833 # names for the localhost
836 if FileHandler
.names
is None:
837 FileHandler
.names
= (socket
.gethostbyname('localhost'),
838 socket
.gethostbyname(socket
.gethostname()))
839 return FileHandler
.names
841 # not entirely sure what the rules are here
842 def open_local_file(self
, req
):
843 mtype
= mimetypes
.guess_type(req
.get_selector())[0]
844 headers
= mimetools
.Message(StringIO('Content-Type: %s\n' \
845 % (mtype
or 'text/plain')))
846 host
= req
.get_host()
847 file = req
.get_selector()
849 host
, port
= splitport(host
)
851 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
852 return addinfourl(open(url2pathname(file), 'rb'),
853 headers
, 'file:'+file)
854 raise URLError('file not on local host')
856 class FTPHandler(BaseHandler
):
857 def ftp_open(self
, req
):
858 host
= req
.get_host()
860 raise IOError, ('ftp error', 'no host given')
861 # XXX handle custom username & password
862 host
= socket
.gethostbyname(host
)
863 host
, port
= splitport(host
)
865 port
= ftplib
.FTP_PORT
866 path
, attrs
= splitattr(req
.get_selector())
868 dirs
= string
.splitfields(path
, '/')
869 dirs
, file = dirs
[:-1], dirs
[-1]
870 if dirs
and not dirs
[0]:
872 user
= passwd
= '' # XXX
874 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
875 type = file and 'I' or 'D'
877 attr
, value
= splitattr(attr
)
878 if string
.lower(attr
) == 'type' and \
879 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
880 type = string
.upper(value
)
881 fp
, retrlen
= fw
.retrfile(file, type)
882 if retrlen
is not None and retrlen
>= 0:
883 sf
= StringIO('Content-Length: %d\n' % retrlen
)
884 headers
= mimetools
.Message(sf
)
886 headers
= noheaders()
887 return addinfourl(fp
, headers
, req
.get_full_url())
888 except ftplib
.all_errors
, msg
:
889 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
891 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
892 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
893 ## fw.ftp.set_debuglevel(1)
896 class CacheFTPHandler(FTPHandler
):
897 # XXX would be nice to have pluggable cache strategies
898 # XXX this stuff is definitely not thread safe
906 def setTimeout(self
, t
):
909 def setMaxConns(self
, m
):
912 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
913 key
= user
, passwd
, host
, port
914 if self
.cache
.has_key(key
):
915 self
.timeout
[key
] = time
.time() + self
.delay
917 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
918 self
.timeout
[key
] = time
.time() + self
.delay
920 return self
.cache
[key
]
922 def check_cache(self
):
923 # first check for old ones
925 if self
.soonest
<= t
:
926 for k
, v
in self
.timeout
.items():
928 self
.cache
[k
].close()
931 self
.soonest
= min(self
.timeout
.values())
933 # then check the size
934 if len(self
.cache
) == self
.max_conns
:
935 for k
, v
in self
.timeout
.items():
936 if v
== self
.soonest
:
940 self
.soonest
= min(self
.timeout
.values())
942 class GopherHandler(BaseHandler
):
943 def gopher_open(self
, req
):
944 host
= req
.get_host()
946 raise GopherError('no host given')
948 selector
= req
.get_selector()
949 type, selector
= splitgophertype(selector
)
950 selector
, query
= splitquery(selector
)
951 selector
= unquote(selector
)
953 query
= unquote(query
)
954 fp
= gopherlib
.send_query(selector
, query
, host
)
956 fp
= gopherlib
.send_selector(selector
, host
)
957 return addinfourl(fp
, noheaders(), req
.get_full_url())
959 #bleck! don't use this yet
962 default_handlers
= [UnknownHandler
, HTTPHandler
,
963 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
964 FTPHandler
, FileHandler
]
965 proxy_handlers
= [ProxyHandler
]
967 replacement_handlers
= []
def add_proxy_handler(self, ph):
    """Append proxy handler *ph* to this object's proxy handler list.

    A new list is built and rebound on self instead of appending in
    place, so the list that was previously bound (presumably the
    class-level default -- confirm against the class body) is never
    mutated.
    """
    self.proxy_handlers = self.proxy_handlers + [ph]
def add_handler(self, h):
    """Append handler *h* to this object's handler list.

    A new list is built and rebound on self instead of appending in
    place, so the list that was previously bound is never mutated.
    """
    self.handlers = self.handlers + [h]
975 def replace_handler(self
, h
):
978 def build_opener(self
):
979 opener
= OpenerDirectory()
980 for ph
in self
.proxy_handlers
:
981 if type(ph
) == types
.ClassType
:
983 opener
.add_handler(ph
)
985 if __name__
== "__main__":
986 # XXX some of the test code depends on machine configurations that
987 # are internal to CNRI. Need to set up a public server with the
988 # right authentication configuration for test purposes.
989 if socket
.gethostname() == 'bitdiddle':
990 localhost
= 'bitdiddle.cnri.reston.va.us'
991 elif socket
.gethostname() == 'walden':
992 localhost
= 'localhost'
996 # Thanks to Fred for finding these!
997 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
998 'gopher://gopher.vt.edu:10010/10/33',
1001 'file://nonsensename/etc/passwd',
1002 'ftp://www.python.org/pub/tmp/httplib.py',
1003 'ftp://www.python.org/pub/tmp/imageop.c',
1004 'ftp://www.python.org/pub/tmp/blat',
1005 'http://www.espn.com/', # redirect
1006 'http://www.python.org/Spanish/Inquistion/',
1007 ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py',
1008 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1009 'http://www.python.org/',
1010 'ftp://prep.ai.mit.edu/welcome.msg',
1011 'ftp://www.python.org/pub/tmp/figure.prn',
1012 'ftp://www.python.org/pub/tmp/interp.pl',
1013 'http://checkproxy.cnri.reston.va.us/test/test.html',
1016 if localhost
is not None:
1018 'file://%s/etc/passwd' % localhost
,
1019 'http://%s/simple/' % localhost
,
1020 'http://%s/digest/' % localhost
,
1021 'http://%s/not/found.h' % localhost
,
1024 bauth
= HTTPBasicAuthHandler()
1025 bauth
.add_password('basic_test_realm', localhost
, 'jhylton',
1027 dauth
= HTTPDigestAuthHandler()
1028 dauth
.add_password('digest_test_realm', localhost
, 'jhylton',
1032 cfh
= CacheFTPHandler()
1035 # XXX try out some custom proxy objects too!
1037 host
= req
.get_host()
1039 if host
[-18:] == '.cnri.reston.va.us':
1041 p
= CustomProxy('http', at_cnri
, 'proxy.cnri.reston.va.us')
1042 ph
= CustomProxyHandler(p
)
1044 install_opener(build_opener(dauth
, bauth
, cfh
, GopherHandler
, ph
))
1047 if type(url
) == types
.TupleType
:
1053 f
= urlopen(url
, req
)
1054 except IOError, err
:
1055 print "IOError:", err
1056 except socket
.error
, err
:
1057 print "socket.error:", err
1061 print "read %d bytes" % len(buf
)