1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as the original
18 urllib. Pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 Will install the default handlers. Accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. If one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. The
36 state can be as simple as the URL. It can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
66 urllib2.install_opener(opener)
68 f = urllib2.urlopen('http://www.python.org/')
74 # If an authentication error handler tries to perform
75 # authentication but fails for some reason, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows that the problem was, e.g., that it didn't know
78 # the hash algorithm requested in the challenge, it would be good to
79 # pass that information along to the client, too.
83 # documentation (getting there)
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
108 from cStringIO
import StringIO
110 from StringIO
import StringIO
118 # not sure how many of these need to be gotten rid of
119 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
120 addinfourl
, splitport
, splitgophertype
, splitquery
, \
121 splitattr
, ftpwrapper
, noheaders
123 # support for proxies via environment variables
124 from urllib
import getproxies
126 # support for FileHandler
127 from urllib
import localhost
, url2pathname
129 __version__
= "2.0a1"
132 def urlopen(url
, data
=None):
135 _opener
= build_opener()
136 return _opener
.open(url
, data
)
138 def install_opener(opener
):
142 # do these error classes make sense?
143 # make sure all of the IOError stuff is overridden. we just want to be
146 class URLError(IOError):
147 # URLError is a sub-type of IOError, but it doesn't share any of
148 # the implementation. need to override __init__ and __str__
149 def __init__(self
, reason
):
153 return '<urlopen error %s>' % self
.reason
155 class HTTPError(URLError
, addinfourl
):
156 """Raised when HTTP error occurs, but also acts like non-error return"""
157 __super_init
= addinfourl
.__init
__
159 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
165 # The addinfourl classes depend on fp being a valid file
166 # object. In some cases, the HTTPError may not have a valid
167 # file object. If this happens, the simplest workaround is to
168 # not initialize the base classes.
170 self
.__super
_init
(fp
, hdrs
, url
)
173 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
176 # XXX is this safe? what if user catches exception, then
177 # extracts fp and discards exception?
181 class GopherError(URLError
):
187 def __init__(self
, url
, data
=None, headers
={}):
188 # unwrap('<URL:type://host/path>') --> 'type://host/path'
189 self
.__original
= unwrap(url
)
191 # self.__r_type is what's left after doing the splittype
196 self
.headers
.update(headers
)
198 def __getattr__(self
, attr
):
199 # XXX this is a fallback mechanism to guard against these
200 # methods getting called in a non-standard order. this may be
201 # too complicated and/or unnecessary.
202 # XXX should the __r_XXX attributes be public?
203 if attr
[:12] == '_Request__r_':
205 if hasattr(Request
, 'get_' + name
):
206 getattr(self
, 'get_' + name
)()
207 return getattr(self
, attr
)
208 raise AttributeError, attr
210 def add_data(self
, data
):
214 return self
.data
is not None
219 def get_full_url(self
):
220 return self
.__original
223 if self
.type is None:
224 self
.type, self
.__r
_type
= splittype(self
.__original
)
225 if self
.type is None:
226 raise ValueError, "unknown url type: %s" % self
.__original
230 if self
.host
is None:
231 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
233 self
.host
= unquote(self
.host
)
236 def get_selector(self
):
239 def set_proxy(self
, host
, type):
240 self
.host
, self
.type = host
, type
241 self
.__r
_host
= self
.__original
243 def add_header(self
, key
, val
):
244 # useful for something like authentication
245 self
.headers
[key
] = val
247 class OpenerDirector
:
249 server_version
= "Python-urllib/%s" % __version__
250 self
.addheaders
= [('User-Agent', server_version
)]
251 # manage the individual handlers
253 self
.handle_open
= {}
254 self
.handle_error
= {}
256 def add_handler(self
, handler
):
258 for meth
in dir(handler
):
259 if meth
[-5:] == '_open':
261 if protocol
in self
.handle_open
:
262 self
.handle_open
[protocol
].append(handler
)
264 self
.handle_open
[protocol
] = [handler
]
268 j
= meth
[i
+1:].find('_') + i
+ 1
269 if j
!= -1 and meth
[i
+1:j
] == 'error':
276 dict = self
.handle_error
.get(proto
, {})
278 dict[kind
].append(handler
)
280 dict[kind
] = [handler
]
281 self
.handle_error
[proto
] = dict
285 self
.handlers
.append(handler
)
286 handler
.add_parent(self
)
292 for handler
in self
.handlers
:
296 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
297 # XXX raise an exception if no one else should try to handle
298 # this url. return None if you can't but someone else could.
299 handlers
= chain
.get(kind
, ())
300 for handler
in handlers
:
301 func
= getattr(handler
, meth_name
)
304 if result
is not None:
307 def open(self
, fullurl
, data
=None):
308 # accept a URL or a Request object
309 if isinstance(fullurl
, basestring
):
310 req
= Request(fullurl
, data
)
315 assert isinstance(req
, Request
) # really only care about interface
317 result
= self
._call
_chain
(self
.handle_open
, 'default',
322 type_
= req
.get_type()
323 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
328 return self
._call
_chain
(self
.handle_open
, 'unknown',
331 def error(self
, proto
, *args
):
332 if proto
in ['http', 'https']:
333 # XXX http[s] protocols are special-cased
334 dict = self
.handle_error
['http'] # https is not different than http
335 proto
= args
[2] # YUCK!
336 meth_name
= 'http_error_%d' % proto
340 dict = self
.handle_error
341 meth_name
= proto
+ '_error'
343 args
= (dict, proto
, meth_name
) + args
344 result
= self
._call
_chain
(*args
)
349 args
= (dict, 'default', 'http_error_default') + orig_args
350 return self
._call
_chain
(*args
)
352 # XXX probably also want an abstract factory that knows things like
353 # the fact that a ProxyHandler needs to get inserted first.
354 # would also know when it makes sense to skip a superclass in favor of
355 # a subclass and when it might make sense to include both
357 def build_opener(*handlers
):
358 """Create an opener object from a list of handlers.
360 The opener will use several default handlers, including support
361 for HTTP and FTP. If there is a ProxyHandler, it must be at the
362 front of the list of handlers. (Yuck.)
364 If any of the handlers passed as arguments are subclasses of the
365 default handlers, the default handlers will not be used.
368 opener
= OpenerDirector()
369 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
370 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
371 FTPHandler
, FileHandler
]
372 if hasattr(httplib
, 'HTTPS'):
373 default_classes
.append(HTTPSHandler
)
375 for klass
in default_classes
:
376 for check
in handlers
:
377 if inspect
.isclass(check
):
378 if issubclass(check
, klass
):
380 elif isinstance(check
, klass
):
383 default_classes
.remove(klass
)
385 for klass
in default_classes
:
386 opener
.add_handler(klass())
389 if inspect
.isclass(h
):
391 opener
.add_handler(h
)
395 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler for the 'http_error_default' hook: report the failure as HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError describing the failed request.

        The URL is taken from req.get_full_url(); code, msg, hdrs and fp
        are handed to the HTTPError constructor unchanged.
        """
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
404 class HTTPRedirectHandler(BaseHandler
):
405 # Implementation note: To avoid the server sending us into an
406 # infinite loop, the request object needs to track what URLs we
407 # have already seen. Do this by adding a handler-specific
408 # attribute to the Request object.
409 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
410 if 'location' in headers
:
411 newurl
= headers
['location']
412 elif 'uri' in headers
:
413 newurl
= headers
['uri']
416 newurl
= urlparse
.urljoin(req
.get_full_url(), newurl
)
418 # XXX Probably want to forget about the state of the current
419 # request, although that might interact poorly with other
420 # handlers that also use handler-specific request attributes
421 new
= Request(newurl
, req
.get_data(), req
.headers
)
422 new
.error_302_dict
= {}
423 if hasattr(req
, 'error_302_dict'):
424 if len(req
.error_302_dict
)>10 or \
425 newurl
in req
.error_302_dict
:
426 raise HTTPError(req
.get_full_url(), code
,
427 self
.inf_msg
+ msg
, headers
, fp
)
428 new
.error_302_dict
.update(req
.error_302_dict
)
429 new
.error_302_dict
[newurl
] = newurl
431 # Don't close the fp until we are sure that we won't use it
436 return self
.parent
.open(new
)
438 http_error_301
= http_error_302
440 inf_msg
= "The HTTP server returned a redirect error that would" \
441 "lead to an infinite loop.\n" \
442 "The last 302 error message was:\n"
444 class ProxyHandler(BaseHandler
):
445 def __init__(self
, proxies
=None):
447 proxies
= getproxies()
448 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
449 self
.proxies
= proxies
450 for type, url
in proxies
.items():
451 setattr(self
, '%s_open' % type,
452 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
453 meth(r
, proxy
, type))
455 def proxy_open(self
, req
, proxy
, type):
456 orig_type
= req
.get_type()
457 type, r_type
= splittype(proxy
)
458 host
, XXX
= splithost(r_type
)
460 user_pass
, host
= host
.split('@', 1)
462 user
, password
= user_pass
.split(':', 1)
463 user_pass
= base64
.encodestring('%s:%s' % (unquote(user
),
465 req
.add_header('Proxy-Authorization', 'Basic ' + user_pass
)
467 req
.set_proxy(host
, type)
468 if orig_type
== type:
469 # let other handlers take care of it
470 # XXX this only makes sense if the proxy is before the
474 # need to start over, because the other handlers don't
475 # grok the proxy's URL type
476 return self
.parent
.open(req
)
478 # feature suggested by Duncan Booth
479 # XXX custom is not a good name
481 # either pass a function to the constructor or override handle
482 def __init__(self
, proto
, func
=None, proxy_addr
=None):
485 self
.addr
= proxy_addr
487 def handle(self
, req
):
488 if self
.func
and self
.func(req
):
494 class CustomProxyHandler(BaseHandler
):
495 def __init__(self
, *proxies
):
498 def proxy_open(self
, req
):
499 proto
= req
.get_type()
501 proxies
= self
.proxies
[proto
]
506 req
.set_proxy(p
.get_proxy())
507 return self
.parent
.open(req
)
510 def do_proxy(self
, p
, req
):
511 return self
.parent
.open(req
)
513 def add_proxy(self
, cpo
):
514 if cpo
.proto
in self
.proxies
:
515 self
.proxies
[cpo
.proto
].append(cpo
)
517 self
.proxies
[cpo
.proto
] = [cpo
]
519 class HTTPPasswordMgr
:
523 def add_password(self
, realm
, uri
, user
, passwd
):
524 # uri could be a single URI or a sequence
525 if isinstance(uri
, basestring
):
527 uri
= tuple(map(self
.reduce_uri
, uri
))
528 if not realm
in self
.passwd
:
529 self
.passwd
[realm
] = {}
530 self
.passwd
[realm
][uri
] = (user
, passwd
)
532 def find_user_password(self
, realm
, authuri
):
533 domains
= self
.passwd
.get(realm
, {})
534 authuri
= self
.reduce_uri(authuri
)
535 for uris
, authinfo
in domains
.items():
537 if self
.is_suburi(uri
, authuri
):
541 def reduce_uri(self
, uri
):
542 """Accept netloc or URI and extract only the netloc and path"""
543 parts
= urlparse
.urlparse(uri
)
545 return parts
[1], parts
[2] or '/'
549 def is_suburi(self
, base
, test
):
550 """Check if test is below base in a URI tree
552 Both args must be URIs in reduced form.
556 if base
[0] != test
[0]:
558 common
= posixpath
.commonprefix((base
[1], test
[1]))
559 if len(common
) == len(base
[1]):
564 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr
):
566 def find_user_password(self
, realm
, authuri
):
567 user
, password
= HTTPPasswordMgr
.find_user_password(self
,realm
,authuri
)
569 return user
, password
570 return HTTPPasswordMgr
.find_user_password(self
, None, authuri
)
573 class AbstractBasicAuthHandler
:
575 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re
.I
)
577 # XXX there can actually be multiple auth-schemes in a
578 # www-authenticate header. should probably be a lot more careful
579 # in parsing them to extract multiple alternatives
581 def __init__(self
, password_mgr
=None):
582 if password_mgr
is None:
583 password_mgr
= HTTPPasswordMgr()
584 self
.passwd
= password_mgr
585 self
.add_password
= self
.passwd
.add_password
587 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
588 # XXX could be multiple headers
589 authreq
= headers
.get(authreq
, None)
591 mo
= AbstractBasicAuthHandler
.rx
.match(authreq
)
593 scheme
, realm
= mo
.groups()
594 if scheme
.lower() == 'basic':
595 return self
.retry_http_basic_auth(host
, req
, realm
)
597 def retry_http_basic_auth(self
, host
, req
, realm
):
598 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
600 raw
= "%s:%s" % (user
, pw
)
601 auth
= 'Basic %s' % base64
.encodestring(raw
).strip()
602 if req
.headers
.get(self
.auth_header
, None) == auth
:
604 req
.add_header(self
.auth_header
, auth
)
605 return self
.parent
.open(req
)
609 class HTTPBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
611 auth_header
= 'Authorization'
613 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
614 host
= urlparse
.urlparse(req
.get_full_url())[1]
615 return self
.http_error_auth_reqed('www-authenticate',
619 class ProxyBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
621 auth_header
= 'Proxy-Authorization'
623 def http_error_407(self
, req
, fp
, code
, msg
, headers
):
624 host
= req
.get_host()
625 return self
.http_error_auth_reqed('proxy-authenticate',
629 class AbstractDigestAuthHandler
:
631 def __init__(self
, passwd
=None):
633 passwd
= HTTPPasswordMgr()
635 self
.add_password
= self
.passwd
.add_password
637 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
638 authreq
= headers
.get(self
.auth_header
, None)
640 kind
= authreq
.split()[0]
642 return self
.retry_http_digest_auth(req
, authreq
)
644 def retry_http_digest_auth(self
, req
, auth
):
645 token
, challenge
= auth
.split(' ', 1)
646 chal
= parse_keqv_list(parse_http_list(challenge
))
647 auth
= self
.get_authorization(req
, chal
)
649 auth_val
= 'Digest %s' % auth
650 if req
.headers
.get(self
.auth_header
, None) == auth_val
:
652 req
.add_header(self
.auth_header
, auth_val
)
653 resp
= self
.parent
.open(req
)
656 def get_authorization(self
, req
, chal
):
658 realm
= chal
['realm']
659 nonce
= chal
['nonce']
660 algorithm
= chal
.get('algorithm', 'MD5')
661 # mod_digest doesn't send an opaque, even though it isn't
662 # supposed to be optional
663 opaque
= chal
.get('opaque', None)
667 H
, KD
= self
.get_algorithm_impls(algorithm
)
671 user
, pw
= self
.passwd
.find_user_password(realm
,
676 # XXX not implemented yet
678 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
682 A1
= "%s:%s:%s" % (user
, realm
, pw
)
683 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
684 # XXX selector: what about proxies and full urls
686 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
687 # XXX should the partial digests be encoded too?
689 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
690 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
693 base
= base
+ ', opaque="%s"' % opaque
695 base
= base
+ ', digest="%s"' % entdig
696 if algorithm
!= 'MD5':
697 base
= base
+ ', algorithm="%s"' % algorithm
700 def get_algorithm_impls(self
, algorithm
):
701 # lambdas assume digest modules are imported at the top level
702 if algorithm
== 'MD5':
703 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
704 elif algorithm
== 'SHA':
705 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
707 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
710 def get_entity_digest(self
, data
, chal
):
711 # XXX not implemented yet
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # AbstractDigestAuthHandler looks up self.auth_header (the same
    # attribute name the basic-auth handlers define); the attribute used
    # to be called `header` here, which made the first 401 fail with an
    # AttributeError.  `header` is kept as an alias for existing callers.
    auth_header = 'Authorization'
    header = auth_header

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry a request rejected with 401 using digest credentials.

        Returns whatever http_error_auth_reqed() returns, so the result of
        the retried request is handed back to the caller instead of being
        discarded.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 (proxy authentication required) with digest credentials."""

    # AbstractDigestAuthHandler looks up self.auth_header (the same
    # attribute name the basic-auth handlers define); the attribute used
    # to be called `header` here, which made the first 407 fail with an
    # AttributeError.  `header` is kept as an alias for existing callers.
    auth_header = 'Proxy-Authorization'
    header = auth_header

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry a request rejected with 407 using digest credentials.

        Returns whatever http_error_auth_reqed() returns, so the result of
        the retried request is handed back to the caller instead of being
        discarded.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
738 def encode_digest(digest
):
741 n
= (ord(c
) >> 4) & 0xf
742 hexrep
.append(hex(n
)[-1])
744 hexrep
.append(hex(n
)[-1])
745 return ''.join(hexrep
)
748 class AbstractHTTPHandler(BaseHandler
):
750 def do_open(self
, http_class
, req
):
751 host
= req
.get_host()
753 raise URLError('no host given')
756 h
= http_class(host
) # will parse host:port
758 data
= req
.get_data()
759 h
.putrequest('POST', req
.get_selector())
760 if not 'Content-type' in req
.headers
:
761 h
.putheader('Content-type',
762 'application/x-www-form-urlencoded')
763 if not 'Content-length' in req
.headers
:
764 h
.putheader('Content-length', '%d' % len(data
))
766 h
.putrequest('GET', req
.get_selector())
767 except socket
.error
, err
:
770 scheme
, sel
= splittype(req
.get_selector())
771 sel_host
, sel_path
= splithost(sel
)
772 h
.putheader('Host', sel_host
or host
)
773 for args
in self
.parent
.addheaders
:
775 if name
not in req
.headers
:
777 for k
, v
in req
.headers
.items():
783 code
, msg
, hdrs
= h
.getreply()
786 return addinfourl(fp
, hdrs
, req
.get_full_url())
788 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class HTTPHandler(AbstractHTTPHandler):
    """Opens http URLs through the inherited do_open()."""

    def http_open(self, req):
        """Open req with httplib.HTTP as the connection class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
# Only defined when this httplib build exposes an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Opens https URLs through the inherited do_open()."""

        def https_open(self, req):
            """Open req with httplib.HTTPS as the connection class."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
class UnknownHandler(BaseHandler):
    """Handler for the 'unknown' open hook: always fails with URLError."""

    def unknown_open(self, req):
        """Raise URLError naming the URL type reported by req.get_type()."""
        scheme = req.get_type()
        raise URLError('unknown url type: %s' % scheme)
809 def parse_keqv_list(l
):
810 """Parse list of key=value strings where keys are not duplicated."""
813 k
, v
= elt
.split('=', 1)
814 if v
[0] == '"' and v
[-1] == '"':
819 def parse_http_list(s
):
820 """Parse lists as described by RFC 2068 Section 2.
822 In particular, parse comma-separated lists where the elements of
823 the list may include quoted-strings. A quoted-string could
826 # XXX this function could probably use more testing
838 list.append(s
[start
:])
842 raise ValueError, "unbalanced quotes"
844 list.append(s
[start
:i
+c
])
849 list.append(s
[start
:i
+c
])
857 list.append(s
[start
:i
+c
])
863 return map(lambda x
: x
.strip(), list)
865 class FileHandler(BaseHandler
):
866 # Use local file or FTP depending on form of URL
867 def file_open(self
, req
):
868 url
= req
.get_selector()
869 if url
[:2] == '//' and url
[2:3] != '/':
871 return self
.parent
.open(req
)
873 return self
.open_local_file(req
)
875 # names for the localhost
878 if FileHandler
.names
is None:
879 FileHandler
.names
= (socket
.gethostbyname('localhost'),
880 socket
.gethostbyname(socket
.gethostname()))
881 return FileHandler
.names
883 # not entirely sure what the rules are here
884 def open_local_file(self
, req
):
885 host
= req
.get_host()
886 file = req
.get_selector()
887 localfile
= url2pathname(file)
888 stats
= os
.stat(localfile
)
890 modified
= rfc822
.formatdate(stats
.st_mtime
)
891 mtype
= mimetypes
.guess_type(file)[0]
892 headers
= mimetools
.Message(StringIO(
893 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
894 (mtype
or 'text/plain', size
, modified
)))
896 host
, port
= splitport(host
)
898 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
899 return addinfourl(open(localfile
, 'rb'),
900 headers
, 'file:'+file)
901 raise URLError('file not on local host')
903 class FTPHandler(BaseHandler
):
904 def ftp_open(self
, req
):
905 host
= req
.get_host()
907 raise IOError, ('ftp error', 'no host given')
908 # XXX handle custom username & password
910 host
= socket
.gethostbyname(host
)
911 except socket
.error
, msg
:
913 host
, port
= splitport(host
)
915 port
= ftplib
.FTP_PORT
916 path
, attrs
= splitattr(req
.get_selector())
918 dirs
= path
.split('/')
919 dirs
, file = dirs
[:-1], dirs
[-1]
920 if dirs
and not dirs
[0]:
922 user
= passwd
= '' # XXX
924 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
925 type = file and 'I' or 'D'
927 attr
, value
= splitattr(attr
)
928 if attr
.lower() == 'type' and \
929 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
931 fp
, retrlen
= fw
.retrfile(file, type)
933 mtype
= mimetypes
.guess_type(req
.get_full_url())[0]
935 headers
+= "Content-Type: %s\n" % mtype
936 if retrlen
is not None and retrlen
>= 0:
937 headers
+= "Content-Length: %d\n" % retrlen
938 sf
= StringIO(headers
)
939 headers
= mimetools
.Message(sf
)
940 return addinfourl(fp
, headers
, req
.get_full_url())
941 except ftplib
.all_errors
, msg
:
942 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
944 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
945 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
946 ## fw.ftp.set_debuglevel(1)
949 class CacheFTPHandler(FTPHandler
):
950 # XXX would be nice to have pluggable cache strategies
951 # XXX this stuff is definitely not thread safe
959 def setTimeout(self
, t
):
962 def setMaxConns(self
, m
):
965 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
966 key
= user
, passwd
, host
, port
967 if key
in self
.cache
:
968 self
.timeout
[key
] = time
.time() + self
.delay
970 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
971 self
.timeout
[key
] = time
.time() + self
.delay
973 return self
.cache
[key
]
975 def check_cache(self
):
976 # first check for old ones
978 if self
.soonest
<= t
:
979 for k
, v
in self
.timeout
.items():
981 self
.cache
[k
].close()
984 self
.soonest
= min(self
.timeout
.values())
986 # then check the size
987 if len(self
.cache
) == self
.max_conns
:
988 for k
, v
in self
.timeout
.items():
989 if v
== self
.soonest
:
993 self
.soonest
= min(self
.timeout
.values())
995 class GopherHandler(BaseHandler
):
996 def gopher_open(self
, req
):
997 host
= req
.get_host()
999 raise GopherError('no host given')
1000 host
= unquote(host
)
1001 selector
= req
.get_selector()
1002 type, selector
= splitgophertype(selector
)
1003 selector
, query
= splitquery(selector
)
1004 selector
= unquote(selector
)
1006 query
= unquote(query
)
1007 fp
= gopherlib
.send_query(selector
, query
, host
)
1009 fp
= gopherlib
.send_selector(selector
, host
)
1010 return addinfourl(fp
, noheaders(), req
.get_full_url())
1012 #bleck! don't use this yet
1013 class OpenerFactory
:
1015 default_handlers
= [UnknownHandler
, HTTPHandler
,
1016 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
1017 FTPHandler
, FileHandler
]
1018 proxy_handlers
= [ProxyHandler
]
1020 replacement_handlers
= []
1022 def add_proxy_handler(self
, ph
):
1023 self
.proxy_handlers
= self
.proxy_handlers
+ [ph
]
1025 def add_handler(self
, h
):
1026 self
.handlers
= self
.handlers
+ [h
]
1028 def replace_handler(self
, h
):
1031 def build_opener(self
):
1032 opener
= OpenerDirector()
1033 for ph
in self
.proxy_handlers
:
1034 if inspect
.isclass(ph
):
1036 opener
.add_handler(ph
)
1038 if __name__
== "__main__":
1039 # XXX some of the test code depends on machine configurations that
1040 # are internal to CNRI. Need to set up a public server with the
1041 # right authentication configuration for test purposes.
1042 if socket
.gethostname() == 'bitdiddle':
1043 localhost
= 'bitdiddle.cnri.reston.va.us'
1044 elif socket
.gethostname() == 'bitdiddle.concentric.net':
1045 localhost
= 'localhost'
1049 # Thanks to Fred for finding these!
1050 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1051 'gopher://gopher.vt.edu:10010/10/33',
1054 'file://nonsensename/etc/passwd',
1055 'ftp://www.python.org/pub/python/misc/sousa.au',
1056 'ftp://www.python.org/pub/tmp/blat',
1057 'http://www.espn.com/', # redirect
1058 'http://www.python.org/Spanish/Inquistion/',
1059 ('http://www.python.org/cgi-bin/faqw.py',
1060 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1061 'http://www.python.org/',
1062 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
1065 ## if localhost is not None:
1067 ## 'file://%s/etc/passwd' % localhost,
1068 ## 'http://%s/simple/' % localhost,
1069 ## 'http://%s/digest/' % localhost,
1070 ## 'http://%s/not/found.h' % localhost,
1073 ## bauth = HTTPBasicAuthHandler()
1074 ## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1076 ## dauth = HTTPDigestAuthHandler()
1077 ## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1081 cfh
= CacheFTPHandler()
1084 ## # XXX try out some custom proxy objects too!
1085 ## def at_cnri(req):
1086 ## host = req.get_host()
1088 ## if host[-18:] == '.cnri.reston.va.us':
1090 ## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1091 ## ph = CustomProxyHandler(p)
1093 ## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1094 install_opener(build_opener(cfh
, GopherHandler
))
1097 if isinstance(url
, tuple):
1103 f
= urlopen(url
, req
)
1104 except IOError, err
:
1105 print "IOError:", err
1106 except socket
.error
, err
:
1107 print "socket.error:", err
1111 print "read %d bytes" % len(buf
)