1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as original
18 urllib. Pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 Will install the default handlers. Accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. If one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. The
36 state can be as simple as the URL. It can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
66 urllib2.install_opener(opener)
68 f = urllib2.urlopen('http://www.python.org/')
74 # If an authentication error handler tries to perform
75 # authentication for some reason but fails, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows that the problem was, e.g., that it didn't know
78 # the hash algorithm requested in the challenge, it would be good to
79 # pass that information along to the client, too.
83 # documentation (getting there)
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
108 from cStringIO
import StringIO
110 from StringIO
import StringIO
118 # not sure how many of these need to be gotten rid of
119 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
120 addinfourl
, splitport
, splitgophertype
, splitquery
, \
121 splitattr
, ftpwrapper
, noheaders
123 # support for proxies via environment variables
124 from urllib
import getproxies
126 # support for FileHandler
127 from urllib
import localhost
, url2pathname
# Library version; OpenerDirector interpolates it into the default
# "Python-urllib/<version>" User-Agent header value.
__version__ = "2.0a1"
132 def urlopen(url
, data
=None):
135 _opener
= build_opener()
136 return _opener
.open(url
, data
)
138 def install_opener(opener
):
142 # do these error classes make sense?
143 # make sure all of the IOError stuff is overridden. we just want to be
146 class URLError(IOError):
147 # URLError is a sub-type of IOError, but it doesn't share any of
148 # the implementation. need to override __init__ and __str__
149 def __init__(self
, reason
):
153 return '<urlopen error %s>' % self
.reason
155 class HTTPError(URLError
, addinfourl
):
156 """Raised when HTTP error occurs, but also acts like non-error return"""
157 __super_init
= addinfourl
.__init
__
159 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
165 # The addinfourl classes depend on fp being a valid file
166 # object. In some cases, the HTTPError may not have a valid
167 # file object. If this happens, the simplest workaround is to
168 # not initialize the base classes.
170 self
.__super
_init
(fp
, hdrs
, url
)
173 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
176 # XXX is this safe? what if user catches exception, then
177 # extracts fp and discards exception?
181 class GopherError(URLError
):
187 def __init__(self
, url
, data
=None, headers
={}):
188 # unwrap('<URL:type://host/path>') --> 'type://host/path'
189 self
.__original
= unwrap(url
)
191 # self.__r_type is what's left after doing the splittype
196 self
.headers
.update(headers
)
198 def __getattr__(self
, attr
):
199 # XXX this is a fallback mechanism to guard against these
200 # methods getting called in a non-standard order. this may be
201 # too complicated and/or unnecessary.
202 # XXX should the __r_XXX attributes be public?
203 if attr
[:12] == '_Request__r_':
205 if hasattr(Request
, 'get_' + name
):
206 getattr(self
, 'get_' + name
)()
207 return getattr(self
, attr
)
208 raise AttributeError, attr
210 def add_data(self
, data
):
214 return self
.data
is not None
def get_full_url(self):
    """Return the URL kept in the private ``__original`` attribute.

    That attribute is assigned in ``__init__`` from ``unwrap(url)``.
    """
    original_url = self.__original
    return original_url
223 if self
.type is None:
224 self
.type, self
.__r
_type
= splittype(self
.__original
)
225 if self
.type is None:
226 raise ValueError, "unknown url type: %s" % self
.__original
230 if self
.host
is None:
231 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
233 self
.host
= unquote(self
.host
)
236 def get_selector(self
):
def set_proxy(self, host, type):
    """Point this request at a proxy.

    ``host`` and ``type`` replace the request's own host and URL type,
    and the private ``__r_host`` attribute is reset to the original URL.
    """
    self.host = host
    self.type = type
    self.__r_host = self.__original
def add_header(self, key, val):
    """Store header ``key`` with value ``val`` on this request.

    An existing entry under the same key is overwritten.  Handlers use
    this for things such as authentication headers.
    """
    header_map = self.headers
    header_map[key] = val
247 class OpenerDirector
:
249 server_version
= "Python-urllib/%s" % __version__
250 self
.addheaders
= [('User-Agent', server_version
)]
251 # manage the individual handlers
253 self
.handle_open
= {}
254 self
.handle_error
= {}
256 def add_handler(self
, handler
):
258 for meth
in dir(handler
):
259 if meth
[-5:] == '_open':
261 if protocol
in self
.handle_open
:
262 self
.handle_open
[protocol
].append(handler
)
264 self
.handle_open
[protocol
] = [handler
]
268 j
= meth
[i
+1:].find('_') + i
+ 1
269 if j
!= -1 and meth
[i
+1:j
] == 'error':
276 dict = self
.handle_error
.get(proto
, {})
278 dict[kind
].append(handler
)
280 dict[kind
] = [handler
]
281 self
.handle_error
[proto
] = dict
285 self
.handlers
.append(handler
)
286 handler
.add_parent(self
)
292 for handler
in self
.handlers
:
296 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
297 # XXX raise an exception if no one else should try to handle
298 # this url. return None if you can't but someone else could.
299 handlers
= chain
.get(kind
, ())
300 for handler
in handlers
:
301 func
= getattr(handler
, meth_name
)
304 if result
is not None:
307 def open(self
, fullurl
, data
=None):
308 # accept a URL or a Request object
309 if isinstance(fullurl
, basestring
):
310 req
= Request(fullurl
, data
)
315 assert isinstance(req
, Request
) # really only care about interface
317 result
= self
._call
_chain
(self
.handle_open
, 'default',
322 type_
= req
.get_type()
323 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
328 return self
._call
_chain
(self
.handle_open
, 'unknown',
331 def error(self
, proto
, *args
):
332 if proto
in ['http', 'https']:
333 # XXX http[s] protocols are special-cased
334 dict = self
.handle_error
['http'] # https is not different than http
335 proto
= args
[2] # YUCK!
336 meth_name
= 'http_error_%d' % proto
340 dict = self
.handle_error
341 meth_name
= proto
+ '_error'
343 args
= (dict, proto
, meth_name
) + args
344 result
= self
._call
_chain
(*args
)
349 args
= (dict, 'default', 'http_error_default') + orig_args
350 return self
._call
_chain
(*args
)
352 # XXX probably also want an abstract factory that knows things like
353 # the fact that a ProxyHandler needs to get inserted first.
354 # would also know when it makes sense to skip a superclass in favor of
355 # a subclass and when it might make sense to include both
357 def build_opener(*handlers
):
358 """Create an opener object from a list of handlers.
360 The opener will use several default handlers, including support
361 for HTTP and FTP. If there is a ProxyHandler, it must be at the
362 front of the list of handlers. (Yuck.)
364 If any of the handlers passed as arguments are subclasses of the
365 default handlers, the default handlers will not be used.
368 opener
= OpenerDirector()
369 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
370 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
371 FTPHandler
, FileHandler
]
372 if hasattr(httplib
, 'HTTPS'):
373 default_classes
.append(HTTPSHandler
)
375 for klass
in default_classes
:
376 for check
in handlers
:
377 if inspect
.isclass(check
):
378 if issubclass(check
, klass
):
380 elif isinstance(check
, klass
):
383 default_classes
.remove(klass
)
385 for klass
in default_classes
:
386 opener
.add_handler(klass())
389 if inspect
.isclass(h
):
391 opener
.add_handler(h
)
395 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler for the ``http_error_default`` hook: always raises."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise HTTPError describing the failed request.

        The exception is built from the request's full URL plus the
        status code, message, headers and response file object passed in.
        """
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
404 class HTTPRedirectHandler(BaseHandler
):
405 # Implementation note: To avoid the server sending us into an
406 # infinite loop, the request object needs to track what URLs we
407 # have already seen. Do this by adding a handler-specific
408 # attribute to the Request object.
409 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
410 if 'location' in headers
:
411 newurl
= headers
['location']
412 elif 'uri' in headers
:
413 newurl
= headers
['uri']
416 newurl
= urlparse
.urljoin(req
.get_full_url(), newurl
)
418 # XXX Probably want to forget about the state of the current
419 # request, although that might interact poorly with other
420 # handlers that also use handler-specific request attributes
421 new
= Request(newurl
, req
.get_data(), req
.headers
)
422 new
.error_302_dict
= {}
423 if hasattr(req
, 'error_302_dict'):
424 if len(req
.error_302_dict
)>10 or \
425 newurl
in req
.error_302_dict
:
426 raise HTTPError(req
.get_full_url(), code
,
427 self
.inf_msg
+ msg
, headers
, fp
)
428 new
.error_302_dict
.update(req
.error_302_dict
)
429 new
.error_302_dict
[newurl
] = newurl
431 # Don't close the fp until we are sure that we won't use it
436 return self
.parent
.open(new
)
438 http_error_301
= http_error_302
440 inf_msg
= "The HTTP server returned a redirect error that would" \
441 "lead to an infinite loop.\n" \
442 "The last 302 error message was:\n"
444 class ProxyHandler(BaseHandler
):
445 def __init__(self
, proxies
=None):
447 proxies
= getproxies()
448 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
449 self
.proxies
= proxies
450 for type, url
in proxies
.items():
451 setattr(self
, '%s_open' % type,
452 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
453 meth(r
, proxy
, type))
455 def proxy_open(self
, req
, proxy
, type):
456 orig_type
= req
.get_type()
457 type, r_type
= splittype(proxy
)
458 host
, XXX
= splithost(r_type
)
460 user_pass
, host
= host
.split('@', 1)
462 user
, password
= user_pass
.split(':', 1)
463 user_pass
= base64
.encodestring('%s:%s' % (unquote(user
),
465 req
.add_header('Proxy-Authorization', 'Basic ' + user_pass
)
467 req
.set_proxy(host
, type)
468 if orig_type
== type:
469 # let other handlers take care of it
470 # XXX this only makes sense if the proxy is before the
474 # need to start over, because the other handlers don't
475 # grok the proxy's URL type
476 return self
.parent
.open(req
)
478 # feature suggested by Duncan Booth
479 # XXX custom is not a good name
481 # either pass a function to the constructor or override handle
482 def __init__(self
, proto
, func
=None, proxy_addr
=None):
485 self
.addr
= proxy_addr
487 def handle(self
, req
):
488 if self
.func
and self
.func(req
):
494 class CustomProxyHandler(BaseHandler
):
495 def __init__(self
, *proxies
):
498 def proxy_open(self
, req
):
499 proto
= req
.get_type()
501 proxies
= self
.proxies
[proto
]
506 req
.set_proxy(p
.get_proxy())
507 return self
.parent
.open(req
)
def do_proxy(self, p, req):
    """Re-open ``req`` through the parent opener and return its result.

    The proxy object ``p`` is accepted but not consulted here.
    """
    opener = self.parent
    return opener.open(req)
513 def add_proxy(self
, cpo
):
514 if cpo
.proto
in self
.proxies
:
515 self
.proxies
[cpo
.proto
].append(cpo
)
517 self
.proxies
[cpo
.proto
] = [cpo
]
519 class HTTPPasswordMgr
:
523 def add_password(self
, realm
, uri
, user
, passwd
):
524 # uri could be a single URI or a sequence
525 if isinstance(uri
, basestring
):
527 uri
= tuple(map(self
.reduce_uri
, uri
))
528 if not realm
in self
.passwd
:
529 self
.passwd
[realm
] = {}
530 self
.passwd
[realm
][uri
] = (user
, passwd
)
532 def find_user_password(self
, realm
, authuri
):
533 domains
= self
.passwd
.get(realm
, {})
534 authuri
= self
.reduce_uri(authuri
)
535 for uris
, authinfo
in domains
.items():
537 if self
.is_suburi(uri
, authuri
):
541 def reduce_uri(self
, uri
):
542 """Accept netloc or URI and extract only the netloc and path"""
543 parts
= urlparse
.urlparse(uri
)
545 return parts
[1], parts
[2] or '/'
549 def is_suburi(self
, base
, test
):
550 """Check if test is below base in a URI tree
552 Both args must be URIs in reduced form.
556 if base
[0] != test
[0]:
558 common
= posixpath
.commonprefix((base
[1], test
[1]))
559 if len(common
) == len(base
[1]):
564 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr
):
566 def find_user_password(self
, realm
, authuri
):
567 user
, password
= HTTPPasswordMgr
.find_user_password(self
,realm
,authuri
)
569 return user
, password
570 return HTTPPasswordMgr
.find_user_password(self
, None, authuri
)
573 class AbstractBasicAuthHandler
:
575 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', re
.I
)
577 # XXX there can actually be multiple auth-schemes in a
578 # www-authenticate header. should probably be a lot more careful
579 # in parsing them to extract multiple alternatives
581 def __init__(self
, password_mgr
=None):
582 if password_mgr
is None:
583 password_mgr
= HTTPPasswordMgr()
584 self
.passwd
= password_mgr
585 self
.add_password
= self
.passwd
.add_password
587 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
588 # XXX could be multiple headers
589 authreq
= headers
.get(authreq
, None)
591 mo
= AbstractBasicAuthHandler
.rx
.match(authreq
)
593 scheme
, realm
= mo
.groups()
594 if scheme
.lower() == 'basic':
595 return self
.retry_http_basic_auth(host
, req
, realm
)
597 def retry_http_basic_auth(self
, host
, req
, realm
):
598 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
600 raw
= "%s:%s" % (user
, pw
)
601 auth
= 'Basic %s' % base64
.encodestring(raw
).strip()
602 if req
.headers
.get(self
.auth_header
, None) == auth
:
604 req
.add_header(self
.auth_header
, auth
)
605 return self
.parent
.open(req
)
609 class HTTPBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
611 auth_header
= 'Authorization'
613 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
614 host
= urlparse
.urlparse(req
.get_full_url())[1]
615 return self
.http_error_auth_reqed('www-authenticate',
619 class ProxyBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
621 auth_header
= 'Proxy-Authorization'
623 def http_error_407(self
, req
, fp
, code
, msg
, headers
):
624 host
= req
.get_host()
625 return self
.http_error_auth_reqed('proxy-authenticate',
629 class AbstractDigestAuthHandler
:
631 def __init__(self
, passwd
=None):
633 passwd
= HTTPPasswordMgr()
635 self
.add_password
= self
.passwd
.add_password
637 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
638 authreq
= headers
.get(self
.auth_header
, None)
640 kind
= authreq
.split()[0]
642 return self
.retry_http_digest_auth(req
, authreq
)
644 def retry_http_digest_auth(self
, req
, auth
):
645 token
, challenge
= auth
.split(' ', 1)
646 chal
= parse_keqv_list(parse_http_list(challenge
))
647 auth
= self
.get_authorization(req
, chal
)
649 auth_val
= 'Digest %s' % auth
650 if req
.headers
.get(self
.auth_header
, None) == auth_val
:
652 req
.add_header(self
.auth_header
, auth_val
)
653 resp
= self
.parent
.open(req
)
656 def get_authorization(self
, req
, chal
):
658 realm
= chal
['realm']
659 nonce
= chal
['nonce']
660 algorithm
= chal
.get('algorithm', 'MD5')
661 # mod_digest doesn't send an opaque, even though it isn't
662 # supposed to be optional
663 opaque
= chal
.get('opaque', None)
667 H
, KD
= self
.get_algorithm_impls(algorithm
)
671 user
, pw
= self
.passwd
.find_user_password(realm
,
676 # XXX not implemented yet
678 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
682 A1
= "%s:%s:%s" % (user
, realm
, pw
)
683 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
684 # XXX selector: what about proxies and full urls
686 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
687 # XXX should the partial digests be encoded too?
689 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
690 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
693 base
= base
+ ', opaque="%s"' % opaque
695 base
= base
+ ', digest="%s"' % entdig
696 if algorithm
!= 'MD5':
697 base
= base
+ ', algorithm="%s"' % algorithm
700 def get_algorithm_impls(self
, algorithm
):
701 # lambdas assume digest modules are imported at the top level
702 if algorithm
== 'MD5':
703 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
704 elif algorithm
== 'SHA':
705 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
707 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
710 def get_entity_digest(self
, data
, chal
):
711 # XXX not implemented yet
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # AbstractDigestAuthHandler reads ``self.auth_header`` (the same
    # attribute name the basic-auth handlers define).  ``header`` is
    # kept as an alias so code using the old spelling still works.
    auth_header = 'Authorization'
    header = auth_header

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry a request that was answered with 401 using digest auth.

        Returns whatever ``http_error_auth_reqed`` returns: the response
        of the retried request, or None when no retry was made.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        # The result must be returned: the opener's handler chain only
        # stops on a non-None result, so dropping it would discard a
        # successful retry.
        return self.http_error_auth_reqed('www-authenticate', host, req,
                                          headers)
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest-authentication handler for 407 replies."""

    # AbstractDigestAuthHandler reads ``self.auth_header`` (the same
    # attribute name the basic-auth handlers define).  ``header`` is
    # kept as an alias so code using the old spelling still works.
    auth_header = 'Proxy-Authorization'
    header = auth_header

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry a request that was answered with 407 using digest auth.

        Returns whatever ``http_error_auth_reqed`` returns: the response
        of the retried request, or None when no retry was made.
        """
        host = req.get_host()
        # The result must be returned: the opener's handler chain only
        # stops on a non-None result, so dropping it would discard a
        # successful retry.
        return self.http_error_auth_reqed('proxy-authenticate', host, req,
                                          headers)
738 def encode_digest(digest
):
741 n
= (ord(c
) >> 4) & 0xf
742 hexrep
.append(hex(n
)[-1])
744 hexrep
.append(hex(n
)[-1])
745 return ''.join(hexrep
)
748 class AbstractHTTPHandler(BaseHandler
):
750 def do_open(self
, http_class
, req
):
751 host
= req
.get_host()
753 raise URLError('no host given')
756 h
= http_class(host
) # will parse host:port
758 data
= req
.get_data()
759 h
.putrequest('POST', req
.get_selector())
760 if not 'Content-type' in req
.headers
:
761 h
.putheader('Content-type',
762 'application/x-www-form-urlencoded')
763 if not 'Content-length' in req
.headers
:
764 h
.putheader('Content-length', '%d' % len(data
))
766 h
.putrequest('GET', req
.get_selector())
767 except socket
.error
, err
:
770 scheme
, sel
= splittype(req
.get_selector())
771 sel_host
, sel_path
= splithost(sel
)
772 h
.putheader('Host', sel_host
or host
)
773 for args
in self
.parent
.addheaders
:
774 if name
not in req
.headers
:
776 for k
, v
in req
.headers
.items():
782 code
, msg
, hdrs
= h
.getreply()
785 return addinfourl(fp
, hdrs
, req
.get_full_url())
787 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class HTTPHandler(AbstractHTTPHandler):
    """Opens requests through ``do_open`` using ``httplib.HTTP``."""

    def http_open(self, req):
        """Open ``req`` with the plain-HTTP connection class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
# HTTPSHandler is only defined when httplib exposes an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Opens requests through ``do_open`` using ``httplib.HTTPS``."""

        def https_open(self, req):
            """Open ``req`` with the HTTPS connection class."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
class UnknownHandler(BaseHandler):
    """Handler for the ``unknown_open`` hook: rejects the request."""

    def unknown_open(self, req):
        """Raise URLError naming the URL type of ``req``."""
        scheme = req.get_type()
        message = 'unknown url type: %s' % scheme
        raise URLError(message)
808 def parse_keqv_list(l
):
809 """Parse list of key=value strings where keys are not duplicated."""
812 k
, v
= elt
.split('=', 1)
813 if v
[0] == '"' and v
[-1] == '"':
818 def parse_http_list(s
):
819 """Parse lists as described by RFC 2068 Section 2.
821 In particular, parse comma-separated lists where the elements of
822 the list may include quoted-strings. A quoted-string could
825 # XXX this function could probably use more testing
837 list.append(s
[start
:])
841 raise ValueError, "unbalanced quotes"
843 list.append(s
[start
:i
+c
])
848 list.append(s
[start
:i
+c
])
856 list.append(s
[start
:i
+c
])
862 return map(lambda x
: x
.strip(), list)
864 class FileHandler(BaseHandler
):
865 # Use local file or FTP depending on form of URL
866 def file_open(self
, req
):
867 url
= req
.get_selector()
868 if url
[:2] == '//' and url
[2:3] != '/':
870 return self
.parent
.open(req
)
872 return self
.open_local_file(req
)
874 # names for the localhost
877 if FileHandler
.names
is None:
878 FileHandler
.names
= (socket
.gethostbyname('localhost'),
879 socket
.gethostbyname(socket
.gethostname()))
880 return FileHandler
.names
882 # not entirely sure what the rules are here
883 def open_local_file(self
, req
):
884 host
= req
.get_host()
885 file = req
.get_selector()
886 localfile
= url2pathname(file)
887 stats
= os
.stat(localfile
)
889 modified
= rfc822
.formatdate(stats
.st_mtime
)
890 mtype
= mimetypes
.guess_type(file)[0]
891 headers
= mimetools
.Message(StringIO(
892 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
893 (mtype
or 'text/plain', size
, modified
)))
895 host
, port
= splitport(host
)
897 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
898 return addinfourl(open(localfile
, 'rb'),
899 headers
, 'file:'+file)
900 raise URLError('file not on local host')
902 class FTPHandler(BaseHandler
):
903 def ftp_open(self
, req
):
904 host
= req
.get_host()
906 raise IOError, ('ftp error', 'no host given')
907 # XXX handle custom username & password
909 host
= socket
.gethostbyname(host
)
910 except socket
.error
, msg
:
912 host
, port
= splitport(host
)
914 port
= ftplib
.FTP_PORT
915 path
, attrs
= splitattr(req
.get_selector())
917 dirs
= path
.split('/')
918 dirs
, file = dirs
[:-1], dirs
[-1]
919 if dirs
and not dirs
[0]:
921 user
= passwd
= '' # XXX
923 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
924 type = file and 'I' or 'D'
926 attr
, value
= splitattr(attr
)
927 if attr
.lower() == 'type' and \
928 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
930 fp
, retrlen
= fw
.retrfile(file, type)
932 mtype
= mimetypes
.guess_type(req
.get_full_url())[0]
934 headers
+= "Content-Type: %s\n" % mtype
935 if retrlen
is not None and retrlen
>= 0:
936 headers
+= "Content-Length: %d\n" % retrlen
937 sf
= StringIO(headers
)
938 headers
= mimetools
.Message(sf
)
939 return addinfourl(fp
, headers
, req
.get_full_url())
940 except ftplib
.all_errors
, msg
:
941 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
943 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
944 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
945 ## fw.ftp.set_debuglevel(1)
948 class CacheFTPHandler(FTPHandler
):
949 # XXX would be nice to have pluggable cache strategies
950 # XXX this stuff is definitely not thread safe
958 def setTimeout(self
, t
):
961 def setMaxConns(self
, m
):
964 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
965 key
= user
, passwd
, host
, port
966 if key
in self
.cache
:
967 self
.timeout
[key
] = time
.time() + self
.delay
969 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
970 self
.timeout
[key
] = time
.time() + self
.delay
972 return self
.cache
[key
]
974 def check_cache(self
):
975 # first check for old ones
977 if self
.soonest
<= t
:
978 for k
, v
in self
.timeout
.items():
980 self
.cache
[k
].close()
983 self
.soonest
= min(self
.timeout
.values())
985 # then check the size
986 if len(self
.cache
) == self
.max_conns
:
987 for k
, v
in self
.timeout
.items():
988 if v
== self
.soonest
:
992 self
.soonest
= min(self
.timeout
.values())
994 class GopherHandler(BaseHandler
):
995 def gopher_open(self
, req
):
996 host
= req
.get_host()
998 raise GopherError('no host given')
1000 selector
= req
.get_selector()
1001 type, selector
= splitgophertype(selector
)
1002 selector
, query
= splitquery(selector
)
1003 selector
= unquote(selector
)
1005 query
= unquote(query
)
1006 fp
= gopherlib
.send_query(selector
, query
, host
)
1008 fp
= gopherlib
.send_selector(selector
, host
)
1009 return addinfourl(fp
, noheaders(), req
.get_full_url())
1011 #bleck! don't use this yet
1012 class OpenerFactory
:
1014 default_handlers
= [UnknownHandler
, HTTPHandler
,
1015 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
1016 FTPHandler
, FileHandler
]
1017 proxy_handlers
= [ProxyHandler
]
1019 replacement_handlers
= []
def add_proxy_handler(self, ph):
    """Append ``ph`` to this factory's proxy handlers.

    A new list is bound on the instance rather than mutating the
    existing one, so the class-level default list is left untouched.
    """
    existing = self.proxy_handlers
    self.proxy_handlers = existing + [ph]
def add_handler(self, h):
    """Append ``h`` to this factory's handlers.

    A new list is bound on the instance rather than mutating the
    existing one, so a list shared with the class is left untouched.
    """
    existing = self.handlers
    self.handlers = existing + [h]
1027 def replace_handler(self
, h
):
1030 def build_opener(self
):
1031 opener
= OpenerDirector()
1032 for ph
in self
.proxy_handlers
:
1033 if inspect
.isclass(ph
):
1035 opener
.add_handler(ph
)
1037 if __name__
== "__main__":
1038 # XXX some of the test code depends on machine configurations that
1039 # are internal to CNRI. Need to set up a public server with the
1040 # right authentication configuration for test purposes.
1041 if socket
.gethostname() == 'bitdiddle':
1042 localhost
= 'bitdiddle.cnri.reston.va.us'
1043 elif socket
.gethostname() == 'bitdiddle.concentric.net':
1044 localhost
= 'localhost'
1048 # Thanks to Fred for finding these!
1049 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1050 'gopher://gopher.vt.edu:10010/10/33',
1053 'file://nonsensename/etc/passwd',
1054 'ftp://www.python.org/pub/python/misc/sousa.au',
1055 'ftp://www.python.org/pub/tmp/blat',
1056 'http://www.espn.com/', # redirect
1057 'http://www.python.org/Spanish/Inquistion/',
1058 ('http://www.python.org/cgi-bin/faqw.py',
1059 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1060 'http://www.python.org/',
1061 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
1064 ## if localhost is not None:
1066 ## 'file://%s/etc/passwd' % localhost,
1067 ## 'http://%s/simple/' % localhost,
1068 ## 'http://%s/digest/' % localhost,
1069 ## 'http://%s/not/found.h' % localhost,
1072 ## bauth = HTTPBasicAuthHandler()
1073 ## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1075 ## dauth = HTTPDigestAuthHandler()
1076 ## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1080 cfh
= CacheFTPHandler()
1083 ## # XXX try out some custom proxy objects too!
1084 ## def at_cnri(req):
1085 ## host = req.get_host()
1087 ## if host[-18:] == '.cnri.reston.va.us':
1089 ## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1090 ## ph = CustomProxyHandler(p)
1092 ## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1093 install_opener(build_opener(cfh
, GopherHandler
))
1096 if isinstance(url
, tuple):
1102 f
= urlopen(url
, req
)
1103 except IOError, err
:
1104 print "IOError:", err
1105 except socket
.error
, err
:
1106 print "socket.error:", err
1110 print "read %d bytes" % len(buf
)