1 """An extensible library for opening URLs using a variety of protocols
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as a file-like
6 object; the returned object has some extra methods described below.
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301 & 302 redirect errors, and the HTTPDigestAuthHandler deals
15 with digest authentication.
17 urlopen(url, data=None) -- basic usage is the same as the original
18 urllib. pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 IOError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
24 build_opener -- function that creates a new OpenerDirector instance.
25 will install the default handlers. accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
27 instantiate. if one of the arguments is a subclass of the default
28 handler, the argument will be installed instead of the default.
30 install_opener -- installs a new opener as the default opener.
35 Request -- an object that encapsulates the state of a request. the
36 state can be as simple as the URL. it can also include extra HTTP
37 headers, e.g. a User-Agent.
42 URLError-- a subclass of IOError, individual protocols have their own
45 HTTPError-- also a valid HTTP response, so you can treat an HTTP error
46 as an exceptional event or valid response
49 BaseHandler and parent
50 _call_chain conventions
56 # set up authentication info
57 authinfo = urllib2.HTTPBasicAuthHandler()
58 authinfo.add_password('realm', 'host', 'username', 'password')
60 proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"})
62 # build a new opener that adds authentication and caching FTP handlers
63 opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler)
66 urllib2.install_opener(opener)
68 f = urllib2.urlopen('http://www.python.org/')
74 # If an authentication error handler tries to perform
75 # authentication but fails for some reason, how should the error be
76 # signalled? The client needs to know the HTTP error code. But if
77 # the handler knows that the problem was, e.g., that it didn't know
78 # the hash algo that was requested in the challenge, it would be good to
79 # pass that information along to the client, too.
83 # documentation (getting there)
85 # abstract factory for opener
86 # ftp errors aren't handled cleanly
87 # gopher can return a socket.error
88 # check digest against correct (i.e. non-apache) implementation
110 from cStringIO
import StringIO
112 from StringIO
import StringIO
120 # not sure how many of these need to be gotten rid of
121 from urllib
import unwrap
, unquote
, splittype
, splithost
, \
122 addinfourl
, splitport
, splitgophertype
, splitquery
, \
123 splitattr
, ftpwrapper
, noheaders
125 # support for proxies via environment variables
126 from urllib
import getproxies
128 # support for FileHandler
129 from urllib
import localhost
, url2pathname
131 __version__
= "2.0a1"
134 def urlopen(url
, data
=None):
137 _opener
= build_opener()
138 return _opener
.open(url
, data
)
140 def install_opener(opener
):
144 # do these error classes make sense?
145 # make sure all of the IOError stuff is overridden. we just want to be
148 class URLError(IOError):
149 # URLError is a sub-type of IOError, but it doesn't share any of
150 # the implementation. need to override __init__ and __str__
151 def __init__(self
, reason
):
155 return '<urlopen error %s>' % self
.reason
157 class HTTPError(URLError
, addinfourl
):
158 """Raised when HTTP error occurs, but also acts like non-error return"""
159 __super_init
= addinfourl
.__init
__
161 def __init__(self
, url
, code
, msg
, hdrs
, fp
):
162 self
.__super
_init
(fp
, hdrs
, url
)
171 return 'HTTP Error %s: %s' % (self
.code
, self
.msg
)
174 # XXX is this safe? what if user catches exception, then
175 # extracts fp and discards exception?
179 class GopherError(URLError
):
185 def __init__(self
, url
, data
=None, headers
={}):
186 # unwrap('<URL:type://host/path>') --> 'type://host/path'
187 self
.__original
= unwrap(url
)
189 # self.__r_type is what's left after doing the splittype
194 self
.headers
.update(headers
)
196 def __getattr__(self
, attr
):
197 # XXX this is a fallback mechanism to guard against these
198 # methods getting called in a non-standard order. this may be
199 # too complicated and/or unnecessary.
200 # XXX should the __r_XXX attributes be public?
201 if attr
[:12] == '_Request__r_':
203 if hasattr(Request
, 'get_' + name
):
204 getattr(self
, 'get_' + name
)()
205 return getattr(self
, attr
)
206 raise AttributeError, attr
208 def add_data(self
, data
):
212 return self
.data
is not None
217 def get_full_url(self
):
218 return self
.__original
221 if self
.type is None:
222 self
.type, self
.__r
_type
= splittype(self
.__original
)
223 if self
.type is None:
224 raise ValueError, "unknown url type: %s" % self
.__original
228 if self
.host
is None:
229 self
.host
, self
.__r
_host
= splithost(self
.__r
_type
)
231 self
.host
= unquote(self
.host
)
234 def get_selector(self
):
237 def set_proxy(self
, host
, type):
238 self
.host
, self
.type = host
, type
239 self
.__r
_host
= self
.__original
241 def add_header(self
, key
, val
):
242 # useful for something like authentication
243 self
.headers
[key
] = val
245 class OpenerDirector
:
247 server_version
= "Python-urllib/%s" % __version__
248 self
.addheaders
= [('User-agent', server_version
)]
249 # manage the individual handlers
251 self
.handle_open
= {}
252 self
.handle_error
= {}
254 def add_handler(self
, handler
):
256 for meth
in dir(handler
):
257 if meth
[-5:] == '_open':
259 if self
.handle_open
.has_key(protocol
):
260 self
.handle_open
[protocol
].append(handler
)
262 self
.handle_open
[protocol
] = [handler
]
266 j
= meth
[i
+1:].find('_') + i
+ 1
267 if j
!= -1 and meth
[i
+1:j
] == 'error':
274 dict = self
.handle_error
.get(proto
, {})
275 if dict.has_key(kind
):
276 dict[kind
].append(handler
)
278 dict[kind
] = [handler
]
279 self
.handle_error
[proto
] = dict
283 self
.handlers
.append(handler
)
284 handler
.add_parent(self
)
290 for handler
in self
.handlers
:
294 def _call_chain(self
, chain
, kind
, meth_name
, *args
):
295 # XXX raise an exception if no one else should try to handle
296 # this url. return None if you can't but someone else could.
297 handlers
= chain
.get(kind
, ())
298 for handler
in handlers
:
299 func
= getattr(handler
, meth_name
)
302 if result
is not None:
305 def open(self
, fullurl
, data
=None):
306 # accept a URL or a Request object
307 if isinstance(fullurl
, (types
.StringType
, types
.UnicodeType
)):
308 req
= Request(fullurl
, data
)
313 assert isinstance(req
, Request
) # really only care about interface
315 result
= self
._call
_chain
(self
.handle_open
, 'default',
320 type_
= req
.get_type()
321 result
= self
._call
_chain
(self
.handle_open
, type_
, type_
+ \
326 return self
._call
_chain
(self
.handle_open
, 'unknown',
329 def error(self
, proto
, *args
):
330 if proto
in ['http', 'https']:
331 # XXX http[s] protocols are special-cased
332 dict = self
.handle_error
['http'] # https is not different than http
333 proto
= args
[2] # YUCK!
334 meth_name
= 'http_error_%d' % proto
338 dict = self
.handle_error
339 meth_name
= proto
+ '_error'
341 args
= (dict, proto
, meth_name
) + args
342 result
= self
._call
_chain
(*args
)
347 args
= (dict, 'default', 'http_error_default') + orig_args
348 return self
._call
_chain
(*args
)
350 # XXX probably also want an abstract factory that knows things like
351 # the fact that a ProxyHandler needs to get inserted first.
352 # would also know when it makes sense to skip a superclass in favor of
353 # a subclass and when it might make sense to include both
355 def build_opener(*handlers
):
356 """Create an opener object from a list of handlers.
358 The opener will use several default handlers, including support
359 for HTTP and FTP. If there is a ProxyHandler, it must be at the
360 front of the list of handlers. (Yuck.)
362 If any of the handlers passed as arguments are subclasses of the
363 default handlers, the default handlers will not be used.
366 opener
= OpenerDirector()
367 default_classes
= [ProxyHandler
, UnknownHandler
, HTTPHandler
,
368 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
369 FTPHandler
, FileHandler
]
370 if hasattr(httplib
, 'HTTPS'):
371 default_classes
.append(HTTPSHandler
)
373 for klass
in default_classes
:
374 for check
in handlers
:
375 if inspect
.isclass(check
):
376 if issubclass(check
, klass
):
378 elif isinstance(check
, klass
):
381 default_classes
.remove(klass
)
383 for klass
in default_classes
:
384 opener
.add_handler(klass())
387 if inspect
.isclass(h
):
389 opener
.add_handler(h
)
393 def add_parent(self
, parent
):
class HTTPDefaultErrorHandler(BaseHandler):
    """Handler whose http_error_default method raises HTTPError."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        """Raise an HTTPError describing the failed request.

        The exception is constructed from the request's full URL plus the
        status code, message, headers and response file object passed in.
        """
        failed_url = req.get_full_url()
        raise HTTPError(failed_url, code, msg, hdrs, fp)
402 class HTTPRedirectHandler(BaseHandler
):
403 # Implementation note: To avoid the server sending us into an
404 # infinite loop, the request object needs to track what URLs we
405 # have already seen. Do this by adding a handler-specific
406 # attribute to the Request object.
407 def http_error_302(self
, req
, fp
, code
, msg
, headers
):
408 if headers
.has_key('location'):
409 newurl
= headers
['location']
410 elif headers
.has_key('uri'):
411 newurl
= headers
['uri']
414 newurl
= urlparse
.urljoin(req
.get_full_url(), newurl
)
416 # XXX Probably want to forget about the state of the current
417 # request, although that might interact poorly with other
418 # handlers that also use handler-specific request attributes
419 new
= Request(newurl
, req
.get_data())
420 new
.error_302_dict
= {}
421 if hasattr(req
, 'error_302_dict'):
422 if len(req
.error_302_dict
)>10 or \
423 req
.error_302_dict
.has_key(newurl
):
424 raise HTTPError(req
.get_full_url(), code
,
425 self
.inf_msg
+ msg
, headers
, fp
)
426 new
.error_302_dict
.update(req
.error_302_dict
)
427 new
.error_302_dict
[newurl
] = newurl
429 # Don't close the fp until we are sure that we won't use it
434 return self
.parent
.open(new
)
436 http_error_301
= http_error_302
438 inf_msg
= "The HTTP server returned a redirect error that would" \
439 "lead to an infinite loop.\n" \
440 "The last 302 error message was:\n"
442 class ProxyHandler(BaseHandler
):
443 def __init__(self
, proxies
=None):
445 proxies
= getproxies()
446 assert hasattr(proxies
, 'has_key'), "proxies must be a mapping"
447 self
.proxies
= proxies
448 for type, url
in proxies
.items():
449 setattr(self
, '%s_open' % type,
450 lambda r
, proxy
=url
, type=type, meth
=self
.proxy_open
: \
451 meth(r
, proxy
, type))
453 def proxy_open(self
, req
, proxy
, type):
454 orig_type
= req
.get_type()
455 type, r_type
= splittype(proxy
)
456 host
, XXX
= splithost(r_type
)
458 user_pass
, host
= host
.split('@', 1)
459 user_pass
= base64
.encodestring(unquote(user_pass
)).strip()
460 req
.add_header('Proxy-Authorization', 'Basic '+user_pass
)
462 req
.set_proxy(host
, type)
463 if orig_type
== type:
464 # let other handlers take care of it
465 # XXX this only makes sense if the proxy is before the
469 # need to start over, because the other handlers don't
470 # grok the proxy's URL type
471 return self
.parent
.open(req
)
473 # feature suggested by Duncan Booth
474 # XXX custom is not a good name
476 # either pass a function to the constructor or override handle
477 def __init__(self
, proto
, func
=None, proxy_addr
=None):
480 self
.addr
= proxy_addr
482 def handle(self
, req
):
483 if self
.func
and self
.func(req
):
489 class CustomProxyHandler(BaseHandler
):
490 def __init__(self
, *proxies
):
493 def proxy_open(self
, req
):
494 proto
= req
.get_type()
496 proxies
= self
.proxies
[proto
]
501 req
.set_proxy(p
.get_proxy())
502 return self
.parent
.open(req
)
505 def do_proxy(self
, p
, req
):
506 return self
.parent
.open(req
)
508 def add_proxy(self
, cpo
):
509 if self
.proxies
.has_key(cpo
.proto
):
510 self
.proxies
[cpo
.proto
].append(cpo
)
512 self
.proxies
[cpo
.proto
] = [cpo
]
514 class HTTPPasswordMgr
:
518 def add_password(self
, realm
, uri
, user
, passwd
):
519 # uri could be a single URI or a sequence
520 if isinstance(uri
, (types
.StringType
, types
.UnicodeType
)):
522 uri
= tuple(map(self
.reduce_uri
, uri
))
523 if not self
.passwd
.has_key(realm
):
524 self
.passwd
[realm
] = {}
525 self
.passwd
[realm
][uri
] = (user
, passwd
)
527 def find_user_password(self
, realm
, authuri
):
528 domains
= self
.passwd
.get(realm
, {})
529 authuri
= self
.reduce_uri(authuri
)
530 for uris
, authinfo
in domains
.items():
532 if self
.is_suburi(uri
, authuri
):
536 def reduce_uri(self
, uri
):
537 """Accept netloc or URI and extract only the netloc and path"""
538 parts
= urlparse
.urlparse(uri
)
540 return parts
[1], parts
[2] or '/'
544 def is_suburi(self
, base
, test
):
545 """Check if test is below base in a URI tree
547 Both args must be URIs in reduced form.
551 if base
[0] != test
[0]:
553 common
= posixpath
.commonprefix((base
[1], test
[1]))
554 if len(common
) == len(base
[1]):
559 class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr
):
561 def find_user_password(self
, realm
, authuri
):
562 user
, password
= HTTPPasswordMgr
.find_user_password(self
,realm
,authuri
)
564 return user
, password
565 return HTTPPasswordMgr
.find_user_password(self
, None, authuri
)
568 class AbstractBasicAuthHandler
:
570 rx
= re
.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')
572 # XXX there can actually be multiple auth-schemes in a
573 # www-authenticate header. should probably be a lot more careful
574 # in parsing them to extract multiple alternatives
576 def __init__(self
, password_mgr
=None):
577 if password_mgr
is None:
578 password_mgr
= HTTPPasswordMgr()
579 self
.passwd
= password_mgr
580 self
.add_password
= self
.passwd
.add_password
582 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
583 # XXX could be multiple headers
584 authreq
= headers
.get(authreq
, None)
586 mo
= AbstractBasicAuthHandler
.rx
.match(authreq
)
588 scheme
, realm
= mo
.groups()
589 if scheme
.lower() == 'basic':
590 return self
.retry_http_basic_auth(host
, req
, realm
)
592 def retry_http_basic_auth(self
, host
, req
, realm
):
593 user
,pw
= self
.passwd
.find_user_password(realm
, host
)
595 raw
= "%s:%s" % (user
, pw
)
596 auth
= 'Basic %s' % base64
.encodestring(raw
).strip()
597 if req
.headers
.get(self
.auth_header
, None) == auth
:
599 req
.add_header(self
.auth_header
, auth
)
600 return self
.parent
.open(req
)
604 class HTTPBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
606 auth_header
= 'Authorization'
608 def http_error_401(self
, req
, fp
, code
, msg
, headers
):
609 host
= urlparse
.urlparse(req
.get_full_url())[1]
610 return self
.http_error_auth_reqed('www-authenticate',
614 class ProxyBasicAuthHandler(AbstractBasicAuthHandler
, BaseHandler
):
616 auth_header
= 'Proxy-Authorization'
618 def http_error_407(self
, req
, fp
, code
, msg
, headers
):
619 host
= req
.get_host()
620 return self
.http_error_auth_reqed('proxy-authenticate',
624 class AbstractDigestAuthHandler
:
626 def __init__(self
, passwd
=None):
628 passwd
= HTTPPasswordMgr()
630 self
.add_password
= self
.passwd
.add_password
632 def http_error_auth_reqed(self
, authreq
, host
, req
, headers
):
633 authreq
= headers
.get(self
.auth_header
, None)
635 kind
= authreq
.split()[0]
637 return self
.retry_http_digest_auth(req
, authreq
)
639 def retry_http_digest_auth(self
, req
, auth
):
640 token
, challenge
= auth
.split(' ', 1)
641 chal
= parse_keqv_list(parse_http_list(challenge
))
642 auth
= self
.get_authorization(req
, chal
)
644 auth_val
= 'Digest %s' % auth
645 if req
.headers
.get(self
.auth_header
, None) == auth_val
:
647 req
.add_header(self
.auth_header
, auth_val
)
648 resp
= self
.parent
.open(req
)
651 def get_authorization(self
, req
, chal
):
653 realm
= chal
['realm']
654 nonce
= chal
['nonce']
655 algorithm
= chal
.get('algorithm', 'MD5')
656 # mod_digest doesn't send an opaque, even though it isn't
657 # supposed to be optional
658 opaque
= chal
.get('opaque', None)
662 H
, KD
= self
.get_algorithm_impls(algorithm
)
666 user
, pw
= self
.passwd
.find_user_password(realm
,
671 # XXX not implemented yet
673 entdig
= self
.get_entity_digest(req
.get_data(), chal
)
677 A1
= "%s:%s:%s" % (user
, realm
, pw
)
678 A2
= "%s:%s" % (req
.has_data() and 'POST' or 'GET',
679 # XXX selector: what about proxies and full urls
681 respdig
= KD(H(A1
), "%s:%s" % (nonce
, H(A2
)))
682 # XXX should the partial digests be encoded too?
684 base
= 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
685 'response="%s"' % (user
, realm
, nonce
, req
.get_selector(),
688 base
= base
+ ', opaque="%s"' % opaque
690 base
= base
+ ', digest="%s"' % entdig
691 if algorithm
!= 'MD5':
692 base
= base
+ ', algorithm="%s"' % algorithm
695 def get_algorithm_impls(self
, algorithm
):
696 # lambdas assume digest modules are imported at the top level
697 if algorithm
== 'MD5':
698 H
= lambda x
, e
=encode_digest
:e(md5
.new(x
).digest())
699 elif algorithm
== 'SHA':
700 H
= lambda x
, e
=encode_digest
:e(sha
.new(x
).digest())
702 KD
= lambda s
, d
, H
=H
: H("%s:%s" % (s
, d
))
705 def get_entity_digest(self
, data
, chal
):
706 # XXX not implemented yet
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    # AbstractDigestAuthHandler looks up the request header name through
    # self.auth_header, so that is the attribute that has to be defined.
    auth_header = 'Authorization'
    # Old attribute name, kept so existing readers of `header` still work.
    header = auth_header

    def http_error_401(self, req, fp, code, msg, headers):
        """Retry a request that was answered with 401 using digest auth.

        Returns whatever http_error_auth_reqed returns.  The result must be
        passed back to the caller: the opener's handler chain tests each
        handler's result against None, so dropping it here would discard
        the retried response.
        """
        host = urlparse.urlparse(req.get_full_url())[1]
        return self.http_error_auth_reqed('www-authenticate', host,
                                          req, headers)
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication handler for HTTP 407 (proxy) responses."""

    # AbstractDigestAuthHandler looks up the request header name through
    # self.auth_header, so that is the attribute that has to be defined.
    auth_header = 'Proxy-Authorization'
    # Old attribute name, kept so existing readers of `header` still work.
    header = auth_header

    def http_error_407(self, req, fp, code, msg, headers):
        """Retry a request that was answered with 407 using digest auth.

        Returns whatever http_error_auth_reqed returns.  The result must be
        passed back to the caller: the opener's handler chain tests each
        handler's result against None, so dropping it here would discard
        the retried response.
        """
        host = req.get_host()
        return self.http_error_auth_reqed('proxy-authenticate', host,
                                          req, headers)
733 def encode_digest(digest
):
736 n
= (ord(c
) >> 4) & 0xf
737 hexrep
.append(hex(n
)[-1])
739 hexrep
.append(hex(n
)[-1])
740 return ''.join(hexrep
)
743 class AbstractHTTPHandler(BaseHandler
):
745 def do_open(self
, http_class
, req
):
746 host
= req
.get_host()
748 raise URLError('no host given')
751 h
= http_class(host
) # will parse host:port
753 data
= req
.get_data()
754 h
.putrequest('POST', req
.get_selector())
755 if not req
.headers
.has_key('Content-type'):
756 h
.putheader('Content-type',
757 'application/x-www-form-urlencoded')
758 if not req
.headers
.has_key('Content-length'):
759 h
.putheader('Content-length', '%d' % len(data
))
761 h
.putrequest('GET', req
.get_selector())
762 except socket
.error
, err
:
765 h
.putheader('Host', host
)
766 for args
in self
.parent
.addheaders
:
768 for k
, v
in req
.headers
.items():
774 code
, msg
, hdrs
= h
.getreply()
777 return addinfourl(fp
, hdrs
, req
.get_full_url())
779 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
class HTTPHandler(AbstractHTTPHandler):
    """Handler for http: URLs, built on httplib.HTTP."""

    def http_open(self, req):
        """Open req by handing it to do_open with the httplib.HTTP class."""
        connection_class = httplib.HTTP
        return self.do_open(connection_class, req)
# The handler is only defined when this httplib provides an HTTPS class.
if hasattr(httplib, 'HTTPS'):
    class HTTPSHandler(AbstractHTTPHandler):
        """Handler for https: URLs, built on httplib.HTTPS."""

        def https_open(self, req):
            """Open req by handing it to do_open with httplib.HTTPS."""
            connection_class = httplib.HTTPS
            return self.do_open(connection_class, req)
class UnknownHandler(BaseHandler):
    """Handler whose unknown_open method rejects the request."""

    def unknown_open(self, req):
        """Raise URLError naming the request's URL type."""
        raise URLError('unknown url type: %s' % req.get_type())
800 def parse_keqv_list(l
):
801 """Parse list of key=value strings where keys are not duplicated."""
804 k
, v
= elt
.split('=', 1)
805 if v
[0] == '"' and v
[-1] == '"':
810 def parse_http_list(s
):
811 """Parse lists as described by RFC 2068 Section 2.
813 In particular, parse comma-separated lists where the elements of
814 the list may include quoted-strings. A quoted-string could
817 # XXX this function could probably use more testing
829 list.append(s
[start
:])
833 raise ValueError, "unbalanced quotes"
835 list.append(s
[start
:i
+c
])
840 list.append(s
[start
:i
+c
])
848 list.append(s
[start
:i
+c
])
854 return map(lambda x
: x
.strip(), list)
856 class FileHandler(BaseHandler
):
857 # Use local file or FTP depending on form of URL
858 def file_open(self
, req
):
859 url
= req
.get_selector()
860 if url
[:2] == '//' and url
[2:3] != '/':
862 return self
.parent
.open(req
)
864 return self
.open_local_file(req
)
866 # names for the localhost
869 if FileHandler
.names
is None:
870 FileHandler
.names
= (socket
.gethostbyname('localhost'),
871 socket
.gethostbyname(socket
.gethostname()))
872 return FileHandler
.names
874 # not entirely sure what the rules are here
875 def open_local_file(self
, req
):
876 host
= req
.get_host()
877 file = req
.get_selector()
878 localfile
= url2pathname(file)
879 stats
= os
.stat(localfile
)
880 size
= stats
[stat
.ST_SIZE
]
881 modified
= rfc822
.formatdate(stats
[stat
.ST_MTIME
])
882 mtype
= mimetypes
.guess_type(file)[0]
883 stats
= os
.stat(localfile
)
884 headers
= mimetools
.Message(StringIO(
885 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
886 (mtype
or 'text/plain', size
, modified
)))
888 host
, port
= splitport(host
)
890 (not port
and socket
.gethostbyname(host
) in self
.get_names()):
891 return addinfourl(open(localfile
, 'rb'),
892 headers
, 'file:'+file)
893 raise URLError('file not on local host')
895 class FTPHandler(BaseHandler
):
896 def ftp_open(self
, req
):
897 host
= req
.get_host()
899 raise IOError, ('ftp error', 'no host given')
900 # XXX handle custom username & password
902 host
= socket
.gethostbyname(host
)
903 except socket
.error
, msg
:
905 host
, port
= splitport(host
)
907 port
= ftplib
.FTP_PORT
908 path
, attrs
= splitattr(req
.get_selector())
910 dirs
= path
.split('/')
911 dirs
, file = dirs
[:-1], dirs
[-1]
912 if dirs
and not dirs
[0]:
914 user
= passwd
= '' # XXX
916 fw
= self
.connect_ftp(user
, passwd
, host
, port
, dirs
)
917 type = file and 'I' or 'D'
919 attr
, value
= splitattr(attr
)
920 if attr
.lower() == 'type' and \
921 value
in ('a', 'A', 'i', 'I', 'd', 'D'):
923 fp
, retrlen
= fw
.retrfile(file, type)
925 mtype
= mimetypes
.guess_type(req
.get_full_url())[0]
927 headers
+= "Content-Type: %s\n" % mtype
928 if retrlen
is not None and retrlen
>= 0:
929 headers
+= "Content-Length: %d\n" % retrlen
930 sf
= StringIO(headers
)
931 headers
= mimetools
.Message(sf
)
932 return addinfourl(fp
, headers
, req
.get_full_url())
933 except ftplib
.all_errors
, msg
:
934 raise IOError, ('ftp error', msg
), sys
.exc_info()[2]
936 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
937 fw
= ftpwrapper(user
, passwd
, host
, port
, dirs
)
938 ## fw.ftp.set_debuglevel(1)
941 class CacheFTPHandler(FTPHandler
):
942 # XXX would be nice to have pluggable cache strategies
943 # XXX this stuff is definitely not thread safe
951 def setTimeout(self
, t
):
954 def setMaxConns(self
, m
):
957 def connect_ftp(self
, user
, passwd
, host
, port
, dirs
):
958 key
= user
, passwd
, host
, port
959 if self
.cache
.has_key(key
):
960 self
.timeout
[key
] = time
.time() + self
.delay
962 self
.cache
[key
] = ftpwrapper(user
, passwd
, host
, port
, dirs
)
963 self
.timeout
[key
] = time
.time() + self
.delay
965 return self
.cache
[key
]
967 def check_cache(self
):
968 # first check for old ones
970 if self
.soonest
<= t
:
971 for k
, v
in self
.timeout
.items():
973 self
.cache
[k
].close()
976 self
.soonest
= min(self
.timeout
.values())
978 # then check the size
979 if len(self
.cache
) == self
.max_conns
:
980 for k
, v
in self
.timeout
.items():
981 if v
== self
.soonest
:
985 self
.soonest
= min(self
.timeout
.values())
987 class GopherHandler(BaseHandler
):
988 def gopher_open(self
, req
):
989 host
= req
.get_host()
991 raise GopherError('no host given')
993 selector
= req
.get_selector()
994 type, selector
= splitgophertype(selector
)
995 selector
, query
= splitquery(selector
)
996 selector
= unquote(selector
)
998 query
= unquote(query
)
999 fp
= gopherlib
.send_query(selector
, query
, host
)
1001 fp
= gopherlib
.send_selector(selector
, host
)
1002 return addinfourl(fp
, noheaders(), req
.get_full_url())
1004 #bleck! don't use this yet
1005 class OpenerFactory
:
1007 default_handlers
= [UnknownHandler
, HTTPHandler
,
1008 HTTPDefaultErrorHandler
, HTTPRedirectHandler
,
1009 FTPHandler
, FileHandler
]
1010 proxy_handlers
= [ProxyHandler
]
1012 replacement_handlers
= []
1014 def add_proxy_handler(self
, ph
):
1015 self
.proxy_handlers
= self
.proxy_handlers
+ [ph
]
1017 def add_handler(self
, h
):
1018 self
.handlers
= self
.handlers
+ [h
]
1020 def replace_handler(self
, h
):
1023 def build_opener(self
):
1024 opener
= OpenerDirector()
1025 for ph
in self
.proxy_handlers
:
1026 if inspect
.isclass(ph
):
1028 opener
.add_handler(ph
)
1030 if __name__
== "__main__":
1031 # XXX some of the test code depends on machine configurations that
1032 # are internal to CNRI. Need to set up a public server with the
1033 # right authentication configuration for test purposes.
1034 if socket
.gethostname() == 'bitdiddle':
1035 localhost
= 'bitdiddle.cnri.reston.va.us'
1036 elif socket
.gethostname() == 'bitdiddle.concentric.net':
1037 localhost
= 'localhost'
1041 # Thanks to Fred for finding these!
1042 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex',
1043 'gopher://gopher.vt.edu:10010/10/33',
1046 'file://nonsensename/etc/passwd',
1047 'ftp://www.python.org/pub/python/misc/sousa.au',
1048 'ftp://www.python.org/pub/tmp/blat',
1049 'http://www.espn.com/', # redirect
1050 'http://www.python.org/Spanish/Inquistion/',
1051 ('http://www.python.org/cgi-bin/faqw.py',
1052 'query=pythonistas&querytype=simple&casefold=yes&req=search'),
1053 'http://www.python.org/',
1054 'ftp://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/00README-Legal-Rules-Regs',
1057 ## if localhost is not None:
1059 ## 'file://%s/etc/passwd' % localhost,
1060 ## 'http://%s/simple/' % localhost,
1061 ## 'http://%s/digest/' % localhost,
1062 ## 'http://%s/not/found.h' % localhost,
1065 ## bauth = HTTPBasicAuthHandler()
1066 ## bauth.add_password('basic_test_realm', localhost, 'jhylton',
1068 ## dauth = HTTPDigestAuthHandler()
1069 ## dauth.add_password('digest_test_realm', localhost, 'jhylton',
1073 cfh
= CacheFTPHandler()
1076 ## # XXX try out some custom proxy objects too!
1077 ## def at_cnri(req):
1078 ## host = req.get_host()
1080 ## if host[-18:] == '.cnri.reston.va.us':
1082 ## p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us')
1083 ## ph = CustomProxyHandler(p)
1085 ## install_opener(build_opener(dauth, bauth, cfh, GopherHandler, ph))
1086 install_opener(build_opener(cfh
, GopherHandler
))
1089 if isinstance(url
, types
.TupleType
):
1095 f
= urlopen(url
, req
)
1096 except IOError, err
:
1097 print "IOError:", err
1098 except socket
.error
, err
:
1099 print "socket.error:", err
1103 print "read %d bytes" % len(buf
)