Fullscreen support, UI fixes, reset improved
[smpy-maemo.git] / mechanize / _opener.py
blob145350f1247937400571a41fb1ae9a74cb420b72
"""Integration with Python standard library module urllib2: OpenerDirector
class.

Copyright 2004-2006 John J Lee <jjl@pobox.com>

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""
import os
import urllib2
import bisect
import urllib
import httplib
import types
import tempfile

# Use the real threading module when it can be imported, otherwise the
# dummy_threading stand-in; either way it is bound to the name _threading.
try:
    import threading as _threading
except ImportError:
    import dummy_threading as _threading

# On interpreters with no builtin ``set`` name, bind sets.Set to it instead.
try:
    set
except NameError:
    import sets
    set = sets.Set

import _http
import _upgrade
import _rfc3986
import _response
from _util import isstringlike
from _request import Request
class ContentTooShortError(urllib2.URLError):
    """URLError raised when fewer bytes arrive than Content-Length announced.

    Attributes:
        reason: the error message (set by URLError.__init__).
        result: the value the interrupted operation would have returned;
            OpenerDirector.retrieve() passes its (filename, headers) tuple.
    """

    def __init__(self, reason, result):
        # Keep the partial result so callers can still inspect it.
        self.result = result
        urllib2.URLError.__init__(self, reason)
class OpenerDirector(urllib2.OpenerDirector):
    """urllib2.OpenerDirector extended with processors and retrieve().

    Handlers are indexed by the names of their methods (see
    _maybe_reindex_handlers).  Request processors run before the request is
    opened and response processors run on the result.  retrieve() downloads
    a URL to a file, and close() removes any temporary files it created.
    """

    # Number of bytes requested from the response per read in retrieve().
    BLOCK_SIZE = 1024*8

    def __init__(self):
        urllib2.OpenerDirector.__init__(self)
        # really none of these are (sanely) public -- the lack of initial
        # underscore on some is just due to following urllib2
        self.process_response = {}
        self.process_request = {}
        # Sets from the start, matching what _maybe_reindex_handlers stores.
        self._any_request = set()
        self._any_response = set()
        self._handler_index_valid = True
        self._tempfiles = []

    def add_handler(self, handler):
        """Register handler (once) and mark the method indexes as stale."""
        if handler in self.handlers:
            return
        # XXX why does self.handlers need to be sorted?
        bisect.insort(self.handlers, handler)
        handler.add_parent(self)
        self._handler_index_valid = False

    def _maybe_reindex_handlers(self):
        """Rebuild the per-method handler indexes if they are marked stale.

        Each attribute name of each handler is classified:

        * ``any_request`` / ``any_response``: processor for every scheme
        * ``<scheme>_open``: goes into handle_open[scheme]
        * ``<scheme>_request`` / ``<scheme>_response``: scheme processors
        * ``<scheme>_error_<kind>``: goes into handle_error[scheme][kind],
          with kind converted to int when it is numeric

        Handlers that contribute nothing to any index are removed from
        self.handlers.

        NOTE(review): nothing in this class sets _handler_index_valid back
        to True after a rebuild, so once add_handler() has been called the
        indexes are rebuilt on every call.  Code outside this class may rely
        on that (for instance by editing self.handlers directly), so it is
        left unchanged here -- confirm before making the cache effective.
        """
        if self._handler_index_valid:
            return

        handle_error = {}
        handle_open = {}
        process_request = {}
        process_response = {}
        any_request = set()
        any_response = set()
        unwanted = []

        for handler in self.handlers:
            added = False
            for meth in dir(handler):
                if meth in ["redirect_request", "do_open", "proxy_open"]:
                    # oops, coincidental match
                    continue

                if meth == "any_request":
                    any_request.add(handler)
                    added = True
                    continue
                elif meth == "any_response":
                    any_response.add(handler)
                    added = True
                    continue

                ii = meth.find("_")
                scheme = meth[:ii]
                condition = meth[ii+1:]

                if condition.startswith("error"):
                    # kind is whatever follows "<scheme>_error_"
                    jj = condition.find("_") + ii + 1
                    kind = meth[jj+1:]
                    try:
                        kind = int(kind)
                    except ValueError:
                        pass
                    lookup = handle_error.setdefault(scheme, {})
                elif condition == "open":
                    kind = scheme
                    lookup = handle_open
                elif condition == "request":
                    kind = scheme
                    lookup = process_request
                elif condition == "response":
                    kind = scheme
                    lookup = process_response
                else:
                    continue

                lookup.setdefault(kind, set()).add(handler)
                added = True

            if not added:
                unwanted.append(handler)

        for handler in unwanted:
            self.handlers.remove(handler)

        # Turn the handle_error / handle_open sets into sorted lists.  The
        # processor indexes stay as sets; open() sorts those itself.
        for lookup in handle_error.values():
            for code, handlers in list(lookup.items()):
                handlers = list(handlers)
                handlers.sort()
                lookup[code] = handlers
        for scheme, handlers in list(handle_open.items()):
            handlers = list(handlers)
            handlers.sort()
            handle_open[scheme] = handlers

        # cache the indexes
        self.handle_error = handle_error
        self.handle_open = handle_open
        self.process_request = process_request
        self.process_response = process_response
        self._any_request = any_request
        self._any_response = any_response

    def _request(self, url_or_req, data, visit):
        """Return a request object for url_or_req.

        A string-like URL is wrapped in a new Request.  An existing request
        object is reused: data is attached when given, and a .visit
        attribute is ensured (overwritten only when visit is not None).
        """
        if isstringlike(url_or_req):
            req = Request(url_or_req, data, visit=visit)
        else:
            # already a urllib2.Request or mechanize.Request instance
            req = url_or_req
            if data is not None:
                req.add_data(data)
            # XXX yuck, give request a .visit attribute if it doesn't have one
            try:
                req.visit
            except AttributeError:
                req.visit = None
            if visit is not None:
                req.visit = visit
        return req

    def open(self, fullurl, data=None):
        """Open fullurl (URL string or request object) and return the response.

        Runs the matching request processors in sorted order, opens the
        request through urllib2, then runs the matching response processors
        in sorted order on the result.
        """
        req = self._request(fullurl, data, None)
        req_scheme = req.get_type()

        self._maybe_reindex_handlers()

        # pre-process request
        # XXX should we allow a Processor to change the URL scheme
        #   of the request?
        request_processors = set(self.process_request.get(req_scheme, []))
        request_processors.update(self._any_request)
        request_processors = list(request_processors)
        request_processors.sort()
        for processor in request_processors:
            for meth_name in ["any_request", req_scheme+"_request"]:
                meth = getattr(processor, meth_name, None)
                if meth:
                    req = meth(req)

        # In Python >= 2.4, .open() supports processors already, so we must
        # call ._open() instead.
        urlopen = getattr(urllib2.OpenerDirector, "_open",
                          urllib2.OpenerDirector.open)
        response = urlopen(self, req, data)

        # post-process response
        response_processors = set(self.process_response.get(req_scheme, []))
        response_processors.update(self._any_response)
        response_processors = list(response_processors)
        response_processors.sort()
        for processor in response_processors:
            for meth_name in ["any_response", req_scheme+"_response"]:
                meth = getattr(processor, meth_name, None)
                if meth:
                    response = meth(req, response)

        return response

    def error(self, proto, *args):
        """Dispatch an error to the handlers registered for proto.

        For 'http' and 'https' the chain is handle_error['http'], keyed by
        the third positional argument, and the method tried is
        http_error_<that argument>.  If no handler returns a true result,
        the 'default' chain (http_error_default) is tried with the original
        arguments.  For any other proto, <proto>_error is tried and None is
        returned when nothing handles it.
        """
        if proto in ['http', 'https']:
            # XXX http[s] protocols are special-cased
            chain = self.handle_error['http']  # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            chain = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (chain, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            args = (chain, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)

    def retrieve(self, fullurl, filename=None, reporthook=None, data=None):
        """Returns (filename, headers).

        For remote objects, the default filename will refer to a temporary
        file.  Temporary files are removed when the OpenerDirector.close()
        method is called.

        For file: URLs, at present the returned filename is None.  This may
        change in future.

        If the actual number of bytes read is less than indicated by the
        Content-Length header, raises ContentTooShortError (a URLError
        subclass).  The exception's .result attribute contains the (filename,
        headers) that would have been returned.

        fullurl may be a URL string or a request object.  reporthook, when
        given, is called as reporthook(blocknum, blocksize, totalsize)
        before the first read and after every block; totalsize is -1 when
        the length is unknown.  Both the response and the output file are
        closed even if reading or writing fails.

        """
        req = self._request(fullurl, data, False)
        scheme = req.get_type()
        fp = self.open(req)
        try:
            headers = fp.info()
            if filename is None and scheme == 'file':
                # XXX req.get_selector() seems broken here, return None,
                #   pending sanity :-/
                return None, headers
                #return urllib.url2pathname(req.get_selector()), headers
            if filename:
                tfp = open(filename, 'wb')
            else:
                # Take the suffix from the request's URL, not from fullurl,
                # which may be a request object and not a string.
                path = _rfc3986.urlsplit(req.get_full_url())[2]
                suffix = os.path.splitext(path)[1]
                fd, filename = tempfile.mkstemp(suffix)
                self._tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')

            try:
                result = filename, headers
                bs = self.BLOCK_SIZE
                size = -1
                read = 0
                blocknum = 0
                # Read the expected size whether or not a reporthook was
                # given, so the short-read check below always applies.
                if "content-length" in headers:
                    try:
                        size = int(headers["Content-Length"])
                    except ValueError:
                        # Unparseable header: treat the length as unknown.
                        size = -1
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: "
                "got only %i out of %i bytes" % (read, size),
                result
                )

        return result

    def close(self):
        """Close the opener and delete temporary files made by retrieve()."""
        urllib2.OpenerDirector.close(self)

        # make it very obvious this object is no longer supposed to be used
        self.open = self.error = self.retrieve = self.add_handler = None

        if self._tempfiles:
            for filename in self._tempfiles:
                try:
                    os.unlink(filename)
                except OSError:
                    # Best effort: the file may already have been removed.
                    pass
            del self._tempfiles[:]
300 def wrapped_open(urlopen, process_response_object, fullurl, data=None):
301 success = True
302 try:
303 response = urlopen(fullurl, data)
304 except urllib2.HTTPError, error:
305 success = False
306 if error.fp is None: # not a response
307 raise
308 response = error
310 if response is not None:
311 response = process_response_object(response)
313 if not success:
314 raise response
315 return response
class ResponseProcessingOpener(OpenerDirector):
    """OpenerDirector whose open() runs results through a hook.

    Subclasses override process_response_object(); wrapped_open applies it
    to the object produced by OpenerDirector.open.
    """

    def open(self, fullurl, data=None):
        """Open fullurl via OpenerDirector.open, applying the response hook."""
        def plain_open(url, payload=None):
            # Call the base-class open explicitly, bypassing this override.
            return OpenerDirector.open(self, url, payload)

        hook = self.process_response_object
        return wrapped_open(plain_open, hook, fullurl, data)

    def process_response_object(self, response):
        """Return response unchanged; override to transform responses."""
        return response
class SeekableResponseOpener(ResponseProcessingOpener):
    """Opener that passes each response to _response.seek_wrapped_response."""

    def process_response_object(self, response):
        """Return response wrapped by _response.seek_wrapped_response."""
        wrapped = _response.seek_wrapped_response(response)
        return wrapped
class OpenerFactory:
    """This class's interface is quite likely to change."""

    # Classes instantiated for every opener unless a supplied handler is a
    # subclass (or an instance of a subclass) of them.
    default_classes = [
        # handlers
        urllib2.ProxyHandler,
        urllib2.UnknownHandler,
        _http.HTTPHandler,  # derived from new AbstractHTTPHandler
        _http.HTTPDefaultErrorHandler,
        _http.HTTPRedirectHandler,  # bugfixed
        urllib2.FTPHandler,
        urllib2.FileHandler,
        # processors
        _upgrade.HTTPRequestUpgradeProcessor,
        _http.HTTPCookieProcessor,
        _http.HTTPErrorProcessor,
        ]
    if hasattr(httplib, 'HTTPS'):
        default_classes.append(_http.HTTPSHandler)
    handlers = []
    replacement_handlers = []

    def __init__(self, klass=OpenerDirector):
        """Remember klass, the opener class that build_opener instantiates."""
        self.klass = klass

    def build_opener(self, *handlers):
        """Create an opener object from a list of handlers and processors.

        The opener will use several default handlers and processors, including
        support for HTTP and FTP.

        If any of the handlers passed as arguments are subclasses of the
        default handlers, the default handlers will not be used.

        Each item in handlers may be a handler class (which is instantiated
        with no arguments) or a handler instance.  Both old-style and
        new-style classes are recognised.

        """
        def is_class(obj):
            # types.ClassType alone would miss new-style classes.
            return isinstance(obj, (type, types.ClassType))

        opener = self.klass()
        default_classes = list(self.default_classes)
        skip = []
        for klass in default_classes:
            for check in handlers:
                if is_class(check):
                    replaces_default = issubclass(check, klass)
                else:
                    replaces_default = isinstance(check, klass)
                if replaces_default:
                    # Record each default once; a second remove() of the
                    # same class below would raise ValueError.
                    skip.append(klass)
                    break
        for klass in skip:
            default_classes.remove(klass)

        for klass in default_classes:
            opener.add_handler(klass())
        for h in handlers:
            if is_class(h):
                h = h()
            opener.add_handler(h)

        return opener
# Default opener used by urlopen() and urlretrieve().  It starts out unset
# and is built on first use; urlopen_lock is held while it is being built.
_opener = None
urlopen_lock = _threading.Lock()

# Module-level shortcut bound to a default OpenerFactory instance.
build_opener = OpenerFactory().build_opener
def urlopen(url, data=None):
    """Open url with the module-wide default opener and return the result.

    The default opener is created with build_opener() on first use, unless
    one has already been set (see install_opener).
    """
    global _opener
    opener_missing = _opener is None
    if opener_missing:
        urlopen_lock.acquire()
        try:
            # Check again now that the lock is held: another thread may
            # have created the opener in the meantime.
            if _opener is None:
                _opener = build_opener()
        finally:
            urlopen_lock.release()
    return _opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Call retrieve() on the module-wide default opener.

    The default opener is created with build_opener() on first use, unless
    one has already been set (see install_opener).  All arguments are
    passed through to the opener's retrieve() method, whose result is
    returned.
    """
    global _opener
    opener_missing = _opener is None
    if opener_missing:
        urlopen_lock.acquire()
        try:
            # Check again now that the lock is held: another thread may
            # have created the opener in the meantime.
            if _opener is None:
                _opener = build_opener()
        finally:
            urlopen_lock.release()
    return _opener.retrieve(url, filename, reporthook, data)
def install_opener(opener):
    """Make opener the default used by urlopen() and urlretrieve().

    The module-level _opener is simply rebound; urlopen_lock is not taken.
    """
    global _opener
    _opener = opener