Fullscreen support, UI fixes, reset improved
[smpy-maemo.git] / mechanize / _useragent.py
bloba6d5769dff429c624b2a36c9ab079214b76e4557
1 """Convenient HTTP UserAgent class.
3 This is a subclass of urllib2.OpenerDirector.
6 Copyright 2003-2006 John J. Lee <jjl@pobox.com>
8 This code is free software; you can redistribute it and/or modify it under
9 the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
10 included with the distribution).
12 """
14 import sys, warnings, urllib2
16 import _opener
17 import _urllib2
18 import _auth
19 import _gzip
20 import _response
class UserAgentBase(_opener.OpenerDirector):
    """Convenient user-agent class.

    Do not use .add_handler() to add a handler for something already dealt
    with by this code; use the .set_*() methods instead, which keep the
    internal name -> handler registry (self._ua_handlers) up to date.

    The only reason at present for the distinction between UserAgent and
    UserAgentBase is so that classes that depend on .seek()able responses
    (e.g. mechanize.Browser) can inherit from UserAgentBase.  The subclass
    UserAgent exposes a .set_seekable_responses() method that allows
    switching off the adding of a .seek() method to responses.

    Public attributes:

    addheaders: list of (name, value) pairs specifying headers to send with
     every request, unless they are overridden in the Request instance.

     >>> ua = UserAgentBase()
     >>> ua.addheaders = [
     ...  ("User-agent", "Mozilla/5.0 (compatible)"),
     ...  ("From", "responsible.person@example.com")]

    """

    # Maps a handler name to the class used to build that handler.  Names
    # without a leading underscore are URL schemes; names starting with "_"
    # are non-scheme handlers (features, error processing, debugging).
    handler_classes = {
        # scheme handlers
        "http": _urllib2.HTTPHandler,
        # CacheFTPHandler is buggy, at least in 2.3, so we don't use it
        "ftp": _urllib2.FTPHandler,
        "file": _urllib2.FileHandler,
        "gopher": _urllib2.GopherHandler,

        # other handlers
        "_unknown": _urllib2.UnknownHandler,
        # HTTP{S,}Handler depend on HTTPErrorProcessor too
        "_http_error": _urllib2.HTTPErrorProcessor,
        "_http_request_upgrade": _urllib2.HTTPRequestUpgradeProcessor,
        "_http_default_error": _urllib2.HTTPDefaultErrorHandler,

        # feature handlers
        "_basicauth": _urllib2.HTTPBasicAuthHandler,
        "_digestauth": _urllib2.HTTPDigestAuthHandler,
        "_redirect": _urllib2.HTTPRedirectHandler,
        "_cookies": _urllib2.HTTPCookieProcessor,
        "_refresh": _urllib2.HTTPRefreshProcessor,
        "_equiv": _urllib2.HTTPEquivProcessor,
        "_proxy": _urllib2.ProxyHandler,
        "_proxy_basicauth": _urllib2.ProxyBasicAuthHandler,
        "_proxy_digestauth": _urllib2.ProxyDigestAuthHandler,
        "_robots": _urllib2.HTTPRobotRulesProcessor,
        "_gzip": _gzip.HTTPGzipProcessor,  # experimental!

        # debug handlers
        "_debug_redirect": _urllib2.HTTPRedirectDebugProcessor,
        "_debug_response_body": _urllib2.HTTPResponseDebugProcessor,
        }

    # Handlers installed by __init__(), grouped by kind.
    default_schemes = ["http", "ftp", "file", "gopher"]
    default_others = ["_unknown", "_http_error", "_http_request_upgrade",
                      "_http_default_error",
                      ]
    default_features = ["_redirect", "_cookies",
                        "_refresh", "_equiv",
                        "_basicauth", "_digestauth",
                        "_proxy", "_proxy_basicauth", "_proxy_digestauth",
                        "_robots",
                        ]
    # HTTPS is only offered when _urllib2 provides a handler for it.
    if hasattr(_urllib2, 'HTTPSHandler'):
        handler_classes["https"] = _urllib2.HTTPSHandler
        default_schemes.append("https")

    def __init__(self):
        """Install one instance of every default handler."""
        _opener.OpenerDirector.__init__(self)

        # name -> handler instance, for every handler managed by this class
        ua_handlers = self._ua_handlers = {}
        for scheme in (self.default_schemes +
                       self.default_others +
                       self.default_features):
            klass = self.handler_classes[scheme]
            ua_handlers[scheme] = klass()
        for handler in ua_handlers.values():
            self.add_handler(handler)

        # Yuck.
        # Ensure correct default constructor args were passed to
        # HTTPRefreshProcessor and HTTPEquivProcessor.
        if "_refresh" in ua_handlers:
            self.set_handle_refresh(True)
        if "_equiv" in ua_handlers:
            self.set_handle_equiv(True)
        # Ensure default password managers are installed.
        pm = ppm = None
        if "_basicauth" in ua_handlers or "_digestauth" in ua_handlers:
            pm = _urllib2.HTTPPasswordMgrWithDefaultRealm()
        if ("_proxy_basicauth" in ua_handlers or
                "_proxy_digestauth" in ua_handlers):
            ppm = _auth.HTTPProxyPasswordMgr()
        self.set_password_manager(pm)
        self.set_proxy_password_manager(ppm)
        # set default certificate manager
        if "https" in ua_handlers:
            cm = _urllib2.HTTPSClientCertMgr()
            self.set_client_cert_manager(cm)

    def close(self):
        """Close the opener and drop the handler registry."""
        _opener.OpenerDirector.close(self)
        self._ua_handlers = None

    # XXX
##     def set_timeout(self, timeout):
##         self._timeout = timeout
##     def set_http_connection_cache(self, conn_cache):
##         self._http_conn_cache = conn_cache
##     def set_ftp_connection_cache(self, conn_cache):
##         # XXX ATM, FTP has cache as part of handler; should it be separate?
##         self._ftp_conn_cache = conn_cache

    def set_handled_schemes(self, schemes):
        """Set sequence of URL scheme (protocol) strings.

        For example: ua.set_handled_schemes(["http", "ftp"])

        If this fails (with ValueError) because you've passed an unknown
        scheme, the set of handled schemes will not be changed.

        """
        # Validate everything up front so a bad name changes nothing.
        want = {}
        for scheme in schemes:
            if scheme.startswith("_"):
                raise ValueError("not a scheme '%s'" % scheme)
            if scheme not in self.handler_classes:
                raise ValueError("unknown scheme '%s'" % scheme)
            want[scheme] = None

        # get rid of scheme handlers we don't want
        # (iterate over a snapshot: the registry may be touched in the loop)
        for scheme, oldhandler in list(self._ua_handlers.items()):
            if scheme.startswith("_"):
                continue  # not a scheme handler
            if scheme not in want:
                self._replace_handler(scheme, None)
            elif oldhandler is not None and oldhandler in self.handlers:
                # Only a handler that is really installed counts as
                # "already got it".  _replace_handler(name, None) leaves the
                # name in the registry, so a scheme removed earlier must be
                # rebuilt below rather than skipped.
                del want[scheme]
        # add the scheme handlers that are missing
        for scheme in want:
            self._set_handler(scheme, True)

    def set_cookiejar(self, cookiejar):
        """Set a mechanize.CookieJar, or None."""
        self._set_handler("_cookies", obj=cookiejar)

    # XXX could use Greg Stein's httpx for some of this instead?
    # or httplib2??
    def set_proxies(self, proxies):
        """Set a dictionary mapping URL scheme to proxy specification, or None.

        e.g. {"http": "joe:password@myproxy.example.com:3128",
              "ftp": "proxy.example.com"}

        """
        self._set_handler("_proxy", obj=proxies)

    def add_password(self, url, user, password, realm=None):
        """Register credentials with the current password manager.

        The arguments are forwarded to the manager's .add_password() as
        (realm, url, user, password).

        """
        self._password_manager.add_password(realm, url, user, password)

    def add_proxy_password(self, user, password, hostport=None, realm=None):
        """Register credentials with the current proxy password manager.

        The arguments are forwarded to the manager's .add_password() as
        (realm, hostport, user, password).

        """
        self._proxy_password_manager.add_password(
            realm, hostport, user, password)

    def add_client_certificate(self, url, key_file, cert_file):
        """Add an SSL client certificate, for HTTPS client auth.

        key_file and cert_file must be filenames of the key and certificate
        files, in PEM format.  You can use e.g. OpenSSL to convert a p12
        (PKCS 12) file to PEM format:

        openssl pkcs12 -clcerts -nokeys -in cert.p12 -out cert.pem
        openssl pkcs12 -nocerts -in cert.p12 -out key.pem


        Note that client certificate password input is very inflexible ATM.
        At the moment this seems to be console only, which is presumably the
        default behaviour of libopenssl.  In future mechanize may support
        third-party libraries that (I assume) allow more options here.

        """
        self._client_cert_manager.add_key_cert(url, key_file, cert_file)

    # the following are rarely useful -- use add_password / add_proxy_password
    # instead
    def set_password_manager(self, password_manager):
        """Set a mechanize.HTTPPasswordMgrWithDefaultRealm, or None.

        The basic and digest auth handlers are rebuilt around the new
        manager; passing None removes both handlers.

        """
        self._password_manager = password_manager
        self._set_handler("_basicauth", obj=password_manager)
        self._set_handler("_digestauth", obj=password_manager)

    def set_proxy_password_manager(self, password_manager):
        """Set a mechanize.HTTPProxyPasswordMgr, or None.

        The proxy basic and digest auth handlers are rebuilt around the new
        manager; passing None removes both handlers.

        """
        self._proxy_password_manager = password_manager
        self._set_handler("_proxy_basicauth", obj=password_manager)
        self._set_handler("_proxy_digestauth", obj=password_manager)

    def set_client_cert_manager(self, cert_manager):
        """Set a client certificate manager (see __init__), or None.

        The manager is always remembered on this object.  It is attached to
        the HTTPS handler only when one is registered, so this no longer
        raises KeyError when HTTPS is unavailable.

        """
        self._client_cert_manager = cert_manager
        handler = self._ua_handlers.get("https")
        if handler is not None:
            handler.client_cert_manager = cert_manager

    # these methods all take a boolean parameter
    def set_handle_robots(self, handle):
        """Set whether to observe rules from robots.txt."""
        self._set_handler("_robots", handle)

    def set_handle_redirect(self, handle):
        """Set whether to handle HTTP 30x redirections."""
        self._set_handler("_redirect", handle)

    def set_handle_refresh(self, handle, max_time=None, honor_time=True):
        """Set whether to handle HTTP Refresh headers.

        max_time and honor_time are passed as keyword arguments to the
        refresh handler's constructor.

        """
        self._set_handler("_refresh", handle, constructor_kwds={
            "max_time": max_time, "honor_time": honor_time})

    def set_handle_equiv(self, handle, head_parser_class=None):
        """Set whether to treat HTML http-equiv headers like HTTP headers.

        Response objects may be .seek()able if this is set (currently
        returned responses are, raised HTTPError exception responses are
        not).

        """
        # Only pass head_parser_class when given, so the handler's own
        # default applies otherwise.
        if head_parser_class is not None:
            constructor_kwds = {"head_parser_class": head_parser_class}
        else:
            constructor_kwds = {}
        self._set_handler("_equiv", handle, constructor_kwds=constructor_kwds)

    def set_handle_gzip(self, handle):
        """Handle gzip transfer encoding.

        Enabling this emits a warning, because the feature is experimental.

        """
        if handle:
            warnings.warn(
                "gzip transfer encoding is experimental!", stacklevel=2)
        self._set_handler("_gzip", handle)

    def set_debug_redirects(self, handle):
        """Log information about HTTP redirects (including refreshes).

        Logging is performed using module logging.  The logger name is
        "mechanize.http_redirects".  To actually print some debug output,
        eg:

        import sys, logging
        logger = logging.getLogger("mechanize.http_redirects")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.INFO)

        Other logger names relevant to this module:

        "mechanize.http_responses"
        "mechanize.cookies" (or "cookielib" if running Python 2.4)

        To turn on everything:

        import sys, logging
        logger = logging.getLogger("mechanize")
        logger.addHandler(logging.StreamHandler(sys.stdout))
        logger.setLevel(logging.INFO)

        """
        self._set_handler("_debug_redirect", handle)

    def set_debug_responses(self, handle):
        """Log HTTP response bodies.

        See docstring for .set_debug_redirects() for details of logging.

        Response objects may be .seek()able if this is set (currently
        returned responses are, raised HTTPError exception responses are
        not).

        """
        self._set_handler("_debug_response_body", handle)

    def set_debug_http(self, handle):
        """Print HTTP headers to sys.stdout."""
        level = int(bool(handle))
        for scheme in "http", "https":
            h = self._ua_handlers.get(scheme)
            if h is not None:
                h.set_http_debuglevel(level)

    def _set_handler(self, name, handle=None, obj=None,
                     constructor_args=(), constructor_kwds=None):
        """Build (or remove) the handler registered under name.

        name: key into handler_classes.
        handle: true to install a fresh handler, false to remove it.  When
         None it defaults to "obj is not None".
        obj: if not None, passed as the sole constructor argument.
        constructor_args, constructor_kwds: constructor arguments used when
         obj is None.

        """
        if constructor_kwds is None:
            # avoid sharing one mutable default dict between calls
            constructor_kwds = {}
        if handle is None:
            handle = obj is not None
        newhandler = None
        if handle:
            handler_class = self.handler_classes[name]
            if obj is not None:
                newhandler = handler_class(obj)
            else:
                newhandler = handler_class(
                    *constructor_args, **constructor_kwds)
        self._replace_handler(name, newhandler)

    def _replace_handler(self, name, newhandler=None):
        """Swap the handler registered under name for newhandler.

        With newhandler None the old handler is only taken out of
        self.handlers; its registry entry is left in place.

        """
        # first, if handler was previously added, remove it
        if name is not None:
            handler = self._ua_handlers.get(name)
            if handler:
                try:
                    self.handlers.remove(handler)
                except ValueError:
                    # already removed by an earlier call; nothing to do
                    pass
        # then add the replacement, if any
        if newhandler is not None:
            self.add_handler(newhandler)
            self._ua_handlers[name] = newhandler
class UserAgent(UserAgentBase):
    """UserAgentBase whose responses can optionally be made .seek()able.

    Seekable responses are off by default; switch them on with
    .set_seekable_responses(True).

    """

    def __init__(self):
        UserAgentBase.__init__(self)
        self._seekable = False

    def set_seekable_responses(self, handle):
        """Make response objects .seek()able."""
        self._seekable = bool(handle)

    def open(self, fullurl, data=None):
        """Open fullurl, wrapping the response for seeking if enabled."""
        if not self._seekable:
            # plain path: defer straight to the base class
            return UserAgentBase.open(self, fullurl, data)

        def bound_open(fullurl, data=None):
            return UserAgentBase.open(self, fullurl, data)

        return _opener.wrapped_open(
            bound_open, _response.seek_wrapped_response, fullurl, data)