py-cvs-rel2_1 (Rev 1.2) merge
[python/dscho.git] / Doc / lib / liburllib2.tex
blob0be3bbb472afd8c083f95d5dfcc09937141953fa
1 \section{\module{urllib2} ---
2 extensible library for opening URLs}
4 \declaremodule{standard}{urllib2}
5 \moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net}
6 \sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net}
8 \modulesynopsis{An extensible library for opening URLs using a variety of
9 protocols}
11 The \module{urllib2} module defines functions and classes which help
12 in opening URLs (mostly HTTP) in a complex world --- basic and digest
13 authentication, redirections and more.
15 The \module{urllib2} module defines the following functions:
17 \begin{funcdesc}{urlopen}{url\optional{, data}}
18 Open the url \var{url}, which can be either a string or a \class{Request}
19 object (currently the code checks that it really is a \class{Request}
20 instance, or an instance of a subclass of \class{Request}).
22 \var{data} should be a string, which specifies additional data to
23 send to the server. In HTTP requests, which are the only ones that
24 support \var{data}, it should be a buffer in the format of
25 \mimetype{application/x-www-form-urlencoded}, for example one returned
26 from \function{urllib.urlencode()}.
28 This function returns a file-like object with two additional methods:
30 \begin{itemize}
31 \item \method{geturl()} --- return the URL of the resource retrieved
32 \item \method{info()} --- return the meta-information of the page, as
33 a dictionary-like object
34 \end{itemize}
36 Raises \exception{URLError} on errors.
37 \end{funcdesc}
39 \begin{funcdesc}{install_opener}{opener}
40 Install an \class{OpenerDirector} instance as the default opener.
41 The code does not check for a real \class{OpenerDirector}, and any
42 class with the appropriate interface will work.
43 \end{funcdesc}
45 \begin{funcdesc}{build_opener}{\optional{handler, \moreargs}}
46 Return an \class{OpenerDirector} instance, which chains the
47 handlers in the order given. \var{handler}s can be either instances
48 of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in
49 which case it must be possible to call the constructor without
50 any parameters). Instances of the following classes will be in
51 the front of the \var{handler}s, unless the \var{handler}s contain
52 them, instances of them or subclasses of them:
54 \code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
55 HTTPRedirectHandler, FTPHandler, FileHandler}
57 If the Python installation has SSL support (\function{socket.ssl()}
58 exists), \class{HTTPSHandler} will also be added.
59 \end{funcdesc}
62 The following exceptions are raised as appropriate:
64 \begin{excdesc}{URLError}
65 The handlers raise this exception when they run into a problem. It is a
66 subclass of \exception{IOError}.
67 \end{excdesc}
69 \begin{excdesc}{HTTPError}
70 A subclass of \exception{URLError}, it can also function as a
71 non-exceptional file-like return value (the same thing that
72 \function{urlopen()} returns). This is useful when handling exotic
73 HTTP errors, such as requests for authentication.
74 \end{excdesc}
76 \begin{excdesc}{GopherError}
77 A subclass of \exception{URLError}, this is the error raised by the
78 Gopher handler.
79 \end{excdesc}
82 The following classes are provided:
84 \begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
85 This class is an abstraction of a URL request.
87 \var{url} should be a string which is a valid URL. For a description
88 of \var{data} see the \method{add_data()} description.
89 \var{headers} should be a dictionary, and will be treated as if
90 \method{add_header()} was called with each key and value as arguments.
91 \end{classdesc}
93 \begin{classdesc}{OpenerDirector}{}
94 The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s
95 chained together. It manages the chaining of handlers, and recovery
96 from errors.
97 \end{classdesc}
99 \begin{classdesc}{BaseHandler}{}
100 This is the base class for all registered handlers --- and handles only
101 the simple mechanics of registration.
102 \end{classdesc}
104 \begin{classdesc}{HTTPDefaultErrorHandler}{}
105 A class which defines a default handler for HTTP error responses; all
106 responses are turned into \exception{HTTPError} exceptions.
107 \end{classdesc}
109 \begin{classdesc}{HTTPRedirectHandler}{}
110 A class to handle redirections.
111 \end{classdesc}
113 \begin{classdesc}{ProxyHandler}{\optional{proxies}}
114 Cause requests to go through a proxy.
115 If \var{proxies} is given, it must be a dictionary mapping
116 protocol names to URLs of proxies.
117 The default is to read the list of proxies from the environment
118 variables \code{\var{protocol}_proxy}.
119 \end{classdesc}
121 \begin{classdesc}{HTTPPasswordMgr}{}
122 Keep a database of
123 \code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})}
124 mappings.
125 \end{classdesc}
127 \begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{}
128 Keep a database of
129 \code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings.
130 A realm of \code{None} is considered a catch-all realm, which is searched
131 if no other realm fits.
132 \end{classdesc}
134 \begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}}
135 This is a mixin class that helps with HTTP authentication, both
136 to the remote host and to a proxy.
138 \var{password_mgr} should be something that is compatible with
139 \class{HTTPPasswordMgr} --- supplies the documented interface above.
140 \end{classdesc}
142 \begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}}
143 Handle authentication with the remote host.
144 Valid \var{password_mgr}, if given, are the same as for
145 \class{AbstractBasicAuthHandler}.
146 \end{classdesc}
148 \begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}}
149 Handle authentication with the proxy.
150 Valid \var{password_mgr}, if given, are the same as for
151 \class{AbstractBasicAuthHandler}.
152 \end{classdesc}
154 \begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}}
155 This is a mixin class that helps with HTTP authentication, both
156 to the remote host and to a proxy.
158 \var{password_mgr} should be something that is compatible with
159 \class{HTTPPasswordMgr} --- supplies the documented interface above.
160 \end{classdesc}
162 \begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}}
163 Handle authentication with the remote host.
164 Valid \var{password_mgr}, if given, are the same as for
165 \class{AbstractDigestAuthHandler}.
166 \end{classdesc}
168 \begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}}
169 Handle authentication with the proxy.
170 \var{password_mgr}, if given, should be the same as for
171 the constructor of \class{AbstractDigestAuthHandler}.
172 \end{classdesc}
174 \begin{classdesc}{HTTPHandler}{}
175 A class to handle opening of HTTP URLs.
176 \end{classdesc}
178 \begin{classdesc}{HTTPSHandler}{}
179 A class to handle opening of HTTPS URLs.
180 \end{classdesc}
182 \begin{classdesc}{FileHandler}{}
183 Open local files.
184 \end{classdesc}
186 \begin{classdesc}{FTPHandler}{}
187 Open FTP URLs.
188 \end{classdesc}
190 \begin{classdesc}{CacheFTPHandler}{}
191 Open FTP URLs, keeping a cache of open FTP connections to minimize
192 delays.
193 \end{classdesc}
195 \begin{classdesc}{GopherHandler}{}
196 Open gopher URLs.
197 \end{classdesc}
199 \begin{classdesc}{UnknownHandler}{}
200 A catch-all class to handle unknown URLs.
201 \end{classdesc}
204 \subsection{Request Objects \label{request-objects}}
206 The following methods describe all of \class{Request}'s public interface,
207 and so all must be overridden in subclasses.
209 \begin{methoddesc}[Request]{add_data}{data}
210 Set the \class{Request} data to \var{data}. This is ignored
211 by all handlers except HTTP handlers --- and there it should be an
212 \mimetype{application/x-www-form-urlencoded} buffer, and will change the
213 request to be \code{POST} rather than \code{GET}.
214 \end{methoddesc}
216 \begin{methoddesc}[Request]{has_data}{}
217 Return whether the instance has a non-\code{None} data.
218 \end{methoddesc}
220 \begin{methoddesc}[Request]{get_data}{}
221 Return the instance's data.
222 \end{methoddesc}
224 \begin{methoddesc}[Request]{add_header}{key, val}
225 Add another header to the request. Headers are currently ignored by
226 all handlers except HTTP handlers, where they are added to the list
227 of headers sent to the server. Note that there cannot be more than
228 one header with the same name, and later calls will overwrite
229 previous calls in case the \var{key} collides. Currently, this is
230 no loss of HTTP functionality, since all headers which have meaning
231 when used more than once have a (header-specific) way of gaining the
232 same functionality using only one header.
233 \end{methoddesc}
235 \begin{methoddesc}[Request]{get_full_url}{}
236 Return the URL given in the constructor.
237 \end{methoddesc}
239 \begin{methoddesc}[Request]{get_type}{}
240 Return the type of the URL --- also known as the scheme.
241 \end{methoddesc}
243 \begin{methoddesc}[Request]{get_host}{}
244 Return the host to which connection will be made.
245 \end{methoddesc}
247 \begin{methoddesc}[Request]{get_selector}{}
248 Return the selector --- the part of the URL that is sent to
249 the server.
250 \end{methoddesc}
252 \begin{methoddesc}[Request]{set_proxy}{host, type}
253 Make the request by connecting to a proxy server. The \var{host} and
254 \var{type} will replace those of the instance, and the instance's
255 selector will be the original URL given in the constructor.
256 \end{methoddesc}
259 \subsection{OpenerDirector Objects \label{opener-director-objects}}
261 \class{OpenerDirector} instances have the following methods:
263 \begin{methoddesc}[OpenerDirector]{add_handler}{handler}
264 \var{handler} should be an instance of \class{BaseHandler}. The
265 following methods are searched, and added to the possible chains.
267 \begin{itemize}
268 \item \method{\var{protocol}_open()} ---
269 signal that the handler knows how to open \var{protocol} URLs.
270 \item \method{\var{protocol}_error_\var{type}()} ---
271 signal that the handler knows how to handle \var{type} errors from
272 \var{protocol}.
273 \end{itemize}
274 \end{methoddesc}
276 \begin{methoddesc}[OpenerDirector]{close}{}
277 Explicitly break cycles, and delete all the handlers.
278 Because the \class{OpenerDirector} needs to know the registered handlers,
279 and a handler needs to know which \class{OpenerDirector} called
280 it, there is a reference cycle. Even though recent versions of Python
281 have cycle-collection, it is sometimes preferable to explicitly break
282 the cycles.
283 \end{methoddesc}
285 \begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
286 Open the given \var{url} (which can be a request object or a string),
287 optionally passing the given \var{data}.
288 Arguments, return values and exceptions raised are the same as those
289 of \function{urlopen()} (which simply calls the \method{open()} method
290 on the default installed \class{OpenerDirector}).
291 \end{methoddesc}
293 \begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
294 arg\optional{, \moreargs}}}
295 Handle an error in a given protocol. The HTTP protocol is special cased to
296 use the code as the error. This will call the registered error handlers
297 for the given protocol with the given arguments (which are protocol specific).
299 Return values and exceptions raised are the same as those
300 of \function{urlopen()}.
301 \end{methoddesc}
304 \subsection{BaseHandler Objects \label{base-handler-objects}}
306 \class{BaseHandler} objects provide a couple of methods that are
307 directly useful, and others that are meant to be used by derived
308 classes. These are intended for direct use:
310 \begin{methoddesc}[BaseHandler]{add_parent}{director}
311 Add a director as parent.
312 \end{methoddesc}
314 \begin{methoddesc}[BaseHandler]{close}{}
315 Remove any parents.
316 \end{methoddesc}
318 The following members and methods should be used only by classes
319 derived from \class{BaseHandler}:
321 \begin{memberdesc}[BaseHandler]{parent}
322 A valid \class{OpenerDirector}, which can be used to open using a
323 different protocol, or handle errors.
324 \end{memberdesc}
326 \begin{methoddesc}[BaseHandler]{default_open}{req}
327 This method is \emph{not} defined in \class{BaseHandler}, but
328 subclasses should define it if they want to catch all URLs.
330 This method, if it exists, will be called by the \member{parent}
331 \class{OpenerDirector}. It should return a file-like object as
332 described in the return value of the \method{open()} of
333 \class{OpenerDirector} or \code{None}. It should raise
334 \exception{URLError}, unless a truly exceptional thing happens (for
335 example, \exception{MemoryError} should not be mapped to
336 \exception{URLError}).
338 This method will be called before any protocol-specific open method.
339 \end{methoddesc}
341 \begin{methoddescni}[BaseHandler]{\var{protocol}_open}{req}
342 This method is \emph{not} defined in \class{BaseHandler}, but
343 subclasses should define it if they want to handle URLs with the given
344 protocol.
346 This method, if defined, will be called by the \member{parent}
347 \class{OpenerDirector}. Return values should be the same as for
348 \method{default_open()}.
349 \end{methoddescni}
351 \begin{methoddesc}[BaseHandler]{unknown_open}{req}
352 This method is \emph{not} defined in \class{BaseHandler}, but
353 subclasses should define it if they want to catch all URLs with no
354 specific registered handler to open it.
356 This method, if it exists, will be called by the \member{parent}
357 \class{OpenerDirector}. Return values should be the same as for
358 \method{default_open()}.
359 \end{methoddesc}
361 \begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs}
362 This method is \emph{not} defined in \class{BaseHandler}, but
363 subclasses should override it if they intend to provide a catch-all
364 for otherwise unhandled HTTP errors. It will be called automatically
365 by the \class{OpenerDirector} getting the error, and should not
366 normally be called in other circumstances.
368 \var{req} will be a \class{Request} object, \var{fp} will be a
369 file-like object with the HTTP error body, \var{code} will be the
370 three-digit code of the error, \var{msg} will be the user-visible
371 explanation of the code and \var{hdrs} will be a mapping object with
372 the headers of the error.
374 Return values and exceptions raised should be the same as those
375 of \function{urlopen()}.
376 \end{methoddesc}
378 \begin{methoddesc}[BaseHandler]{http_error_\var{nnn}}{req, fp, code, msg, hdrs}
379 \var{nnn} should be a three-digit HTTP error code. This method is
380 also not defined in \class{BaseHandler}, but will be called, if it
381 exists, on an instance of a subclass, when an HTTP error with code
382 \var{nnn} occurs.
384 Subclasses should override this method to handle specific HTTP
385 errors.
387 Arguments, return values and exceptions raised should be the same as
388 for \method{http_error_default()}.
389 \end{methoddesc}
392 \subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
394 \strong{Note:} 303 redirection is not supported by this version of
395 \module{urllib2}.
397 \begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
398 fp, code, msg, hdrs}
399 Redirect to the \code{Location:} URL. This method is called by
400 the parent \class{OpenerDirector} when getting an HTTP
401 permanent-redirect response.
402 \end{methoddesc}
404 \begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
405 fp, code, msg, hdrs}
406 The same as \method{http_error_301()}, but called for the
407 temporary-redirect response.
408 \end{methoddesc}
411 \subsection{ProxyHandler Objects \label{proxy-handler}}
413 \begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
414 The \class{ProxyHandler} will have a method
415 \method{\var{protocol}_open()} for every \var{protocol} which has a
416 proxy in the \var{proxies} dictionary given in the constructor. The
417 method will modify requests to go through the proxy, by calling
418 \code{request.set_proxy()}, and call the next handler in the chain to
419 actually execute the protocol.
420 \end{methoddescni}
423 \subsection{HTTPPasswordMgr Objects \label{http-password-mgr}}
425 These methods are available on \class{HTTPPasswordMgr} and
426 \class{HTTPPasswordMgrWithDefaultRealm} objects.
428 \begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd}
429 \var{uri} can be either a single URI, or a sequence of URIs. \var{realm},
430 \var{user} and \var{passwd} must be strings. This causes
431 \code{(\var{user}, \var{passwd})} to be used as authentication tokens
432 when authentication for \var{realm} and a super-URI of any of the
433 given URIs is given.
434 \end{methoddesc}
436 \begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri}
437 Get user/password for given realm and URI, if any. This method will
438 return \code{(None, None)} if there is no matching user/password.
440 For \class{HTTPPasswordMgrWithDefaultRealm} objects, the realm
441 \code{None} will be searched if the given \var{realm} has no matching
442 user/password.
443 \end{methoddesc}
446 \subsection{AbstractBasicAuthHandler Objects
447 \label{abstract-basic-auth-handler}}
449 \begin{methoddesc}[AbstractBasicAuthHandler]{handle_authentication_request}
450 {authreq, host, req, headers}
451 Handle an authentication request by getting user/password pair, and retrying.
452 \var{authreq} should be the name of the header where the information about
453 the realm is included in the request, \var{host} is the host to authenticate to, \var{req} should be the
454 (failed) \class{Request} object, and \var{headers} should be the error headers.
455 \end{methoddesc}
458 \subsection{HTTPBasicAuthHandler Objects
459 \label{http-basic-auth-handler}}
461 \begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code,
462 msg, hdrs}
463 Retry the request with authentication info, if available.
464 \end{methoddesc}
467 \subsection{ProxyBasicAuthHandler Objects
468 \label{proxy-basic-auth-handler}}
470 \begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code,
471 msg, hdrs}
472 Retry the request with authentication info, if available.
473 \end{methoddesc}
476 \subsection{AbstractDigestAuthHandler Objects
477 \label{abstract-digest-auth-handler}}
479 \begin{methoddesc}[AbstractDigestAuthHandler]{handle_authentication_request}
480 {authreq, host, req, headers}
481 \var{authreq} should be the name of the header where the information about
482 the realm is included in the request, \var{host} should be the host to authenticate to, \var{req}
483 should be the (failed) \class{Request} object, and \var{headers} should be the
484 error headers.
485 \end{methoddesc}
488 \subsection{HTTPDigestAuthHandler Objects
489 \label{http-digest-auth-handler}}
491 \begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code,
492 msg, hdrs}
493 Retry the request with authentication info, if available.
494 \end{methoddesc}
497 \subsection{ProxyDigestAuthHandler Objects
498 \label{proxy-digest-auth-handler}}
500 \begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code,
501 msg, hdrs}
502 Retry the request with authentication information, if available.
503 \end{methoddesc}
506 \subsection{HTTPHandler Objects \label{http-handler-objects}}
508 \begin{methoddesc}[HTTPHandler]{http_open}{req}
509 Send an HTTP request, which can be either GET or POST, depending on
510 \code{\var{req}.has_data()}.
511 \end{methoddesc}
514 \subsection{HTTPSHandler Objects \label{https-handler-objects}}
516 \begin{methoddesc}[HTTPSHandler]{https_open}{req}
517 Send an HTTPS request, which can be either GET or POST, depending on
518 \code{\var{req}.has_data()}.
519 \end{methoddesc}
522 \subsection{FileHandler Objects \label{file-handler-objects}}
524 \begin{methoddesc}[FileHandler]{file_open}{req}
525 Open the file locally, if there is no host name, or
526 the host name is \code{'localhost'}. Change the
527 protocol to \code{ftp} otherwise, and retry opening
528 it using \member{parent}.
529 \end{methoddesc}
532 \subsection{FTPHandler Objects \label{ftp-handler-objects}}
534 \begin{methoddesc}[FTPHandler]{ftp_open}{req}
535 Open the FTP file indicated by \var{req}.
536 The login is always done with empty username and password.
537 \end{methoddesc}
540 \subsection{CacheFTPHandler Objects \label{cacheftp-handler-objects}}
542 \class{CacheFTPHandler} objects are \class{FTPHandler} objects with
543 the following additional methods:
545 \begin{methoddesc}[CacheFTPHandler]{setTimeout}{t}
546 Set timeout of connections to \var{t} seconds.
547 \end{methoddesc}
549 \begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m}
550 Set maximum number of cached connections to \var{m}.
551 \end{methoddesc}
554 \subsection{GopherHandler Objects \label{gopher-handler}}
556 \begin{methoddesc}[GopherHandler]{gopher_open}{req}
557 Open the gopher resource indicated by \var{req}.
558 \end{methoddesc}
561 \subsection{UnknownHandler Objects \label{unknown-handler-objects}}
563 \begin{methoddesc}[UnknownHandler]{unknown_open}{req}
564 Raise a \exception{URLError} exception.
565 \end{methoddesc}