1 \section{\module{urllib2
} ---
2 extensible library for opening URLs
}
4 \declaremodule{standard
}{urllib2
}
5 \moduleauthor{Jeremy Hylton
}{jhylton@users.sourceforge.net
}
6 \sectionauthor{Moshe Zadka
}{moshez@users.sourceforge.net
}
8 \modulesynopsis{An extensible library for opening URLs using a variety of
11 The
\module{urllib2
} module defines functions and classes which help
12 in opening URLs (mostly HTTP) in a complex world --- basic and digest
13 authentication, redirections and more.
15 The
\module{urllib2
} module defines the following functions:
17 \begin{funcdesc
}{urlopen
}{url
\optional{, data
}}
18 Open the URL
\var{url
}, which can be either a string or a
\class{Request
}
19 object (currently the code checks that it really is a
\class{Request
}
20 instance, or an instance of a subclass of
\class{Request
}).
22 \var{data
} should be a string, which specifies additional data to
23 send to the server. In HTTP requests, which are the only ones that
24 support
\var{data
}, it should be a buffer in the format of
25 \mimetype{application/x-www-form-urlencoded
}, for example one returned
26 from
\function{urllib.urlencode()
}.
28 This function returns a file-like object with two additional methods:
31 \item \method{geturl()
} --- return the URL of the resource retrieved
32 \item \method{info()
} --- return the meta-information of the page, as
33 a dictionary-like object
36 Raises
\exception{URLError
} on errors.
39 \begin{funcdesc
}{install_opener
}{opener
}
40 Install an
\class{OpenerDirector
} instance as the default opener.
41 The code does not check for a real
\class{OpenerDirector
}, and any
42 class with the appropriate interface will work.
45 \begin{funcdesc
}{build_opener
}{\optional{handler,
\moreargs}}
46 Return an
\class{OpenerDirector
} instance, which chains the
47 handlers in the order given.
\var{handler
}s can be either instances
48 of
\class{BaseHandler
}, or subclasses of
\class{BaseHandler
} (in
49 which case it must be possible to call the constructor without
50 any parameters). Instances of the following classes will be in
51 front of the
\var{handler
}s, unless the
\var{handler
}s contain
52 them, instances of them or subclasses of them:
54 \code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
55 HTTPRedirectHandler, FTPHandler, FileHandler
}
57 If the Python installation has SSL support (
\function{socket.ssl()
}
58 exists),
\class{HTTPSHandler
} will also be added.
62 The following exceptions are raised as appropriate:
64 \begin{excdesc
}{URLError
}
65 The handlers raise this exception (or derived exceptions) when they
66 run into a problem. It is a subclass of
\exception{IOError
}.
69 \begin{excdesc
}{HTTPError
}
70 A subclass of
\exception{URLError
}, it can also function as a
71 non-exceptional file-like return value (the same thing that
72 \function{urlopen()
} returns). This is useful when handling exotic
73 HTTP errors, such as requests for authentication.
76 \begin{excdesc
}{GopherError
}
77 A subclass of
\exception{URLError
}, this is the error raised by the
82 The following classes are provided:
84 \begin{classdesc
}{Request
}{url
\optional{, data
\optional{, headers
}}}
85 This class is an abstraction of a URL request.
87 \var{url
} should be a string which is a valid URL. For a description
88 of
\var{data
} see the
\method{add_data()
} description.
89 \var{headers
} should be a dictionary, and will be treated as if
90 \method{add_header()
} was called with each key and value as arguments.
93 \begin{classdesc
}{OpenerDirector
}{}
94 The
\class{OpenerDirector
} class opens URLs via
\class{BaseHandler
}s
95 chained together. It manages the chaining of handlers, and recovery
99 \begin{classdesc
}{BaseHandler
}{}
100 This is the base class for all registered handlers --- and handles only
101 the simple mechanics of registration.
104 \begin{classdesc
}{HTTPDefaultErrorHandler
}{}
105 A class which defines a default handler for HTTP error responses; all
106 responses are turned into
\exception{HTTPError
} exceptions.
109 \begin{classdesc
}{HTTPRedirectHandler
}{}
110 A class to handle redirections.
113 \begin{classdesc
}{ProxyHandler
}{\optional{proxies
}}
114 Cause requests to go through a proxy.
115 If
\var{proxies
} is given, it must be a dictionary mapping
116 protocol names to URLs of proxies.
117 The default is to read the list of proxies from the environment
118 variables
\var{protocol
}_proxy.
121 \begin{classdesc
}{HTTPPasswordMgr
}{}
123 \code{(
\var{realm
},
\var{uri
}) -> (
\var{user
},
\var{password
})
}
127 \begin{classdesc
}{HTTPPasswordMgrWithDefaultRealm
}{}
129 \code{(
\var{realm
},
\var{uri
}) -> (
\var{user
},
\var{password
})
} mappings.
130 A realm of
\code{None
} is considered a catch-all realm, which is searched
131 if no other realm fits.
134 \begin{classdesc
}{AbstractBasicAuthHandler
}{\optional{password_mgr
}}
135 This is a mixin class that helps with HTTP authentication, both
136 to the remote host and to a proxy.
137 \var{password_mgr
}, if given, should be something that is compatible
138 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
139 for information on the interface that must be supported.
142 \begin{classdesc
}{HTTPBasicAuthHandler
}{\optional{password_mgr
}}
143 Handle authentication with the remote host.
144 \var{password_mgr
}, if given, should be something that is compatible
145 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
146 for information on the interface that must be supported.
149 \begin{classdesc
}{ProxyBasicAuthHandler
}{\optional{password_mgr
}}
150 Handle authentication with the proxy.
151 \var{password_mgr
}, if given, should be something that is compatible
152 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
153 for information on the interface that must be supported.
156 \begin{classdesc
}{AbstractDigestAuthHandler
}{\optional{password_mgr
}}
157 This is a mixin class that helps with HTTP authentication, both
158 to the remote host and to a proxy.
159 \var{password_mgr
}, if given, should be something that is compatible
160 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
161 for information on the interface that must be supported.
164 \begin{classdesc
}{HTTPDigestAuthHandler
}{\optional{password_mgr
}}
165 Handle authentication with the remote host.
166 \var{password_mgr
}, if given, should be something that is compatible
167 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
168 for information on the interface that must be supported.
171 \begin{classdesc
}{ProxyDigestAuthHandler
}{\optional{password_mgr
}}
172 Handle authentication with the proxy.
173 \var{password_mgr
}, if given, should be something that is compatible
174 with
\class{HTTPPasswordMgr
}; refer to section~
\ref{http-password-mgr
}
175 for information on the interface that must be supported.
178 \begin{classdesc
}{HTTPHandler
}{}
179 A class to handle opening of HTTP URLs.
182 \begin{classdesc
}{HTTPSHandler
}{}
183 A class to handle opening of HTTPS URLs.
186 \begin{classdesc
}{FileHandler
}{}
190 \begin{classdesc
}{FTPHandler
}{}
194 \begin{classdesc
}{CacheFTPHandler
}{}
195 Open FTP URLs, keeping a cache of open FTP connections to minimize
199 \begin{classdesc
}{GopherHandler
}{}
203 \begin{classdesc
}{UnknownHandler
}{}
204 A catch-all class to handle unknown URLs.
208 \subsection{Request Objects
\label{request-objects
}}
210 The following methods describe all of
\class{Request
}'s public interface,
211 and so all must be overridden in subclasses.
213 \begin{methoddesc
}[Request
]{add_data
}{data
}
214 Set the
\class{Request
} data to
\var{data
}. This is ignored
215 by all handlers except HTTP handlers --- and there it should be an
216 \mimetype{application/x-www-form-urlencoded
} buffer, and will change the
217 request to be
\code{POST
} rather than
\code{GET
}.
220 \begin{methoddesc
}[Request
]{has_data
}{}
221 Return whether the instance has non-
\code{None
} data.
224 \begin{methoddesc
}[Request
]{get_data
}{}
225 Return the instance's data.
228 \begin{methoddesc
}[Request
]{add_header
}{key, val
}
229 Add another header to the request. Headers are currently ignored by
230 all handlers except HTTP handlers, where they are added to the list
231 of headers sent to the server. Note that there cannot be more than
232 one header with the same name, and later calls will overwrite
233 previous calls in case the
\var{key
} collides. Currently, this is
234 no loss of HTTP functionality, since all headers which have meaning
235 when used more than once have a (header-specific) way of gaining the
236 same functionality using only one header.
239 \begin{methoddesc
}[Request
]{get_full_url
}{}
240 Return the URL given in the constructor.
243 \begin{methoddesc
}[Request
]{get_type
}{}
244 Return the type of the URL --- also known as the scheme.
247 \begin{methoddesc
}[Request
]{get_host
}{}
248 Return the host to which a connection will be made.
251 \begin{methoddesc
}[Request
]{get_selector
}{}
252 Return the selector --- the part of the URL that is sent to
256 \begin{methoddesc
}[Request
]{set_proxy
}{host, type
}
257 Prepare the request by connecting to a proxy server. The
\var{host
}
258 and
\var{type
} will replace those of the instance, and the instance's
259 selector will be the original URL given in the constructor.
263 \subsection{OpenerDirector Objects
\label{opener-director-objects
}}
265 \class{OpenerDirector
} instances have the following methods:
267 \begin{methoddesc
}[OpenerDirector
]{add_handler
}{handler
}
268 \var{handler
} should be an instance of
\class{BaseHandler
}. The
269 following methods are searched, and added to the possible chains.
272 \item \method{\var{protocol
}_open()
} ---
273 signal that the handler knows how to open
\var{protocol
} URLs.
274 \item \method{\var{protocol
}_error_
\var{type
}()
} ---
275 signal that the handler knows how to handle
\var{type
} errors from
280 \begin{methoddesc
}[OpenerDirector
]{close
}{}
281 Explicitly break cycles, and delete all the handlers.
282 Because the
\class{OpenerDirector
} needs to know the registered handlers,
283 and a handler needs to know which
\class{OpenerDirector
} called
284 it, there is a reference cycle. Even though recent versions of Python
285 have cycle-collection, it is sometimes preferable to explicitly break
289 \begin{methoddesc
}[OpenerDirector
]{open
}{url
\optional{, data
}}
290 Open the given
\var{url
} (which can be a request object or a string),
291 optionally passing the given
\var{data
}.
292 Arguments, return values and exceptions raised are the same as those
293 of
\function{urlopen()
} (which simply calls the
\method{open()
} method
294 on the default installed
\class{OpenerDirector
}).
297 \begin{methoddesc
}[OpenerDirector
]{error
}{proto
\optional{,
298 arg
\optional{,
\moreargs}}}
299 Handle an error in a given protocol. This will call the registered
300 error handlers for the given protocol with the given arguments (which
301 are protocol specific). The HTTP protocol is a special case which
302 uses the HTTP response code to determine the specific error handler;
303 refer to the
\method{http_error_*()
} methods of the handler classes.
305 Return values and exceptions raised are the same as those
306 of
\function{urlopen()
}.
310 \subsection{BaseHandler Objects
\label{base-handler-objects
}}
312 \class{BaseHandler
} objects provide a couple of methods that are
313 directly useful, and others that are meant to be used by derived
314 classes. These are intended for direct use:
316 \begin{methoddesc
}[BaseHandler
]{add_parent
}{director
}
317 Add a director as parent.
320 \begin{methoddesc
}[BaseHandler
]{close
}{}
324 The following members and methods should only be used by classes
325 derived from
\class{BaseHandler
}:
327 \begin{memberdesc
}[BaseHandler
]{parent
}
328 A valid
\class{OpenerDirector
}, which can be used to open using a
329 different protocol, or handle errors.
332 \begin{methoddesc
}[BaseHandler
]{default_open
}{req
}
333 This method is
\emph{not
} defined in
\class{BaseHandler
}, but
334 subclasses should define it if they want to catch all URLs.
336 This method, if implemented, will be called by the parent
337 \class{OpenerDirector
}. It should return a file-like object as
338 described in the return value of the
\method{open()
} of
339 \class{OpenerDirector
}, or
\code{None
}. It should raise
340 \exception{URLError
}, unless a truly exceptional thing happens (for
341 example,
\exception{MemoryError
} should not be mapped to
342 \exception{URLError
}).
344 This method will be called before any protocol-specific open method.
347 \begin{methoddescni
}[BaseHandler
]{\var{protocol
}_open
}{req
}
348 This method is
\emph{not
} defined in
\class{BaseHandler
}, but
349 subclasses should define it if they want to handle URLs with the given
352 This method, if defined, will be called by the parent
353 \class{OpenerDirector
}. Return values should be the same as for
354 \method{default_open()
}.
357 \begin{methoddesc
}[BaseHandler
]{unknown_open
}{req
}
358 This method is
\emph{not
} defined in
\class{BaseHandler
}, but
359 subclasses should define it if they want to catch all URLs with no
360 specific registered handler to open it.
362 This method, if implemented, will be called by the
\member{parent
}
363 \class{OpenerDirector
}. Return values should be the same as for
364 \method{default_open()
}.
367 \begin{methoddesc
}[BaseHandler
]{http_error_default
}{req, fp, code, msg, hdrs
}
368 This method is
\emph{not
} defined in
\class{BaseHandler
}, but
369 subclasses should define it if they intend to provide a catch-all
370 for otherwise unhandled HTTP errors. It will be called automatically
371 by the
\class{OpenerDirector
} getting the error, and should not
372 normally be called in other circumstances.
374 \var{req
} will be a
\class{Request
} object,
\var{fp
} will be a
375 file-like object with the HTTP error body,
\var{code
} will be the
376 three-digit code of the error,
\var{msg
} will be the user-visible
377 explanation of the code and
\var{hdrs
} will be a mapping object with
378 the headers of the error.
380 Return values and exceptions raised should be the same as those
381 of
\function{urlopen()
}.
384 \begin{methoddesc
}[BaseHandler
]{http_error_
\var{nnn
}}{req, fp, code, msg, hdrs
}
385 \var{nnn
} should be a three-digit HTTP error code. This method is
386 also not defined in
\class{BaseHandler
}, but will be called, if it
387 exists, on an instance of a subclass, when an HTTP error with code
390 Subclasses should override this method to handle specific HTTP
393 Arguments, return values and exceptions raised should be the same as
394 for
\method{http_error_default()
}.
398 \subsection{HTTPRedirectHandler Objects
\label{http-redirect-handler
}}
400 \note{303 redirection is not supported by this version of
403 \begin{methoddesc
}[HTTPRedirectHandler
]{http_error_301
}{req,
405 Redirect to the
\code{Location:
} URL. This method is called by
406 the parent
\class{OpenerDirector
} when getting an HTTP
407 permanent-redirect response.
410 \begin{methoddesc
}[HTTPRedirectHandler
]{http_error_302
}{req,
412 The same as
\method{http_error_301()
}, but called for the
413 temporary-redirect response.
417 \subsection{ProxyHandler Objects
\label{proxy-handler
}}
419 \begin{methoddescni
}[ProxyHandler
]{\var{protocol
}_open
}{request
}
420 The
\class{ProxyHandler
} will have a method
421 \method{\var{protocol
}_open()
} for every
\var{protocol
} which has a
422 proxy in the
\var{proxies
} dictionary given in the constructor. The
423 method will modify requests to go through the proxy, by calling
424 \code{request.set_proxy()
}, and call the next handler in the chain to
425 actually execute the protocol.
429 \subsection{HTTPPasswordMgr Objects
\label{http-password-mgr
}}
431 These methods are available on
\class{HTTPPasswordMgr
} and
432 \class{HTTPPasswordMgrWithDefaultRealm
} objects.
434 \begin{methoddesc
}[HTTPPasswordMgr
]{add_password
}{realm, uri, user, passwd
}
435 \var{uri
} can be either a single URI, or a sequence of URIs.
\var{realm
},
436 \var{user
} and
\var{passwd
} must be strings. This causes
437 \code{(
\var{user
},
\var{passwd
})
} to be used as authentication tokens
438 when authentication for
\var{realm
} and a super-URI of any of the
442 \begin{methoddesc
}[HTTPPasswordMgr
]{find_user_password
}{realm, authuri
}
443 Get user/password for given realm and URI, if any. This method will
444 return
\code{(None, None)
} if there is no matching user/password.
446 For
\class{HTTPPasswordMgrWithDefaultRealm
} objects, the realm
447 \code{None
} will be searched if the given
\var{realm
} has no matching
452 \subsection{AbstractBasicAuthHandler Objects
453 \label{abstract-basic-auth-handler
}}
455 \begin{methoddesc
}[AbstractBasicAuthHandler
]{handle_authentication_request
}
456 {authreq, host, req, headers
}
457 Handle an authentication request by getting a user/password pair, and
458 re-trying the request.
\var{authreq
} should be the name of the header
459 where the information about the realm is included in the request,
460 \var{host
} is the host to authenticate to,
\var{req
} should be the
461 (failed)
\class{Request
} object, and
\var{headers
} should be the error
466 \subsection{HTTPBasicAuthHandler Objects
467 \label{http-basic-auth-handler
}}
469 \begin{methoddesc
}[HTTPBasicAuthHandler
]{http_error_401
}{req, fp, code,
471 Retry the request with authentication information, if available.
475 \subsection{ProxyBasicAuthHandler Objects
476 \label{proxy-basic-auth-handler
}}
478 \begin{methoddesc
}[ProxyBasicAuthHandler
]{http_error_407
}{req, fp, code,
480 Retry the request with authentication information, if available.
484 \subsection{AbstractDigestAuthHandler Objects
485 \label{abstract-digest-auth-handler
}}
487 \begin{methoddesc
}[AbstractDigestAuthHandler
]{handle_authentication_request
}
488 {authreq, host, req, headers
}
489 \var{authreq
} should be the name of the header where the information about
490 the realm is included in the request,
\var{host
} should be the host to
491 authenticate to,
\var{req
} should be the (failed)
\class{Request
}
492 object, and
\var{headers
} should be the error headers.
496 \subsection{HTTPDigestAuthHandler Objects
497 \label{http-digest-auth-handler
}}
499 \begin{methoddesc
}[HTTPDigestAuthHandler
]{http_error_401
}{req, fp, code,
501 Retry the request with authentication information, if available.
505 \subsection{ProxyDigestAuthHandler Objects
506 \label{proxy-digest-auth-handler
}}
508 \begin{methoddesc
}[ProxyDigestAuthHandler
]{http_error_407
}{req, fp, code,
510 Retry the request with authentication information, if available.
514 \subsection{HTTPHandler Objects
\label{http-handler-objects
}}
516 \begin{methoddesc
}[HTTPHandler
]{http_open
}{req
}
517 Send an HTTP request, which can be either GET or POST, depending on
518 \code{\var{req
}.has_data()
}.
522 \subsection{HTTPSHandler Objects
\label{https-handler-objects
}}
524 \begin{methoddesc
}[HTTPSHandler
]{https_open
}{req
}
525 Send an HTTPS request, which can be either GET or POST, depending on
526 \code{\var{req
}.has_data()
}.
530 \subsection{FileHandler Objects
\label{file-handler-objects
}}
532 \begin{methoddesc
}[FileHandler
]{file_open
}{req
}
533 Open the file locally, if there is no host name, or
534 the host name is
\code{'localhost'
}. Change the
535 protocol to
\code{ftp
} otherwise, and retry opening
536 it using
\member{parent
}.
540 \subsection{FTPHandler Objects
\label{ftp-handler-objects
}}
542 \begin{methoddesc
}[FTPHandler
]{ftp_open
}{req
}
543 Open the FTP file indicated by
\var{req
}.
544 The login is always done with empty username and password.
548 \subsection{CacheFTPHandler Objects
\label{cacheftp-handler-objects
}}
550 \class{CacheFTPHandler
} objects are
\class{FTPHandler
} objects with
551 the following additional methods:
553 \begin{methoddesc
}[CacheFTPHandler
]{setTimeout
}{t
}
554 Set timeout of connections to
\var{t
} seconds.
557 \begin{methoddesc
}[CacheFTPHandler
]{setMaxConns
}{m
}
558 Set maximum number of cached connections to
\var{m
}.
562 \subsection{GopherHandler Objects
\label{gopher-handler
}}
564 \begin{methoddesc
}[GopherHandler
]{gopher_open
}{req
}
565 Open the gopher resource indicated by
\var{req
}.
569 \subsection{UnknownHandler Objects
\label{unknown-handler-objects
}}
571 \begin{methoddesc
}[UnknownHandler
]{unknown_open
}{req}
572 Raise a
\exception{URLError
} exception.