1 # -*- test-case-name: openid.test.test_fetchers -*-
3 This module contains the HTTP fetcher interface and several implementations.
6 __all__
= ['fetch', 'getDefaultFetcher', 'setDefaultFetcher', 'HTTPResponse',
7 'HTTPFetcher', 'createHTTPFetcher', 'HTTPFetchingError',
18 # Try to import httplib2 for caching support
19 # http://bitworking.org/projects/httplib2/
23 # httplib2 not available
26 # try to import pycurl, which will let us use CurlHTTPFetcher
# User-Agent value sent with outgoing requests; identifies this library
# version and the host platform.
USER_AGENT = "python-openid/%s (%s)" % (openid.__version__, sys.platform)

# Cap on how much of a response body a fetcher will read, in kibibytes
# (fetchers request/read at most MAX_RESPONSE_KB * 1024 bytes).
MAX_RESPONSE_KB = 1024
def fetch(url, body=None, headers=None):
    """Fetch ``url`` using the module's default fetcher.

    This is the convenience entry point; most users should need only
    this function. A POST is performed when ``body`` is given,
    otherwise a GET.

    @raises Exception: any exceptions that may be raised by the
        default fetcher
    """
    return getDefaultFetcher().fetch(url, body, headers)
def createHTTPFetcher():
    """Create a default HTTP fetcher instance

    prefers Curl to urllib2."""
    # Fall back to the urllib2-based fetcher only when pycurl failed
    # to import at module load time.
    if pycurl is None:
        return Urllib2Fetcher()
    return CurlHTTPFetcher()
# Contains the currently set HTTP fetcher. If it is set to None, the
# library will call createHTTPFetcher() to set it. Do not access this
# variable outside of this module.
_default_fetcher = None
def getDefaultFetcher():
    """Return the fetcher used by the module-level L{fetch} function.

    Lazily builds and installs a default fetcher on first use, via
    L{createHTTPFetcher} and L{setDefaultFetcher}.

    @return: the default fetcher
    """
    global _default_fetcher

    if _default_fetcher is None:
        fetcher = createHTTPFetcher()
        setDefaultFetcher(fetcher)
    return _default_fetcher
def setDefaultFetcher(fetcher, wrap_exceptions=True):
    """Install ``fetcher`` as the module-wide default HTTP fetcher.

    @param fetcher: The fetcher to use as the default HTTP fetcher
    @type fetcher: HTTPFetcher

    @param wrap_exceptions: Whether to wrap exceptions thrown by the
        fetcher with HTTPFetchingError so that they may be caught more
        easily. By default, exceptions will be wrapped. In general,
        unwrapped fetchers are useful for debugging of fetching errors
        or if your fetcher raises well-known exceptions that you would
        rather handle yourself.
    @type wrap_exceptions: bool
    """
    global _default_fetcher

    if fetcher is not None and wrap_exceptions:
        _default_fetcher = ExceptionWrappingFetcher(fetcher)
    else:
        _default_fetcher = fetcher
def usingCurl():
    """Whether the currently set HTTP fetcher is a Curl HTTP fetcher."""
    current = getDefaultFetcher()
    return isinstance(current, CurlHTTPFetcher)
class HTTPResponse(object):
    """The result of fetching a URL with an L{HTTPFetcher}.

    @ivar final_url: the URL of the document, after following redirects
    @ivar status: the HTTP status code of the response
    @ivar headers: {str: str} dictionary of response headers
    @ivar body: the response body (truncated to the fetcher's size limit)
    """
    # Class-level defaults so that instances populated attribute-by-
    # attribute (see the fetchers' response builders) always expose
    # every field.
    headers = None
    status = None
    body = None
    final_url = None

    def __init__(self, final_url=None, status=None, headers=None, body=None):
        self.final_url = final_url
        self.status = status
        self.headers = headers
        self.body = body

    def __repr__(self):
        return "<%s status %s for %s>" % (self.__class__.__name__,
                                          self.status,
                                          self.final_url)
class HTTPFetcher(object):
    """Abstract interface for openid HTTP fetchers.

    This interface is only important if you need to write a new
    fetcher; subclasses must override L{fetch}.
    """

    def fetch(self, url, body=None, headers=None):
        """Fetch ``url``, following redirects along the way.

        The request is an HTTP POST when a ``body`` is specified;
        otherwise it is a GET.

        @param headers: HTTP headers to include with the request
        @type headers: {str:str}

        @return: An object representing the server's HTTP response. If
            there are network or protocol errors, an exception will be
            raised. HTTP error responses, like 404 or 500, do not
            cause exceptions.
        @rtype: L{HTTPResponse}

        @raise Exception: Different implementations will raise
            different errors based on the underlying HTTP library.
        """
        raise NotImplementedError
145 def _allowedURL(url
):
146 return url
.startswith('http://') or url
.startswith('https://')
class HTTPFetchingError(Exception):
    """Exception that is wrapped around all exceptions that are raised
    by the underlying fetcher when using the ExceptionWrappingFetcher

    @ivar why: The exception that caused this exception
    """
    def __init__(self, why=None):
        # Store the underlying exception so callers can inspect the
        # documented `why` attribute.
        self.why = why
        Exception.__init__(self, why)
class ExceptionWrappingFetcher(HTTPFetcher):
    """Fetcher decorator: delegates to a wrapped fetcher and converts
    its errors into L{HTTPFetchingError}.

    @cvar uncaught_exceptions: Exceptions that should be exposed to the
        user if they are raised by the fetch call
    """

    uncaught_exceptions = (SystemExit, KeyboardInterrupt, MemoryError)

    def __init__(self, fetcher):
        self.fetcher = fetcher

    def fetch(self, *args, **kwargs):
        try:
            return self.fetcher.fetch(*args, **kwargs)
        except self.uncaught_exceptions:
            # Process-control exceptions must propagate untouched.
            raise
        except:
            exc_cls, exc_inst = sys.exc_info()[:2]
            if exc_inst is None:
                # string exceptions (old-style Python 2 raises)
                exc_inst = exc_cls

            raise HTTPFetchingError(why=exc_inst)
183 class Urllib2Fetcher(HTTPFetcher
):
184 """An C{L{HTTPFetcher}} that uses urllib2.
187 # Parameterized for the benefit of testing frameworks, see
188 # http://trac.openidenabled.com/trac/ticket/85
189 urlopen
= staticmethod(urllib2
.urlopen
)
191 def fetch(self
, url
, body
=None, headers
=None):
192 if not _allowedURL(url
):
193 raise ValueError('Bad URL scheme: %r' % (url
,))
200 "%s Python-urllib/%s" % (USER_AGENT
, urllib2
.__version__
,))
203 '0-%s' % (1024*MAX_RESPONSE_KB
,))
205 req
= urllib2
.Request(url
, data
=body
, headers
=headers
)
207 f
= self
.urlopen(req
)
209 return self
._makeResponse
(f
)
212 except urllib2
.HTTPError
, why
:
214 return self
._makeResponse
(why
)
218 def _makeResponse(self
, urllib2_response
):
219 resp
= HTTPResponse()
220 resp
.body
= urllib2_response
.read(MAX_RESPONSE_KB
* 1024)
221 resp
.final_url
= urllib2_response
.geturl()
222 resp
.headers
= dict(urllib2_response
.info().items())
224 if hasattr(urllib2_response
, 'code'):
225 resp
.status
= urllib2_response
.code
class HTTPError(HTTPFetchingError):
    """Raised by the C{L{CurlHTTPFetcher}} when it encounters an
    exceptional situation while fetching a URL.
    """
    pass
# XXX: define what we mean by paranoid, and make sure it is.
class CurlHTTPFetcher(HTTPFetcher):
    """
    An C{L{HTTPFetcher}} that uses pycurl for fetching.
    See U{http://pycurl.sourceforge.net/}.
    """
    # Overall wall-clock budget for one fetch, including redirects.
    ALLOWED_TIME = 20 # seconds

    def __init__(self):
        HTTPFetcher.__init__(self)
        if pycurl is None:
            raise RuntimeError('Cannot find pycurl library')

    def _parseHeaders(self, header_file):
        """Parse raw response-header text into a {name: value} dict.

        Header names are lower-cased because HTTP headers are
        case-insensitive.

        @raise HTTPError: if the header block is malformed
        """
        header_file.seek(0)

        # Remove the status line from the beginning of the input
        unused_http_status_line = header_file.readline()
        lines = [line.strip() for line in header_file]

        # and the blank line from the end
        empty_line = lines.pop()
        if empty_line:
            # Report the offending trailing line itself (not the leaked
            # comprehension variable `line`, which is undefined when the
            # header block is empty).
            raise HTTPError(
                "No blank line at end of headers: %r" % (empty_line,))

        headers = {}
        for line in lines:
            if line:
                try:
                    name, value = line.split(':', 1)
                except ValueError:
                    raise HTTPError(
                        "Malformed HTTP header line in response: %r" % (line,))

                value = value.strip()

                # HTTP headers are case-insensitive
                name = name.lower()
                headers[name] = value

        return headers

    def _checkURL(self, url):
        # XXX: document that this can be overridden to match desired policy
        # XXX: make sure url is well-formed and routeable
        return _allowedURL(url)

    def fetch(self, url, body=None, headers=None):
        """Fetch ``url`` with pycurl, following redirects manually so
        every hop can be re-checked against C{_checkURL}.

        @raise HTTPError: on disallowed URLs, malformed responses,
            location-less redirects, or timeout.
        """
        stop = int(time.time()) + self.ALLOWED_TIME
        off = self.ALLOWED_TIME

        if headers is None:
            headers = {}

        headers.setdefault('User-Agent',
                           "%s %s" % (USER_AGENT, pycurl.version,))

        header_list = []
        if headers is not None:
            for header_name, header_value in headers.iteritems():
                header_list.append('%s: %s' % (header_name, header_value))

        c = pycurl.Curl()
        try:
            c.setopt(pycurl.NOSIGNAL, 1)

            if header_list:
                c.setopt(pycurl.HTTPHEADER, header_list)

            # Presence of a body indicates that we should do a POST
            if body is not None:
                c.setopt(pycurl.POST, 1)
                c.setopt(pycurl.POSTFIELDS, body)

            # Loop over redirect hops until success or time budget runs out.
            while off > 0:
                if not self._checkURL(url):
                    raise HTTPError("Fetching URL not allowed: %r" % (url,))

                data = cStringIO.StringIO()
                def write_data(chunk):
                    # Returning 0 makes curl abort once the size cap is hit.
                    if data.tell() > 1024*MAX_RESPONSE_KB:
                        return 0
                    else:
                        return data.write(chunk)

                response_header_data = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, write_data)
                c.setopt(pycurl.HEADERFUNCTION, response_header_data.write)
                c.setopt(pycurl.TIMEOUT, off)
                c.setopt(pycurl.URL, openid.urinorm.urinorm(url))
                c.setopt(pycurl.RANGE, '0-%s'%(MAX_RESPONSE_KB*1024))

                c.perform()

                response_headers = self._parseHeaders(response_header_data)
                code = c.getinfo(pycurl.RESPONSE_CODE)
                if code in [301, 302, 303, 307]:
                    url = response_headers.get('location')
                    if url is None:
                        raise HTTPError(
                            'Redirect (%s) returned without a location' % code)

                    # Redirects are always GETs
                    c.setopt(pycurl.POST, 0)

                    # There is no way to reset POSTFIELDS to empty and
                    # reuse the connection, but we only use it once.
                else:
                    resp = HTTPResponse()
                    resp.headers = response_headers
                    resp.status = code
                    resp.final_url = url
                    resp.body = data.getvalue()
                    return resp

                off = stop - int(time.time())

            raise HTTPError("Timed out fetching: %r" % (url,))
        finally:
            c.close()
358 class HTTPLib2Fetcher(HTTPFetcher
):
359 """A fetcher that uses C{httplib2} for performing HTTP
360 requests. This implementation supports HTTP caching.
362 @see: http://bitworking.org/projects/httplib2/
365 def __init__(self
, cache
=None):
366 """@param cache: An object suitable for use as an C{httplib2}
367 cache. If a string is passed, it is assumed to be a
371 raise RuntimeError('Cannot find httplib2 library. '
372 'See http://bitworking.org/projects/httplib2/')
374 super(HTTPLib2Fetcher
, self
).__init
__()
376 # An instance of the httplib2 object that performs HTTP requests
377 self
.httplib2
= httplib2
.Http(cache
)
379 # We want httplib2 to raise exceptions for errors, just like
380 # the other fetchers.
381 self
.httplib2
.force_exception_to_status_code
= False
383 def fetch(self
, url
, body
=None, headers
=None):
384 """Perform an HTTP request
386 @raises Exception: Any exception that can be raised by httplib2
388 @see: C{L{HTTPFetcher.fetch}}
399 '0-%s' % (1024*MAX_RESPONSE_KB
,))
401 # httplib2 doesn't check to make sure that the URL's scheme is
402 # 'http' so we do it here.
403 if not (url
.startswith('http://') or url
.startswith('https://')):
404 raise ValueError('URL is not a HTTP URL: %r' % (url
,))
406 httplib2_response
, content
= self
.httplib2
.request(
407 url
, method
, body
=body
, headers
=headers
)
409 # Translate the httplib2 response to our HTTP response abstraction
411 # When a 400 is returned, there is no "content-location"
412 # header set. This seems like a bug to me. I can't think of a
413 # case where we really care about the final URL when it is an
414 # error response, but being careful about it can't hurt.
416 final_url
= httplib2_response
['content-location']
418 # We're assuming that no redirects occurred
419 assert not httplib2_response
.previous
421 # And this should never happen for a successful response
422 assert httplib2_response
.status
!= 200
428 headers
=dict(httplib2_response
.items()),
429 status
=httplib2_response
.status
,