1 from __future__
import generators
5 A caching http interface that supports ETags and gzip
8 Requires Python 2.3 or later
11 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
15 __author__
= "Joe Gregorio (joe@bitworking.org)"
16 __copyright__
= "Copyright 2006, Joe Gregorio"
17 __contributors__
= ["Thomas Broyer (t.broyer@ltgt.net)",
19 "Xavier Verges Farrero",
25 __version__
= "$Rev: 259 $"
46 from gettext
import gettext
as _
54 if sys
.version_info
>= (2,3):
55 from iri2uri
import iri2uri
60 __all__
= ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
61 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
62 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
66 # The httplib debug level, set to a non-zero value to get debug output
if sys.version_info < (2,4):
    # Python 2.3's httplib.HTTPResponse lacks getheaders(); provide it.
    def HTTPResponse__getheaders(self):
        """Return list of (header, value) tuples."""
        # Guard restored: without it the raise is unconditional and the
        # return below is unreachable.
        if self.msg is None:
            raise httplib.ResponseNotReady()
        return self.msg.items()

    if not hasattr(httplib.HTTPResponse, 'getheaders'):
        httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
85 # All exceptions raised here derive from HttpLib2Error
86 class HttpLib2Error(Exception): pass
# Some exceptions can be caught and optionally
# be turned back into responses.
class HttpLib2ErrorWithResponse(HttpLib2Error):
    """Error that also carries the offending response and entity body.

    Callers that set force_exception_to_status_code can turn these
    back into (response, content) pairs.
    """
    def __init__(self, desc, response, content):
        HttpLib2Error.__init__(self, desc)
        # Keep the response/content around so the caller can inspect them.
        self.response = response
        self.content = content
class RedirectMissingLocation(HttpLib2ErrorWithResponse):
    """A 3xx response arrived without a Location: header."""

class RedirectLimit(HttpLib2ErrorWithResponse):
    """More redirects were followed than 'redirections' allows."""

class FailedToDecompressContent(HttpLib2ErrorWithResponse):
    """The body claimed gzip/deflate encoding but would not decompress."""

class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """The Digest challenge used an option this client does not support."""

class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse):
    """The HMACDigest challenge used an option this client does not support."""
class RelativeURIError(HttpLib2Error):
    """A relative URI was given where an absolute one is required."""

class ServerNotFoundError(HttpLib2Error):
    """DNS lookup for the target host failed."""
109 # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
111 # Pluggable cache storage (supports storing the cache in
112 # flat files by default. We need a plug-in architecture
113 # that can support Berkeley DB and Squid)
116 # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
117 # Does not handle Cache-Control: max-stale
118 # Does not use Age: headers when calculating cache freshness.
121 # The number of redirections to follow before giving up.
122 # Note that only GET redirects are automatically followed.
123 # Will also honor 301 requests by saving that info and never
124 # requesting that URI again.
125 DEFAULT_MAX_REDIRECTS
= 5
127 # Which headers are hop-by-hop headers by default
128 HOP_BY_HOP
= ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
130 def _get_end2end_headers(response
):
131 hopbyhop
= list(HOP_BY_HOP
)
132 hopbyhop
.extend([x
.strip() for x
in response
.get('connection', '').split(',')])
133 return [header
for header
in response
.keys() if header
not in hopbyhop
]
# Appendix B of RFC 3986: a permissive URI splitter.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")

def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)

    Missing components come back as None.
    """
    groups = URI.match(uri).groups()
    # groups: 1=scheme, 3=authority, 4=path, 6=query, 8=fragment
    return (groups[1], groups[3], groups[4], groups[6], groups[8])
def urlnorm(uri):
    """Normalize an absolute URI for use as a cache key.

    Returns (scheme, authority, request_uri, defrag_uri) with the scheme
    and authority lowercased and the fragment dropped.

    Raises RelativeURIError if scheme or authority is missing.
    """
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    scheme = scheme.lower()
    # Default an empty path to "/" so http://host and http://host/
    # normalize to the same cache key.
    if not path:
        path = "/"
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
# Matches a leading URL scheme like "http://".
re_url_scheme = re.compile(r'^\w+://')
# Characters collapsed to "," when building safe cache filenames.
re_slash = re.compile(r'[?/:|]+')
def safename(filename):
    """Return a filename suitable for the cache.

    Strips dangerous and common characters to create a filename we
    can use to store the cache in.
    """
    try:
        # IDNA-encode the host part of URL-like names so non-ASCII
        # hosts still yield filesystem-safe names.
        if re_url_scheme.match(filename):
            if isinstance(filename, str):
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename = filename.encode('idna')
    except UnicodeError:
        pass
    if isinstance(filename, unicode):
        filename = filename.encode('utf-8')
    # MD5 of the full original name keeps distinct URLs distinct even
    # after the lossy character stripping below.
    filemd5 = md5.new(filename).hexdigest()
    filename = re_url_scheme.sub("", filename)
    filename = re_slash.sub(",", filename)

    # limit length of filename
    if len(filename) > 200:
        filename = filename[:200]
    return ",".join((filename, filemd5))
192 NORMALIZE_SPACE
= re
.compile(r
'(?:\r\n)?[ \t]+')
193 def _normalize_headers(headers
):
194 return dict([ (key
.lower(), NORMALIZE_SPACE
.sub(value
, ' ').strip()) for (key
, value
) in headers
.iteritems()])
196 def _parse_cache_control(headers
):
198 if headers
.has_key('cache-control'):
199 parts
= headers
['cache-control'].split(',')
200 parts_with_args
= [tuple([x
.strip() for x
in part
.split("=")]) for part
in parts
if -1 != part
.find("=")]
201 parts_wo_args
= [(name
.strip(), 1) for name
in parts
if -1 == name
.find("=")]
202 retval
= dict(parts_with_args
+ parts_wo_args
)
# Whether to use a strict mode to parse WWW-Authenticate headers
# Might lead to bad results in case of ill-formed header value,
# so disabled by default, falling back to relaxed parsing.
# Set to true to turn on, usefull for testing servers.
USE_WWW_AUTH_STRICT_PARSING = 0

# In regex below:
# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP
# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
UNQUOTE_PAIRS = re.compile(r'\\(.)')
def _parse_www_authenticate(headers, headername='www-authenticate'):
    """Returns a dictionary of dictionaries, one dict
    per auth_scheme: {scheme: {param: value, ...}, ...}.

    FIX: the result/param dict initializations, the two parsing loops,
    and the return were missing, so the function parsed at most nothing
    and returned None. ('x in headers' replaces py2-only has_key.)
    """
    retval = {}
    if headername in headers:
        authenticate = headers[headername].strip()
        www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
        while authenticate:
            # Break off the scheme at the beginning of the line
            if headername == 'authentication-info':
                # Authentication-Info has no scheme token; it is Digest.
                (auth_scheme, the_rest) = ('digest', authenticate)
            else:
                (auth_scheme, the_rest) = authenticate.split(" ", 1)
            # Now loop over all the key value pairs that come after the scheme,
            # being careful not to roll into the next scheme
            match = www_auth.search(the_rest)
            auth_params = {}
            while match:
                if match and len(match.groups()) == 3:
                    (key, value, the_rest) = match.groups()
                    auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
                match = www_auth.search(the_rest)
            retval[auth_scheme.lower()] = auth_params
            authenticate = the_rest.strip()
    return retval
def _entry_disposition(response_headers, request_headers):
    """Determine freshness from the Date, Expires and Cache-Control headers.

    Returns "FRESH", "STALE", or "TRANSPARENT".

    We don't handle the following:

    1. Cache-Control: max-stale
    2. Age: headers are not used in the calculations.

    Not that this algorithm is simpler than you might think
    because we are operating as a private (non-shared) cache.
    This lets us ignore 's-maxage'. We can also ignore
    'proxy-invalidate' since we aren't a proxy.
    We will never return a stale document as
    fresh as a design decision, and thus the non-implementation
    of 'max-stale'. This also lets us safely ignore 'must-revalidate'
    since we operate as if every server has sent 'must-revalidate'.
    Since we are private we get to ignore both 'public' and
    'private' parameters. We also ignore 'no-transform' since
    we don't do any transformations.
    The 'no-store' parameter is handled at a higher level.
    So the only Cache-Control parameters we look at are:
    no-cache, only-if-cached, max-age, min-fresh.
    """
    retval = "STALE"
    cc = _parse_cache_control(request_headers)
    cc_response = _parse_cache_control(response_headers)

    if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
        retval = "TRANSPARENT"
        if 'cache-control' not in request_headers:
            request_headers['cache-control'] = 'no-cache'
    elif cc.has_key('no-cache'):
        retval = "TRANSPARENT"
    elif cc_response.has_key('no-cache'):
        retval = "STALE"
    elif cc.has_key('only-if-cached'):
        retval = "FRESH"
    elif response_headers.has_key('date'):
        date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except ValueError:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            if None == expires:
                freshness_lifetime = 0
            else:
                freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
def _decompressContent(response, new_content):
    """Decompress the body when Content-Encoding is gzip or deflate.

    On success the content-length header is fixed up and the
    content-encoding header removed. Raises FailedToDecompressContent
    if decompression fails.
    """
    content = new_content
    try:
        encoding = response.get('content-encoding', None)
        if encoding in ['gzip', 'deflate']:
            if encoding == 'gzip':
                content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
            if encoding == 'deflate':
                content = zlib.decompress(content)
            # Reflect the decoded body in the headers.
            response['content-length'] = str(len(content))
            del response['content-encoding']
    except IOError:
        content = ""
        raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
    return content
def _updateCache(request_headers, response_headers, content, cache, cachekey):
    """Store a response in the cache unless no-store forbids it.

    The cache entry is "status line + CRLF-normalized headers + body".
    Hop-level headers (status, content-encoding, transfer-encoding)
    are not persisted.
    """
    if cachekey:
        cc = _parse_cache_control(request_headers)
        cc_response = _parse_cache_control(response_headers)
        if cc.has_key('no-store') or cc_response.has_key('no-store'):
            # Either side forbids storing; drop any existing entry too.
            cache.delete(cachekey)
        else:
            info = email.Message.Message()
            for key, value in response_headers.iteritems():
                if key not in ['status', 'content-encoding', 'transfer-encoding']:
                    info[key] = value

            status = response_headers.status
            if status == 304:
                status = 200

            status_header = 'status: %d\r\n' % response_headers.status

            header_str = info.as_string()

            # Normalize bare CR or LF to CRLF.
            header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
            text = "".join([status_header, header_str, content])

            cache.set(cachekey, text)
362 dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
def _wsse_username_token(cnonce, iso_now, password):
    """Return the base64 SHA-1 PasswordDigest for a WSSE UsernameToken."""
    return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
369 # For credentials we need two things, first
370 # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
371 # Then we also need a list of URIs that have already demanded authentication
372 # That list is tricky since sub-URIs can take the same auth, or the
373 # auth scheme may change as you descend the tree.
374 # So we also need each Auth instance to be able to tell us
375 # how close to the 'top' it is.
class Authentication(object):
    """Base class for one authentication scheme applied to one URI subtree."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        self.path = path
        self.host = host
        self.credentials = credentials
        self.http = http

    def depth(self, request_uri):
        # How far below our root path this request sits.
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return request_uri[len(self.path):].count("/")

    def inscope(self, host, request_uri):
        # XXX Should we normalize the request_uri?
        (scheme, authority, path, query, fragment) = parse_uri(request_uri)
        return (host == self.host) and path.startswith(self.path)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header. Over-rise this in sub-classes."""
        pass

    def response(self, response, content):
        """Gives us a chance to update with new nonces
        or such returned from the last authorized response.
        Over-rise this in sub-classes if necessary.

        Return TRUE is the request is to be retried, for
        example Digest may return stale=true.
        """
        return False
class BasicAuthentication(Authentication):
    """RFC 2617 Basic: base64(user:password) on every request."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip()
class DigestAuthentication(Authentication):
    """Only do qop='auth' and MD5, since that
    is all Apache currently implements"""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['digest']
        qop = self.challenge.get('qop')
        self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
        if self.challenge['qop'] is None:
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5')
        if self.challenge['algorithm'] != 'MD5':
            raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        # A1 per RFC 2617: user:realm:password.
        self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
        self.challenge['nc'] = 1

    def request(self, method, request_uri, headers, content, cnonce = None):
        """Modify the request headers"""
        H = lambda x: md5.new(x).hexdigest()
        KD = lambda s, d: H("%s:%s" % (s, d))
        A2 = "".join([method, ":", request_uri])
        self.challenge['cnonce'] = cnonce or _cnonce()
        request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
                    '%08x' % self.challenge['nc'],
                    self.challenge['cnonce'],
                    self.challenge['qop'], H(A2)
                    ))
        headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['nonce'],
                request_uri,
                self.challenge['algorithm'],
                request_digest,
                self.challenge['qop'],
                self.challenge['nc'],
                self.challenge['cnonce'],
                )
        self.challenge['nc'] += 1

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                # Server says the nonce is stale: retry with the new one.
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']
                self.challenge['nc'] = 1
        return False
class HmacDigestAuthentication(Authentication):
    """Adapted from Robert Sayre's code and DigestAuthentication above."""
    __author__ = "Thomas Broyer (t.broyer@ltgt.net)"

    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:
            self.challenge['reason'] = 'unauthorized'
        self.challenge['salt'] = self.challenge.get('salt', '')
        if not self.challenge.get('snonce'):
            raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
        self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
        if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
        self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
        if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
            raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
        # Pick hash modules for the digest and the password hash.
        if self.challenge['algorithm'] == 'HMAC-MD5':
            self.hashmod = md5
        else:
            self.hashmod = sha
        if self.challenge['pw-algorithm'] == 'MD5':
            self.pwhashmod = md5
        else:
            self.pwhashmod = sha
        self.key = "".join([self.credentials[0], ":",
                    self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
                    ":", self.challenge['realm']
                    ])
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):
        """Modify the request headers"""
        keys = _get_end2end_headers(headers)
        keylist = "".join(["%s " % k for k in keys])
        headers_val = "".join([headers[k] for k in keys])
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
                self.credentials[0],
                self.challenge['realm'],
                self.challenge['snonce'],
                cnonce,
                request_uri,
                created,
                request_digest,
                keylist,
                )

    def response(self, response, content):
        challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
        # Retry when the server reports an integrity failure or staleness.
        if challenge.get('reason') in ['integrity', 'stale']:
            return True
        return False
class WsseAuthentication(Authentication):
    """This is thinly tested and should not be relied upon.
    At this time there isn't any third party server to test against.
    Blogger and TypePad implemented this algorithm at one point
    but Blogger has since switched to Basic over HTTPS and
    TypePad has implemented it wrong, by never issuing a 401
    challenge but instead requiring your client to telepathically know that
    their endpoint is expecting WSSE profile="UsernameToken"."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['Authorization'] = 'WSSE profile="UsernameToken"'
        iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        cnonce = _cnonce()
        password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
        headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
                self.credentials[0],
                password_digest,
                cnonce,
                iso_now)
class GoogleLoginAuthentication(Authentication):
    """Google ClientLogin: exchange credentials for an Auth token."""
    def __init__(self, credentials, host, request_uri, headers, response, content, http):
        from urllib import urlencode
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        service = challenge['googlelogin'].get('service', 'xapi')
        # Bloggger actually returns the service in the challenge
        # For the rest we guess based on the URI
        if service == 'xapi' and request_uri.find("calendar") > 0:
            service = "cl"
        # No point in guessing Base or Spreadsheet
        #elif request_uri.find("spreadsheets") > 0:
        #    service = "wise"

        auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
        resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
        lines = content.split('\n')
        d = dict([tuple(line.split("=", 1)) for line in lines if line])
        if resp.status == 403:
            # Login refused; leave the token empty.
            self.Auth = ""
        else:
            self.Auth = d['Auth']

    def request(self, method, request_uri, headers, content):
        """Modify the request headers to add the appropriate
        Authorization header."""
        headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
# Map auth-scheme name (lowercased) to its handler class.
AUTH_SCHEME_CLASSES = {
    "basic": BasicAuthentication,
    "wsse": WsseAuthentication,
    "digest": DigestAuthentication,
    "hmacdigest": HmacDigestAuthentication,
    "googlelogin": GoogleLoginAuthentication
}

# Preference order when a server offers several challenges.
AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
class FileCache(object):
    """Uses a local directory as a store for cached files.
    Not really safe to use if multiple threads or processes are going to
    be running on the same cache.
    """
    def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
        self.cache = cache
        self.safe = safe
        if not os.path.exists(cache):
            os.makedirs(self.cache)

    def get(self, key):
        retval = None
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        try:
            f = file(cacheFullPath, "r")
            retval = f.read()
            f.close()
        except IOError:
            # Missing/unreadable entry is simply a cache miss.
            pass
        return retval

    def set(self, key, value):
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        f = file(cacheFullPath, "w")
        f.write(value)
        f.close()

    def delete(self, key):
        cacheFullPath = os.path.join(self.cache, self.safe(key))
        if os.path.exists(cacheFullPath):
            os.remove(cacheFullPath)
class Credentials(object):
    """Holds (domain, name, password) triples and yields matches per domain."""
    def __init__(self):
        self.credentials = []

    def add(self, name, password, domain=""):
        # Empty domain means "valid for every host".
        self.credentials.append((domain.lower(), name, password))

    def clear(self):
        self.credentials = []

    def iter(self, domain):
        for (cdomain, name, password) in self.credentials:
            if cdomain == "" or domain == cdomain:
                yield (name, password)
class KeyCerts(Credentials):
    """Identical to Credentials except that
    name/password are mapped to key/cert."""
    pass
class ProxyInfo(object):
    """Collect information required to use a proxy."""
    def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
        """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
        constants. For example:

        p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
        """
        self.proxy_type = proxy_type
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.proxy_rdns = proxy_rdns
        self.proxy_user = proxy_user
        self.proxy_pass = proxy_pass

    def astuple(self):
        # Same order that socks' setproxy() expects.
        return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
                self.proxy_user, self.proxy_pass)

    def isgood(self):
        # Usable only when the socks module imported and host/port are set.
        return socks and (self.proxy_host != None) and (self.proxy_port != None)
678 class HTTPConnectionWithTimeout(httplib.HTTPConnection):
679 """HTTPConnection subclass that supports timeouts
"""
681 def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
682 httplib.HTTPConnection.__init__(self, host, port, strict)
683 self.timeout = timeout
684 self.proxy_info = proxy_info
687 """Connect to the host
and port specified
in __init__
."""
688 # Mostly verbatim from httplib.py.
689 msg = "getaddrinfo returns an empty list"
690 for res in socket.getaddrinfo(self.host, self.port, 0,
692 af, socktype, proto, canonname, sa = res
694 if self.proxy_info and self.proxy_info.isgood():
695 self.sock = socks.socksocket(af, socktype, proto)
696 self.sock.setproxy(*self.proxy_info.astuple())
698 self.sock = socket.socket(af, socktype, proto)
699 # Different from httplib: support timeouts.
700 if self.timeout is not None:
701 self.sock.settimeout(self.timeout)
702 # End of difference from httplib.
703 if self.debuglevel > 0:
704 print "connect: (%s, %s)" % (self.host, self.port)
705 self.sock.connect(sa)
706 except socket.error, msg:
707 if self.debuglevel > 0:
708 print 'connect fail:', (self.host, self.port)
715 raise socket.error, msg
class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
    "This class allows communication via SSL."

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=None, proxy_info=None):
        self.timeout = timeout
        self.proxy_info = proxy_info
        httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
                cert_file=cert_file, strict=strict)

    def connect(self):
        "Connect to a host on a given (SSL) port."
        # FIX: the proxy branch called setproxy() twice on a socket that
        # was never created; create the socks socket first, then
        # configure the proxy on it.
        if self.proxy_info and self.proxy_info.isgood():
            sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setproxy(*self.proxy_info.astuple())
        else:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        if self.timeout is not None:
            sock.settimeout(self.timeout)
        sock.connect((self.host, self.port))
        ssl = socket.ssl(sock, self.key_file, self.cert_file)
        self.sock = httplib.FakeSocket(sock, ssl)
744 """An HTTP client that handles
:
756 def __init__(self, cache=None, timeout=None, proxy_info=None):
757 """The value of proxy_info
is a ProxyInfo instance
.
759 If
'cache' is a string then it
is used
as a directory name
760 for a disk cache
. Otherwise it must be an
object that supports
761 the same interface
as FileCache
."""
762 self.proxy_info = proxy_info
763 # Map domain name to an httplib connection
764 self.connections = {}
765 # The location of the cache, for now a directory
766 # where cached responses are held.
767 if cache and isinstance(cache, str):
768 self.cache = FileCache(cache)
773 self.credentials = Credentials()
776 self.certificates = KeyCerts()
778 # authorization objects
779 self.authorizations = []
781 # If set to False then no redirects are followed, even safe ones.
782 self.follow_redirects = True
784 # If 'follow_redirects' is True, and this is set to True then
785 # all redirecs are followed, including unsafe ones.
786 self.follow_all_redirects = False
788 self.ignore_etag = False
790 self.force_exception_to_status_code = False
792 self.timeout = timeout
794 def _auth_from_challenge(self, host, request_uri, headers, response, content):
795 """A generator that creates Authorization objects
796 that can be applied to requests
.
798 challenges = _parse_www_authenticate(response, 'www-authenticate')
799 for cred in self.credentials.iter(host):
800 for scheme in AUTH_SCHEME_ORDER:
801 if challenges.has_key(scheme):
802 yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
804 def add_credentials(self, name, password, domain=""):
805 """Add a name
and password that will be used
806 any time a request requires authentication
."""
807 self.credentials.add(name, password, domain)
809 def add_certificate(self, key, cert, domain):
810 """Add a key
and cert that will be used
811 any time a request requires authentication
."""
812 self.certificates.add(key, cert, domain)
814 def clear_credentials(self):
815 """Remove all the names
and passwords
816 that are used
for authentication
"""
817 self.credentials.clear()
818 self.authorizations = []
820 def _conn_request(self, conn, request_uri, method, body, headers):
823 conn.request(method, request_uri, body, headers)
824 response = conn.getresponse()
825 except socket.gaierror:
827 raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
828 except httplib.HTTPException, e:
836 content = response.read()
837 response = Response(response)
839 content = _decompressContent(response, content)
842 return (response, content)
845 def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
846 """Do the actual request using the connection
object
847 and also follow one level of redirects
if necessary
"""
849 auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
850 auth = auths and sorted(auths)[0][1] or None
852 auth.request(method, request_uri, headers, body)
854 (response, content) = self._conn_request(conn, request_uri, method, body, headers)
857 if auth.response(response, body):
858 auth.request(method, request_uri, headers, body)
859 (response, content) = self._conn_request(conn, request_uri, method, body, headers )
860 response._stale_digest = 1
862 if response.status == 401:
863 for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
864 authorization.request(method, request_uri, headers, body)
865 (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
866 if response.status != 401:
867 self.authorizations.append(authorization)
868 authorization.response(response, body)
871 if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
872 if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
873 # Pick out the location header and basically start from the beginning
874 # remembering first to strip the ETag header and decrement our 'depth'
876 if not response.has_key('location') and response.status != 300:
877 raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
878 # Fix-up relative redirects (which violate an RFC 2616 MUST)
879 if response.has_key('location'):
880 location = response['location']
881 (scheme, authority, path, query, fragment) = parse_uri(location)
882 if authority == None:
883 response['location'] = urlparse.urljoin(absolute_uri, location)
884 if response.status == 301 and method in ["GET", "HEAD"]:
885 response['-x-permanent-redirect-url'] = response['location']
886 if not response.has_key('content-location'):
887 response['content-location'] = absolute_uri
888 _updateCache(headers, response, content, self.cache, cachekey)
889 if headers.has_key('if-none-match'):
890 del headers['if-none-match']
891 if headers.has_key('if-modified-since'):
892 del headers['if-modified-since']
893 if response.has_key('location'):
894 location = response['location']
895 old_response = copy.deepcopy(response)
896 if not old_response.has_key('content-location'):
897 old_response['content-location'] = absolute_uri
898 redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
899 (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
900 response.previous = old_response
902 raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
903 elif response.status in [200, 203] and method == "GET":
904 # Don't cache 206's since we aren't going to handle byte range requests
905 if not response.has_key('content-location'):
906 response['content-location'] = absolute_uri
907 _updateCache(headers, response, content, self.cache, cachekey)
909 return (response, content)
912 # Need to catch and rebrand some exceptions
913 # Then need to optionally turn all exceptions into status codes
914 # including all socket.* and httplib.* exceptions.
917 def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
918 """ Performs a single HTTP request
.
919 The
'uri' is the URI of the HTTP resource
and can begin
920 with either
'http' or 'https'. The value of
'uri' must be an absolute URI
.
922 The
'method' is the HTTP method to perform
, such
as GET
, POST
, DELETE
, etc
.
923 There
is no restriction on the methods allowed
.
925 The
'body' is the entity body to be sent with the request
. It
is a string
928 Any extra headers that are to be sent with the request should be provided
in the
929 'headers' dictionary
.
931 The maximum number of redirect to follow before raising an
932 exception
is 'redirections. The default is 5.
934 The return value is a tuple of (response, content), the first
935 being and instance of the 'Response
' class, the second being
936 a string that contains the response entity body.
942 headers = _normalize_headers(headers)
944 if not headers.has_key('user
-agent
'):
945 headers['user
-agent
'] = "Python-httplib2/%s" % __version__
949 (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
951 conn_key = scheme+":"+authority
952 if conn_key in self.connections:
953 conn = self.connections[conn_key]
955 if not connection_type:
956 connection_type = (scheme == 'https
') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
957 certs = list(self.certificates.iter(authority))
958 if scheme == 'https
' and certs:
959 conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
960 cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
962 conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
963 conn.set_debuglevel(debuglevel)
965 if method in ["GET", "HEAD"] and 'range' not in headers:
966 headers['accept
-encoding
'] = 'compress
, gzip
'
968 info = email.Message.Message()
971 cachekey = defrag_uri
972 cached_value = self.cache.get(cachekey)
974 info = email.message_from_string(cached_value)
976 content = cached_value.split('\r\n\r\n', 1)[1]
978 self.cache.delete(cachekey)
984 if method in ["PUT"] and self.cache and info.has_key('etag
') and not self.ignore_etag and 'if-match
' not in headers:
985 # http://www.w3.org/1999/04/Editing/
986 headers['if-match
'] = info['etag
']
988 if method not in ["GET", "HEAD"] and self.cache and cachekey:
989 # RFC 2616 Section 13.10
990 self.cache.delete(cachekey)
992 if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
993 if info.has_key('-x
-permanent
-redirect
-url
'):
994 # Should cached permanent redirects be counted in our redirection count? For now, yes.
995 (response, new_content) = self.request(info['-x
-permanent
-redirect
-url
'], "GET", headers = headers, redirections = redirections - 1)
996 response.previous = Response(info)
997 response.previous.fromcache = True
999 # Determine our course of action:
1000 # Is the cached entry fresh or stale?
1001 # Has the client requested a non-cached response?
1003 # There seems to be three possible answers:
1004 # 1. [FRESH] Return the cache entry w/o doing a GET
1005 # 2. [STALE] Do the GET (but add in cache validators if available)
1006 # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
1007 entry_disposition = _entry_disposition(info, headers)
1009 if entry_disposition == "FRESH":
1010 if not cached_value:
1011 info['status
'] = '504'
1013 response = Response(info)
1015 response.fromcache = True
1016 return (response, content)
1018 if entry_disposition == "STALE":
1019 if info.has_key('etag
') and not self.ignore_etag and not 'if-none
-match
' in headers:
1020 headers['if-none
-match
'] = info['etag
']
1021 if info.has_key('last
-modified
') and not 'last
-modified
' in headers:
1022 headers['if-modified
-since
'] = info['last
-modified
']
1023 elif entry_disposition == "TRANSPARENT":
1026 (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1028 if response.status == 304 and method == "GET":
1029 # Rewrite the cache entry with the new end-to-end headers
1030 # Take all headers that are in response
1031 # and overwrite their values in info.
1032 # unless they are hop-by-hop, or are listed in the connection header.
1034 for key in _get_end2end_headers(response):
1035 info[key] = response[key]
1036 merged_response = Response(info)
1037 if hasattr(response, "_stale_digest"):
1038 merged_response._stale_digest = response._stale_digest
1039 _updateCache(headers, merged_response, content, self.cache, cachekey)
1040 response = merged_response
1041 response.status = 200
1042 response.fromcache = True
1044 elif response.status == 200:
1045 content = new_content
1047 self.cache.delete(cachekey)
1048 content = new_content
1050 (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1051 except Exception, e:
1052 if self.force_exception_to_status_code:
1053 if isinstance(e, HttpLib2ErrorWithResponse):
1054 response = e.response
1056 response.status = 500
1057 response.reason = str(e)
1058 elif isinstance(e, socket.timeout):
1059 content = "Request Timeout"
1060 response = Response( {
1061 "content-type": "text/plain",
1063 "content-length": len(content)
1065 response.reason = "Request Timeout"
1068 response = Response( {
1069 "content-type": "text/plain",
1071 "content-length": len(content)
1073 response.reason = "Bad Request"
1078 return (response, content)
1082 class Response(dict):
1083 """An object more like email.Message than httplib.HTTPResponse."""
1085 """Is this response from our local cache"""
1088 """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
1091 "Status code returned by server. "
1094 """Reason phrase returned by server."""
1099 def __init__(self, info):
1100 # info is either an email.Message or
1101 # an httplib.HTTPResponse object.
1102 if isinstance(info, httplib.HTTPResponse):
1103 for key, value in info.getheaders():
1105 self.status = info.status
1106 self['status
'] = str(self.status)
1107 self.reason = info.reason
1108 self.version = info.version
1109 elif isinstance(info, email.Message.Message):
1110 for key, value in info.items():
1112 self.status = int(self['status
'])
1114 for key, value in info.iteritems():
1116 self.status = int(self.get('status
', self.status))
1119 def __getattr__(self, name):
1123 raise AttributeError, name