1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
6 A http client with support for https connections with certificate verification.
8 The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
9 and the code is from Lib/ssl.py in python3:
10 http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py
12 One use case is to download Chromium DEPS file in a secure way:
13 https://src.chromium.org/chrome/trunk/src/DEPS
15 Notice: python 2.7 or newer is required.
# Directory containing this module. Made absolute so that the certificate
# bundle path stays valid even if the process later changes its working
# directory (__file__ may be a relative path).
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Default bundle of trusted root certificates, shipped next to this module.
_TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')
class CertificateError(ValueError):
  """Raised when a certificate's names are invalid or do not match a host."""
def _DNSNameMatch(dn, hostname, max_wildcards=1):
  """Matching according to RFC 6125, section 6.4.3

  http://tools.ietf.org/html/rfc6125#section-6.4.3

  Args:
    dn: DNS name presented by the certificate. It may contain '*' wildcards
        in its left-most label only.
    hostname: The hostname to check against dn.
    max_wildcards: Maximum number of '*' characters tolerated in the
        left-most label of dn.

  Returns:
    True if hostname matches dn (case-insensitively), False otherwise.

  Raises:
    CertificateError: The left-most label of dn contains more than
        max_wildcards wildcards.
  """
  if not dn:
    return False

  parts = dn.split(r'.')
  leftmost = parts[0]
  remainder = parts[1:]

  wildcards = leftmost.count('*')
  if wildcards > max_wildcards:
    # Issue #17980: avoid denials of service by refusing more
    # than one wildcard per fragment. A survey of established
    # policy among SSL implementations showed it to be a
    # reasonable choice.
    raise CertificateError(
        'too many wildcards in certificate DNS name: ' + repr(dn))

  # speed up common case w/o wildcards
  if not wildcards:
    return dn.lower() == hostname.lower()

  pats = []
  # RFC 6125, section 6.4.3, subitem 1.
  # The client SHOULD NOT attempt to match a presented identifier in which
  # the wildcard character comprises a label other than the left-most label.
  if leftmost == '*':
    # When '*' is a fragment by itself, it matches a non-empty dotless
    # fragment.
    pats.append('[^.]+')
  elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
    # RFC 6125, section 6.4.3, subitem 3.
    # The client SHOULD NOT attempt to match a presented identifier
    # where the wildcard character is embedded within an A-label or
    # U-label of an internationalized domain name.
    pats.append(re.escape(leftmost))
  else:
    # Otherwise, '*' matches any dotless string, e.g. www*
    pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))

  # add the remaining fragments, ignore any wildcards
  for frag in remainder:
    pats.append(re.escape(frag))

  pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
  # Normalise to a bool so every return path yields the same type; callers
  # that only test truthiness are unaffected.
  return pat.match(hostname) is not None
def _MatchHostname(cert, hostname):
  """Verify that *cert* (in decoded format as returned by
  SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
  rules are followed, but IP addresses are not accepted for *hostname*.

  CertificateError is raised on failure. On success, the function
  returns nothing.

  Args:
    cert: Decoded certificate dict; its 'subjectAltName' and 'subject'
        entries are inspected.
    hostname: The hostname the certificate is expected to cover.

  Raises:
    ValueError: cert is empty or None.
    CertificateError: No name in the certificate matches hostname.
  """
  if not cert:
    raise ValueError('empty or no certificate, match_hostname needs a '
                     'SSL socket or SSL context with either '
                     'CERT_OPTIONAL or CERT_REQUIRED')
  # Every candidate name seen, kept only for the error message.
  dnsnames = []
  san = cert.get('subjectAltName', ())
  for key, value in san:
    if key == 'DNS':
      if _DNSNameMatch(value, hostname):
        return
      dnsnames.append(value)
  if not dnsnames:
    # The subject is only checked when there is no dNSName entry
    # in subjectAltName
    for sub in cert.get('subject', ()):
      for key, value in sub:
        # XXX according to RFC 2818, the most specific Common Name
        # must be used.
        if key == 'commonName':
          if _DNSNameMatch(value, hostname):
            return
          dnsnames.append(value)
  if len(dnsnames) > 1:
    raise CertificateError('hostname %r doesn\'t match either of %s'
                           % (hostname, ', '.join(map(repr, dnsnames))))
  elif len(dnsnames) == 1:
    raise CertificateError('hostname %r doesn\'t match %r'
                           % (hostname, dnsnames[0]))
  else:
    raise CertificateError('no appropriate commonName or '
                           'subjectAltName fields were found')
class HTTPSConnection(httplib.HTTPSConnection):
  """HTTPS connection that verifies the server certificate and hostname."""

  def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    """Initializes the connection.

    Args:
      host: Host to connect to, passed on to httplib.HTTPSConnection.
      root_certs: Path of the CA bundle used to verify the server
          certificate. If None, the socket is wrapped without requiring a
          certificate.
      **kwargs: Forwarded unchanged to httplib.HTTPSConnection.
    """
    self.root_certs = root_certs
    httplib.HTTPSConnection.__init__(self, host, **kwargs)

  def connect(self):
    """Connects, wraps the socket in SSL and verifies the peer hostname.

    Raises:
      CertificateError: The certificate does not match the expected host.
          The socket is shut down and closed before re-raising.
    """
    # Overrides for certificate verification.
    args = [(self.host, self.port), self.timeout,]
    if self.source_address:
      args.append(self.source_address)
    sock = socket.create_connection(*args)

    if self._tunnel_host:
      self.sock = sock
      self._tunnel()

    # Wrap the socket for verification with the root certs.
    kwargs = {}
    if self.root_certs is not None:
      kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
    self.sock = ssl.wrap_socket(sock, **kwargs)

    # Check hostname. When tunnelling through a proxy, self.host is the proxy
    # while the certificate belongs to the tunnel target, so verify against
    # the target in that case.
    expected_host = self._tunnel_host or self.host
    try:
      _MatchHostname(self.sock.getpeercert(), expected_host)
    except CertificateError:
      self.sock.shutdown(socket.SHUT_RDWR)
      self.sock.close()
      raise
class HTTPSHandler(urllib2.HTTPSHandler):
  """urllib2 HTTPS handler that opens connections through GetConnection."""

  def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    """Stores the CA bundle path handed to every connection created."""
    urllib2.HTTPSHandler.__init__(self)
    self.root_certs = root_certs

  def https_open(self, req):
    """Opens req via do_open, using GetConnection as the connection factory."""
    # Supplying our own factory is what lets verification against the
    # trusted root certs be injected into the connection.
    connection_factory = self.GetConnection
    return self.do_open(connection_factory, req)

  def GetConnection(self, host, **kwargs):
    """Returns an HTTPSConnection to host.

    root_certs defaults to this handler's value; an explicit root_certs in
    kwargs takes precedence.
    """
    kwargs.setdefault('root_certs', self.root_certs)
    return HTTPSConnection(host, **kwargs)
def _SendRequest(url, timeout=None):
  """Send request to the given https url, and return the server response.

  Args:
    url: The https url to send request to.
    timeout: Timeout passed to the opener's open() call.

  Returns:
    An integer: http code of the response.
    A string: content of the response.

  Raises:
    CertificateError: Certificate verification fails.
  """
  if not url:
    return None, None

  handlers = []
  if url.startswith('https://'):
    # HTTPSHandler has to go first, because we don't want to send secure cookies
    # to a man in the middle.
    handlers.append(HTTPSHandler())

  cookie_file = os.environ.get('COOKIE_FILE')
  if cookie_file and os.path.exists(cookie_file):
    # NOTE(review): the jar is constructed with a filename but load() is never
    # called, so presumably no cookies from the file are sent -- confirm
    # whether that is intended.
    handlers.append(
        urllib2.HTTPCookieProcessor(cookielib.MozillaCookieJar(cookie_file)))

  url_opener = urllib2.build_opener(*handlers)

  status_code = None
  content = None

  try:
    response = url_opener.open(url, timeout=timeout)
    try:
      status_code = response.code
      content = response.read()
    finally:
      # Release the underlying connection even if reading fails.
      response.close()
  except urllib2.HTTPError as e:
    status_code = e.code
    content = None
  except (ssl.SSLError, httplib.BadStatusLine, IOError):
    # NOTE(review): -1 is taken to be the sentinel for transport-level
    # failures -- confirm against callers.
    status_code = -1
    content = None

  return status_code, content
class HttpClientLocal(http_client.HttpClient):
  """This http client is used locally in a workstation, GCE VMs, etc."""

  @staticmethod
  def Get(url, params=None, timeout=120, retries=5, retry_interval=0.5,
          retry_if_not=None):
    """Sends a GET request to url, retrying on failure.

    Args:
      url: The url to request.
      params: Optional mapping of query parameters to url-encode and append.
      timeout: Timeout for each individual request.
      retries: Maximum number of attempts; at least one request is always
          made.
      retry_interval: Seconds to sleep between two attempts.
      retry_if_not: If set and the response status equals it, the response
          is returned immediately without further retries.

    Returns:
      A (status_code, content) tuple from the last attempt.
    """
    if params:
      # Extend an existing query string instead of adding a second '?'.
      separator = '&' if '?' in url else '?'
      url = '%s%s%s' % (url, separator, urllib.urlencode(params))

    attempt = 0
    while True:
      attempt += 1

      status_code, content = _SendRequest(url, timeout=timeout)
      if status_code == 200:
        return status_code, content
      if retry_if_not and status_code == retry_if_not:
        return status_code, content

      if attempt >= retries:
        # Run out of retries.
        return status_code, content
      time.sleep(retry_interval)