Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / tools / findit / common / http_client_local.py
blob b8a168dce24b4f1c5837d7184743278c73a557ff
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """
6 A http client with support for https connections with certificate verification.
8 The verification is based on http://tools.ietf.org/html/rfc6125#section-6.4.3
9 and the code is from Lib/ssl.py in python3:
10 http://hg.python.org/cpython/file/4dac45f88d45/Lib/ssl.py
12 One use case is to download Chromium DEPS file in a secure way:
13 https://src.chromium.org/chrome/trunk/src/DEPS
15 Notice: python 2.7 or newer is required.
16 """
18 import cookielib
19 import httplib
20 import os
21 import re
22 import socket
23 import ssl
24 import time
25 import urllib
26 import urllib2
28 import http_client
# Directory containing this module. It is made absolute so that the
# certificate path below does not depend on the current working directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Default CA bundle used for certificate verification: 'cacert.pem' in the
# same directory as this module.
_TRUSTED_ROOT_CERTS = os.path.join(_SCRIPT_DIR, 'cacert.pem')
class CertificateError(ValueError):
  """Raised when a certificate cannot be matched against a hostname."""
def _DNSNameMatch(dn, hostname, max_wildcards=1):
  """Check whether a certificate DNS name matches the given hostname.

  Matching is done according to RFC 6125, section 6.4.3:
  http://tools.ietf.org/html/rfc6125#section-6.4.3

  Args:
    dn: The DNS name from the certificate. It may contain '*' wildcards in
        its left-most label.
    hostname: The hostname to check against the DNS name.
    max_wildcards: The maximum number of '*' accepted in the left-most label.

  Returns:
    True if the hostname matches the DNS name, False otherwise. An empty or
    missing DNS name never matches.

  Raises:
    CertificateError: The left-most label contains more than max_wildcards
        wildcards.
  """
  if not dn:
    return False

  parts = dn.split('.')
  leftmost = parts[0]
  remainder = parts[1:]

  wildcards = leftmost.count('*')
  if wildcards > max_wildcards:
    # Issue #17980: avoid denials of service by refusing more
    # than one wildcard per fragment. A survey of established
    # policy among SSL implementations showed it to be a
    # reasonable choice.
    raise CertificateError(
        'too many wildcards in certificate DNS name: ' + repr(dn))

  # Speed up the common case without wildcards.
  if not wildcards:
    return dn.lower() == hostname.lower()

  # RFC 6125, section 6.4.3, subitem 1.
  # The client SHOULD NOT attempt to match a presented identifier in which
  # the wildcard character comprises a label other than the left-most label.
  if leftmost == '*':
    # When '*' is a fragment by itself, it matches a non-empty dotless
    # fragment.
    first = '[^.]+'
  elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
    # RFC 6125, section 6.4.3, subitem 3.
    # The client SHOULD NOT attempt to match a presented identifier
    # where the wildcard character is embedded within an A-label or
    # U-label of an internationalized domain name.
    first = re.escape(leftmost)
  else:
    # Otherwise, '*' matches any dotless string, e.g. www*
    first = re.escape(leftmost).replace(r'\*', '[^.]*')

  # Add the remaining fragments; any wildcards in them are matched literally.
  pats = [first]
  pats.extend(re.escape(frag) for frag in remainder)

  pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
  # Normalize to a bool so that every return path has the same type.
  return pat.match(hostname) is not None
def _MatchHostname(cert, hostname):
  """Verify that the certificate matches the hostname.

  RFC 2818 and RFC 6125 rules are followed, but IP addresses are not accepted
  for hostname.

  Args:
    cert: The certificate in decoded format as returned by
        SSLSocket.getpeercert().
    hostname: The hostname the certificate is expected to be valid for.

  Returns:
    None on success.

  Raises:
    ValueError: The certificate is empty or missing.
    CertificateError: No DNS name in the certificate matches the hostname.
  """
  if not cert:
    raise ValueError('empty or no certificate, match_hostname needs a '
                     'SSL socket or SSL context with either '
                     'CERT_OPTIONAL or CERT_REQUIRED')

  # Every name that was tried and did not match; used for the error message.
  dnsnames = []
  for key, value in cert.get('subjectAltName', ()):
    if key == 'DNS':
      if _DNSNameMatch(value, hostname):
        return
      dnsnames.append(value)

  if not dnsnames:
    # The subject is only checked when there is no dNSName entry
    # in subjectAltName.
    for sub in cert.get('subject', ()):
      for key, value in sub:
        # XXX according to RFC 2818, the most specific Common Name
        # must be used.
        if key == 'commonName':
          if _DNSNameMatch(value, hostname):
            return
          dnsnames.append(value)

  if len(dnsnames) > 1:
    raise CertificateError('hostname %r doesn\'t match either of %s'
                           % (hostname, ', '.join(map(repr, dnsnames))))
  elif len(dnsnames) == 1:
    raise CertificateError('hostname %r doesn\'t match %r'
                           % (hostname, dnsnames[0]))
  else:
    raise CertificateError('no appropriate commonName or '
                           'subjectAltName fields were found')
class HTTPSConnection(httplib.HTTPSConnection):
  """An HTTPS connection that verifies the server certificate and hostname."""

  def __init__(self, host, root_certs=_TRUSTED_ROOT_CERTS, **kwargs):
    """Initialize the connection.

    Args:
      host: The host to connect to; forwarded to httplib.HTTPSConnection.
      root_certs: Path to the file of trusted root certificates, passed as
          ca_certs when wrapping the socket. If None, the socket is wrapped
          without cert_reqs/ca_certs.
      **kwargs: Forwarded unchanged to httplib.HTTPSConnection.
    """
    self.root_certs = root_certs
    httplib.HTTPSConnection.__init__(self, host, **kwargs)

  def connect(self):
    """Connect to the host, wrap the socket in SSL and check the hostname.

    Overrides the base class method to add certificate verification. The
    socket is closed before any error from wrapping or verification is
    propagated.

    Raises:
      CertificateError: The certificate does not match self.host.
      ValueError: No certificate was available for the hostname check.
    """
    args = [(self.host, self.port), self.timeout]
    if self.source_address:
      args.append(self.source_address)
    sock = socket.create_connection(*args)

    if self._tunnel_host:
      self.sock = sock
      self._tunnel()

    # Wrap the socket for verification with the root certs.
    kwargs = {}
    if self.root_certs is not None:
      kwargs.update(cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.root_certs)
    try:
      self.sock = ssl.wrap_socket(sock, **kwargs)
    except Exception:
      # Do not leak the raw socket when the SSL setup fails.
      sock.close()
      self.sock = None
      raise

    # Check hostname.
    # NOTE(review): with root_certs=None the peer certificate is presumably
    # empty, in which case _MatchHostname raises ValueError -- confirm whether
    # that configuration is meant to be usable.
    verified = False
    try:
      _MatchHostname(self.sock.getpeercert(), self.host)
      verified = True
    finally:
      # A finally clause (rather than except + bare raise) is used so that the
      # original error propagates even though the cleanup below may swallow a
      # socket error of its own.
      if not verified:
        try:
          self.sock.shutdown(socket.SHUT_RDWR)
        except socket.error:
          pass
        self.sock.close()
        self.sock = None
class HTTPSHandler(urllib2.HTTPSHandler):
  """A urllib2 HTTPS handler that opens connections via HTTPSConnection."""

  def __init__(self, root_certs=_TRUSTED_ROOT_CERTS):
    """Initialize the handler.

    Args:
      root_certs: Path to the file of trusted root certificates; passed on to
          every HTTPSConnection created by this handler.
    """
    urllib2.HTTPSHandler.__init__(self)
    self.root_certs = root_certs

  def https_open(self, req):
    """Open the https request using connections from GetConnection."""
    # Pass a reference to the function below so that verification against
    # trusted root certs could be injected.
    return self.do_open(self.GetConnection, req)

  def GetConnection(self, host, **kwargs):
    """Create an HTTPSConnection to the given host.

    Args:
      host: The host to connect to.
      **kwargs: Extra arguments for HTTPSConnection. A root_certs entry here
          takes precedence over the handler's own root_certs.

    Returns:
      A new HTTPSConnection instance.
    """
    params = dict(root_certs=self.root_certs)
    params.update(kwargs)
    return HTTPSConnection(host, **params)
def _SendRequest(url, timeout=None):
  """Send request to the given url, and return the server response.

  For https urls, the connection goes through HTTPSHandler. If the environment
  variable COOKIE_FILE names an existing file, cookies are loaded from it and
  attached to the request.

  Args:
    url: The url to send request to.
    timeout: The timeout passed on to the url opener.

  Returns:
    An integer: http code of the response. It is -1 if the request failed
        with an SSL error, a bad status line or an IOError, and None if url
        is empty.
    A string: content of the response, or None if the request failed.

  Raises:
    CertificateError: Certificate verification fails.
  """
  if not url:
    return None, None

  handlers = []
  if url.startswith('https://'):
    # HTTPSHandler has to go first, because we don't want to send secure cookies
    # to a man in the middle.
    handlers.append(HTTPSHandler())

  cookie_file = os.environ.get('COOKIE_FILE')
  if cookie_file and os.path.exists(cookie_file):
    cookie_jar = cookielib.MozillaCookieJar(cookie_file)
    try:
      # The jar does not read the file on construction; without load() it
      # would stay empty and no cookie from the file would ever be sent.
      cookie_jar.load()
    except (cookielib.LoadError, IOError):
      # Unreadable or malformed cookie file: continue with an empty jar.
      pass
    handlers.append(urllib2.HTTPCookieProcessor(cookie_jar))

  url_opener = urllib2.build_opener(*handlers)

  try:
    response = url_opener.open(url, timeout=timeout)
    try:
      return response.code, response.read()
    finally:
      # Release the underlying connection whether or not the read succeeded.
      response.close()
  except urllib2.HTTPError as e:
    return e.code, None
  except (ssl.SSLError, httplib.BadStatusLine, IOError):
    return -1, None
class HttpClientLocal(http_client.HttpClient):
  """This http client is used locally in a workstation, GCE VMs, etc."""

  @staticmethod
  def Get(url, params=None, timeout=120, retries=5, retry_interval=0.5,
          retry_if_not=None):
    """Send a GET request to the given url, retrying on failure.

    Args:
      url: The url to send the request to.
      params: An optional dict of query parameters; if non-empty, it is
          url-encoded and appended to the url after a '?'.
      timeout: The timeout passed to each request attempt.
      retries: The maximum number of attempts. At least one attempt is always
          made.
      retry_interval: The time to sleep between two attempts, passed to
          time.sleep.
      retry_if_not: If set, a response with this status code is returned
          immediately instead of being retried.

    Returns:
      The (status_code, content) pair of the last attempt, as returned by
      _SendRequest.
    """
    if params:
      url = '%s?%s' % (url, urllib.urlencode(params))

    count = 0
    while True:
      count += 1

      status_code, content = _SendRequest(url, timeout=timeout)
      if status_code == 200:
        return status_code, content
      if retry_if_not and status_code == retry_if_not:
        return status_code, content

      if count >= retries:
        # Out of attempts: report the last failure to the caller.
        return status_code, content
      time.sleep(retry_interval)