#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# For instructions see:
# http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs

import hashlib
import math
import optparse
import os
import re
import subprocess
import sys
import time
import urllib2


try:
  import json
except ImportError:
  import simplejson as json


__version__ = '1.0'
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
DEFAULT_TOLERANCE = 0.05
USAGE = ''
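
# Main() below reads two keys from the config file: 'base_url' (the perf
# dashboard root used to build summary URLs) and 'perf_file' (the expectations
# JSON, relative to the config file).  A minimal sketch with purely
# placeholder values might look like:
#   {"base_url": "http://example.com/perf",
#    "perf_file": "perf_expectations.json"}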


def ReadFile(filename):
  """Returns the contents of |filename|; logs and re-raises IOError on failure."""
  try:
    file = open(filename, 'rb')
  except IOError, e:
    print >> sys.stderr, ('I/O Error reading file %s(%s): %s' %
                          (filename, e.errno, e.strerror))
    raise e
  contents = file.read()
  file.close()
  return contents


def ConvertJsonIntoDict(string):
  """Read a JSON string and convert its contents into a Python datatype."""
  if len(string) == 0:
    print >> sys.stderr, ('Error: could not parse empty string')
    raise Exception('JSON data missing')
  try:
    jsondata = json.loads(string)
  except ValueError, e:
    print >> sys.stderr, ('Error parsing string: "%s"' % string)
    raise e
  return jsondata


# Floating point representation of last time we fetched a URL.
last_fetched_at = None
def FetchUrlContents(url):
  """Fetches |url| and returns its stripped contents, or None on HTTP 404."""
  global last_fetched_at
  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
    # Sleep for half a second to avoid overloading the server.
    time.sleep(0.5)
  try:
    last_fetched_at = time.time()
    connection = urllib2.urlopen(url)
  except urllib2.HTTPError, e:
    if e.code == 404:
      return None
    raise e
  text = connection.read().strip()
  connection.close()
  return text


def GetRowData(data, key):
  """Returns |data[key]|'s fields as '"name": value' fragments in a fixed
  order, so that the serialized row (and its checksum) is stable."""
  rowdata = []
  # reva and revb always come first.
  for subkey in ['reva', 'revb']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  # Strings, like type, come next.
  for subkey in ['type', 'better']:
    if subkey in data[key]:
      rowdata.append('"%s": "%s"' % (subkey, data[key][subkey]))
  # Finally the main numbers come last.
  for subkey in ['improve', 'regress', 'tolerance']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  return rowdata
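
# For illustration only (hypothetical entry): given
#   data[key] == {'reva': 100, 'revb': 200, 'type': 'absolute',
#                 'improve': 50, 'regress': 60}
# GetRowData() returns the fragments
#   ['"reva": 100', '"revb": 200', '"type": "absolute"',
#    '"improve": 50', '"regress": 60']
# which GetRowDigest() below hashes to produce the row's 'sha1' field.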


def GetRowDigest(rowdata, key):
  """Returns the first 8 hex digits of the SHA-1 of serialized |rowdata| plus |key|."""
  sha1 = hashlib.sha1()
  rowdata = [str(possibly_unicode_string).encode('ascii')
             for possibly_unicode_string in rowdata]
  sha1.update(str(rowdata) + key)
  return sha1.hexdigest()[0:8]


def WriteJson(filename, data, keys, calculate_sha1=True):
  """Write a list of |keys| in |data| to the file specified in |filename|."""
  try:
    file = open(filename, 'wb')
  except IOError, e:
    print >> sys.stderr, ('I/O Error writing file %s(%s): %s' %
                          (filename, e.errno, e.strerror))
    return False
  jsondata = []
  for key in keys:
    rowdata = GetRowData(data, key)
    if calculate_sha1:
      # Include an updated checksum.
      rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
    else:
      if 'sha1' in data[key]:
        rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
    jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
  jsondata.append('"load": true')
  jsontext = '{%s\n}' % ',\n '.join(jsondata)
  file.write(jsontext + '\n')
  file.close()
  return True
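
# For orientation, the file WriteJson() produces looks roughly like the
# following (key and numbers are made up; the exact format is whatever the
# string-joining above builds):
#   {"linux-release/moz/times/t": {"reva": 100, "revb": 200, "improve": 50,
#      "regress": 60, "sha1": "1a2b3c4d"},
#    "load": true
#   }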


def FloatIsInt(f):
  """Returns True if |f| is within a small epsilon of an integer value."""
  epsilon = 1.0e-10
  return abs(f - int(f)) <= epsilon


last_key_printed = None
def Main(args):
  def OutputMessage(message, verbose_message=True):
    # Note: |key| and |options| are picked up from Main()'s scope at call time.
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print '\n' + key + ':'
    print ' %s' % message

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  options, args = parser.parse_args(args)

  if options.verbose:
    print 'Verbose output enabled.'
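
  # Typical invocations, given the flags defined above (paths illustrative):
  #   make_expectations.py            # regenerate expectations in place
  #   make_expectations.py --checksum # exit non-zero if any row is stale
  #   make_expectations.py -v -c /path/to/other.cfg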

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
      os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # Fetch graphs.dat for this combination.
  perfkeys = perf.keys()
  # In perf_expectations.json, ignore the 'load' key.
  perfkeys.remove('load')
  perfkeys.sort()
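
  # Each remaining key names a trace as system/test/graph/tracename (see the
  # regex below), e.g. a hypothetical 'linux-release/moz/times/t'.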

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.
    original_checksum = value.get('sha1', '')
    if 'sha1' in value:
      del value['sha1']
    rowdata = GetRowData(perf, key)
    computed_checksum = GetRowDigest(rowdata, key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb. We can't generate
    # expectations for those.
    if not(value.has_key('reva') and value.has_key('revb')):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      temp = reva
      reva = revb
      revb = temp

    # Get the system/test/graph/tracename and reftracename for the current key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system = matchData.group(1)
    test = matchData.group(2)
    graph = matchData.group(3)
    tracename = matchData.group(4)
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      return 0
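
    # For reference, each line of the fetched -summary.dat is parsed below as
    # a JSON object shaped roughly like (values made up):
    #   {"rev": 12345, "traces": {"t": ["1200.5", "3.2"],
    #                             "t_ref": ["1190.0", "2.9"]}}
    # where the first element of each trace list is the measured value.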

    # Set value's type to 'relative' by default.
    value_type = value.get('type', 'relative')

    summarylist = summaryjson.split('\n')
    trace_values = {}
    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]
    for trace in traces:
      trace_values.setdefault(trace, {})

    # Find the high and low values for each of the traces.
    scanning = False
    for line in summarylist:
      jsondata = ConvertJsonIntoDict(line)
      try:
        rev = int(jsondata['rev'])
      except ValueError:
        print ('Warning: skipping rev %r because it could not be parsed '
               'as an integer.' % jsondata['rev'])
        continue
      if rev <= revb:
        scanning = True
      if rev < reva:
        break

      # We found the upper revision in the range. Scan for trace data until we
      # find the lower revision in the range.
      if scanning:
        for trace in traces:
          if trace not in jsondata['traces']:
            OutputMessage('trace %s missing' % trace)
            continue
          if type(jsondata['traces'][trace]) != type([]):
            OutputMessage('trace %s format not recognized' % trace)
            continue
          try:
            tracevalue = float(jsondata['traces'][trace][0])
          except ValueError:
            OutputMessage('trace %s value error: %s' % (
                trace, str(jsondata['traces'][trace][0])))
            continue

          for bound in ['high', 'low']:
            trace_values[trace].setdefault(bound, tracevalue)

          trace_values[trace]['high'] = max(trace_values[trace]['high'],
                                            tracevalue)
          trace_values[trace]['low'] = min(trace_values[trace]['low'],
                                           tracevalue)

    if 'high' not in trace_values[tracename]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    elif value_type == 'absolute':
      # Calculate assuming high absolutes are regressions and low absolutes are
      # improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])
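
    # A made-up 'relative' example: if the trace ranged over [1000, 1100] and
    # the _ref trace over [990, 1010], then regress = 1100 - 990 = 110 and
    # improve = 1000 - 1010 = -10.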

    # So far we've assumed better is lower (regress > improve). If the actual
    # values for regress and improve are equal, though, and better was not
    # specified, alert the user so we don't let them create a new file with
    # ambiguous rules.
    if better == None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                        regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve. This value must be a
    # scores-like result.
    if 'regress' in perf[key] and 'improve' in perf[key]:
      if perf[key]['regress'] < perf[key]['improve']:
        assert(better != 'lower')
        better = 'higher'
        temp = regress
        regress = improve
        improve = temp
      else:
        # Sometimes values are equal, e.g., when they are both 0; in that
        # case 'better' may still be set to 'higher'.
        assert(better != 'higher' or
               perf[key]['regress'] == perf[key]['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    originally_ints = False
    if FloatIsInt(regress) and FloatIsInt(improve):
      originally_ints = True

    if better == 'higher':
      if originally_ints:
        regress = int(math.floor(regress - abs(regress*tolerance)))
        improve = int(math.ceil(improve + abs(improve*tolerance)))
      else:
        regress = regress - abs(regress*tolerance)
        improve = improve + abs(improve*tolerance)
    else:
      if originally_ints:
        improve = int(math.floor(improve - abs(improve*tolerance)))
        regress = int(math.ceil(regress + abs(regress*tolerance)))
      else:
        improve = improve - abs(improve*tolerance)
        regress = regress + abs(regress*tolerance)
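
    # Continuing the made-up example with the default 5% tolerance and
    # better == 'lower': regress widens to 110 + 5.5 = 115.5 and improve to
    # -10 - 0.5 = -10.5 (then ceil'd/floor'd to 116 and -11 if both values
    # started out as integers).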

    # Calculate the new checksum to test if this is the only thing that may
    # have changed.
    checksum_rowdata = GetRowData(perf, key)
    new_checksum = GetRowDigest(checksum_rowdata, key)

    if ('regress' in perf[key] and 'improve' in perf[key] and
        perf[key]['regress'] == regress and perf[key]['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % perf[key], verbose_message=False)
    perf[key]['regress'] = regress
    perf[key]['improve'] = improve
    OutputMessage('after: %s' % perf[key], verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    else:
      return 0

  if write_new_expectations:
    print '\nWriting expectations... ',
    WriteJson(perf_file, perf, perfkeys)
    print 'done'
  else:
    if options.verbose:
      print ''
    print 'No changes.'
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))