#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# For instructions see:
# http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs

import hashlib
import math
import optparse
import os
import re
import subprocess
import sys
import time
import urllib2


try:
  import json
except ImportError:
  import simplejson as json


__version__ = '1.0'
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
DEFAULT_TOLERANCE = 0.05
USAGE = ''
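
# Main() below reads two keys from the config file: 'base_url' (the perf
# dashboard root used to build summary URLs) and 'perf_file' (the expectations
# JSON, relative to the config file).  A minimal sketch with purely
# placeholder values might look like:
#   {"base_url": "http://example.com/perf",
#    "perf_file": "perf_expectations.json"}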


def ReadFile(filename):
  """Returns the contents of |filename|; logs and re-raises IOError on failure."""
  try:
    file = open(filename, 'rb')
  except IOError, e:
    print >> sys.stderr, ('I/O Error reading file %s(%s): %s' %
                          (filename, e.errno, e.strerror))
    raise e
  contents = file.read()
  file.close()
  return contents


def ConvertJsonIntoDict(string):
  """Read a JSON string and convert its contents into a Python datatype."""
  if len(string) == 0:
    print >> sys.stderr, ('Error: could not parse empty string')
    raise Exception('JSON data missing')
  try:
    jsondata = json.loads(string)
  except ValueError, e:
    print >> sys.stderr, ('Error parsing string: "%s"' % string)
    raise e
  return jsondata


# Floating point representation of last time we fetched a URL.
last_fetched_at = None
def FetchUrlContents(url):
  """Fetches |url| and returns its stripped contents, or None on HTTP 404."""
  global last_fetched_at
  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
    # Sleep for half a second to avoid overloading the server.
    time.sleep(0.5)
  try:
    last_fetched_at = time.time()
    connection = urllib2.urlopen(url)
  except urllib2.HTTPError, e:
    if e.code == 404:
      return None
    raise e
  text = connection.read().strip()
  connection.close()
  return text


def GetRowData(data, key):
  """Returns |data[key]|'s fields as '"name": value' fragments in a fixed
  order, so that the serialized row (and its checksum) is stable."""
  rowdata = []
  # reva and revb always come first.
  for subkey in ['reva', 'revb']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  # Strings, like type, come next.
  for subkey in ['type', 'better']:
    if subkey in data[key]:
      rowdata.append('"%s": "%s"' % (subkey, data[key][subkey]))
  # Finally the main numbers come last.
  for subkey in ['improve', 'regress', 'tolerance']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  return rowdata
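
# For illustration only (hypothetical entry): given
#   data[key] == {'reva': 100, 'revb': 200, 'type': 'absolute',
#                 'improve': 50, 'regress': 60}
# GetRowData() returns the fragments
#   ['"reva": 100', '"revb": 200', '"type": "absolute"',
#    '"improve": 50', '"regress": 60']
# which GetRowDigest() below hashes to produce the row's 'sha1' field.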


def GetRowDigest(rowdata, key):
  """Returns the first 8 hex digits of the SHA-1 of serialized |rowdata| plus |key|."""
  sha1 = hashlib.sha1()
  rowdata = [str(possibly_unicode_string).encode('ascii')
             for possibly_unicode_string in rowdata]
  sha1.update(str(rowdata) + key)
  return sha1.hexdigest()[0:8]


def WriteJson(filename, data, keys, calculate_sha1=True):
  """Write a list of |keys| in |data| to the file specified in |filename|."""
  try:
    file = open(filename, 'wb')
  except IOError, e:
    print >> sys.stderr, ('I/O Error writing file %s(%s): %s' %
                          (filename, e.errno, e.strerror))
    return False
  jsondata = []
  for key in keys:
    rowdata = GetRowData(data, key)
    if calculate_sha1:
      # Include an updated checksum.
      rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
    else:
      if 'sha1' in data[key]:
        rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
    jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
  jsondata.append('"load": true')
  jsontext = '{%s\n}' % ',\n '.join(jsondata)
  file.write(jsontext + '\n')
  file.close()
  return True
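
# For orientation, the file WriteJson() produces looks roughly like the
# following (key and numbers are made up; the exact format is whatever the
# string-joining above builds):
#   {"linux-release/moz/times/t": {"reva": 100, "revb": 200, "improve": 50,
#      "regress": 60, "sha1": "1a2b3c4d"},
#    "load": true
#   }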


def FloatIsInt(f):
  """Returns True if |f| is within a small epsilon of an integer value."""
  epsilon = 1.0e-10
  return abs(f - int(f)) <= epsilon


last_key_printed = None
def Main(args):
  def OutputMessage(message, verbose_message=True):
    # Note: |key| and |options| are picked up from Main()'s scope at call time.
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print '\n' + key + ':'
    print ' %s' % message

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  options, args = parser.parse_args(args)

  if options.verbose:
    print 'Verbose output enabled.'
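
  # Typical invocations, given the flags defined above (paths illustrative):
  #   make_expectations.py            # regenerate expectations in place
  #   make_expectations.py --checksum # exit non-zero if any row is stale
  #   make_expectations.py -v -c /path/to/other.cfg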

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
      os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # Fetch graphs.dat for this combination.
  perfkeys = perf.keys()
  # In perf_expectations.json, ignore the 'load' key.
  perfkeys.remove('load')
  perfkeys.sort()
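
  # Each remaining key names a trace as system/test/graph/tracename (see the
  # regex below), e.g. a hypothetical 'linux-release/moz/times/t'.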

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.
    original_checksum = value.get('sha1', '')
    if 'sha1' in value:
      del value['sha1']
    rowdata = GetRowData(perf, key)
    computed_checksum = GetRowDigest(rowdata, key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb. We can't generate
    # expectations for those.
    if not(value.has_key('reva') and value.has_key('revb')):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      temp = reva
      reva = revb
      revb = temp

    # Get the system/test/graph/tracename and reftracename for the current key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system = matchData.group(1)
    test = matchData.group(2)
    graph = matchData.group(3)
    tracename = matchData.group(4)
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      return 0
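
    # For reference, each line of the fetched -summary.dat is parsed below as
    # a JSON object shaped roughly like (values made up):
    #   {"rev": 12345, "traces": {"t": ["1200.5", "3.2"],
    #                             "t_ref": ["1190.0", "2.9"]}}
    # where the first element of each trace list is the measured value.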

    # Set value's type to 'relative' by default.
    value_type = value.get('type', 'relative')

    summarylist = summaryjson.split('\n')
    trace_values = {}
    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]
    for trace in traces:
      trace_values.setdefault(trace, {})

    # Find the high and low values for each of the traces.
    scanning = False
    for line in summarylist:
      jsondata = ConvertJsonIntoDict(line)
      try:
        rev = int(jsondata['rev'])
      except ValueError:
        print ('Warning: skipping rev %r because it could not be parsed '
               'as an integer.' % jsondata['rev'])
        continue
      if rev <= revb:
        scanning = True
      if rev < reva:
        break

      # We found the upper revision in the range. Scan for trace data until we
      # find the lower revision in the range.
      if scanning:
        for trace in traces:
          if trace not in jsondata['traces']:
            OutputMessage('trace %s missing' % trace)
            continue
          if type(jsondata['traces'][trace]) != type([]):
            OutputMessage('trace %s format not recognized' % trace)
            continue
          try:
            tracevalue = float(jsondata['traces'][trace][0])
          except ValueError:
            OutputMessage('trace %s value error: %s' % (
                trace, str(jsondata['traces'][trace][0])))
            continue

          for bound in ['high', 'low']:
            trace_values[trace].setdefault(bound, tracevalue)

          trace_values[trace]['high'] = max(trace_values[trace]['high'],
                                            tracevalue)
          trace_values[trace]['low'] = min(trace_values[trace]['low'],
                                           tracevalue)

    if 'high' not in trace_values[tracename]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    elif value_type == 'absolute':
      # Calculate assuming high absolutes are regressions and low absolutes are
      # improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])
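
    # A made-up 'relative' example: if the trace ranged over [1000, 1100] and
    # the _ref trace over [990, 1010], then regress = 1100 - 990 = 110 and
    # improve = 1000 - 1010 = -10.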

    # So far we've assumed better is lower (regress > improve). If the actual
    # values for regress and improve are equal, though, and better was not
    # specified, alert the user so we don't let them create a new file with
    # ambiguous rules.
    if better == None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                        regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve. This value must be a
    # scores-like result.
    if 'regress' in perf[key] and 'improve' in perf[key]:
      if perf[key]['regress'] < perf[key]['improve']:
        assert(better != 'lower')
        better = 'higher'
        temp = regress
        regress = improve
        improve = temp
      else:
        # Sometimes values are equal, e.g., when they are both 0; in that
        # case 'better' may still be set to 'higher'.
        assert(better != 'higher' or
               perf[key]['regress'] == perf[key]['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    originally_ints = False
    if FloatIsInt(regress) and FloatIsInt(improve):
      originally_ints = True

    if better == 'higher':
      if originally_ints:
        regress = int(math.floor(regress - abs(regress*tolerance)))
        improve = int(math.ceil(improve + abs(improve*tolerance)))
      else:
        regress = regress - abs(regress*tolerance)
        improve = improve + abs(improve*tolerance)
    else:
      if originally_ints:
        improve = int(math.floor(improve - abs(improve*tolerance)))
        regress = int(math.ceil(regress + abs(regress*tolerance)))
      else:
        improve = improve - abs(improve*tolerance)
        regress = regress + abs(regress*tolerance)
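
    # Continuing the made-up example with the default 5% tolerance and
    # better == 'lower': regress widens to 110 + 5.5 = 115.5 and improve to
    # -10 - 0.5 = -10.5 (then ceil'd/floor'd to 116 and -11 if both values
    # started out as integers).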

    # Calculate the new checksum to test if this is the only thing that may
    # have changed.
    checksum_rowdata = GetRowData(perf, key)
    new_checksum = GetRowDigest(checksum_rowdata, key)

    if ('regress' in perf[key] and 'improve' in perf[key] and
        perf[key]['regress'] == regress and perf[key]['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % perf[key], verbose_message=False)
    perf[key]['regress'] = regress
    perf[key]['improve'] = improve
    OutputMessage('after: %s' % perf[key], verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    else:
      return 0

  if write_new_expectations:
    print '\nWriting expectations... ',
    WriteJson(perf_file, perf, perfkeys)
    print 'done'
  else:
    if options.verbose:
      print ''
    print 'No changes.'
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))