[chromium-blink-merge.git] / build/android/pylib/perf/test_runner.py
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5 """Runs perf tests.
7 Our buildbot infrastructure requires each slave to run steps serially.
8 This is sub-optimal for android, where these steps can run independently on
9 multiple connected devices.
11 The buildbots will run this script multiple times per cycle:
12 - First: all steps listed in --steps in will be executed in parallel using all
13 connected devices. Step results will be pickled to disk. Each step has a unique
14 name. The result code will be ignored if the step name is listed in
15 --flaky-steps.
16 The buildbot will treat this step as a regular step, and will not process any
17 graph data.
19 - Then, with -print-step STEP_NAME: at this stage, we'll simply print the file
20 with the step results previously saved. The buildbot will then process the graph
21 data accordingly.
23 The JSON steps file contains a dictionary in the format:
24 { "version": int,
25 "steps": {
26 "foo": {
27 "device_affinity": int,
28 "cmd": "script_to_execute foo"
30 "bar": {
31 "device_affinity": int,
32 "cmd": "script_to_execute bar"
37 The JSON flaky steps file contains a list with step names which results should
38 be ignored:
40 "step_name_foo",
41 "step_name_bar"
44 Note that script_to_execute necessarily have to take at least the following
45 option:
46 --device: the serial number to be passed to all adb commands.
47 """

import collections
import datetime
import json
import logging
import os
import pickle
import shutil
import sys
import tempfile
import threading
import time

from pylib import cmd_helper
from pylib import constants
from pylib import forwarder
from pylib.base import base_test_result
from pylib.base import base_test_runner
from pylib.device import device_errors


def OutputJsonList(json_input, json_output):
  with file(json_input, 'r') as i:
    all_steps = json.load(i)
  step_values = [{'test': k, 'device_affinity': v['device_affinity']}
                 for k, v in all_steps['steps'].iteritems()]
  with file(json_output, 'w') as o:
    o.write(json.dumps(step_values))
  return 0
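
# For a steps file like the hypothetical example near the top of this module,
# OutputJsonList would write something along the lines of:
#   [{"test": "page_cycler.example", "device_affinity": 0}]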


def PrintTestOutput(test_name, json_file_name=None):
  """Helper method to print the output of previously executed test_name.

  Args:
    test_name: name of the test that has been previously executed.
    json_file_name: name of the file to output chartjson data to.

  Returns:
    exit code generated by the test step.
  """
  file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
  if not os.path.exists(file_name):
    logging.error('File not found %s', file_name)
    return 1

  with file(file_name, 'r') as f:
    persisted_result = pickle.loads(f.read())
  logging.info('*' * 80)
  logging.info('Output from:')
  logging.info(persisted_result['cmd'])
  logging.info('*' * 80)
  print persisted_result['output']

  if json_file_name:
    with file(json_file_name, 'w') as f:
      f.write(persisted_result['chartjson'])

  return persisted_result['exit_code']


def PrintSummary(test_names):
  logging.info('*' * 80)
  logging.info('Sharding summary')
  device_total_time = collections.defaultdict(int)
  for test_name in test_names:
    file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
    if not os.path.exists(file_name):
      logging.info('%s : No status file found', test_name)
      continue
    with file(file_name, 'r') as f:
      result = pickle.loads(f.read())
    logging.info('%s : exit_code=%d in %d secs at %s',
                 result['name'], result['exit_code'], result['total_time'],
                 result['device'])
    device_total_time[result['device']] += result['total_time']
  for device, device_time in device_total_time.iteritems():
    logging.info('Total for device %s : %d secs', device, device_time)
  logging.info('Total steps time: %d secs', sum(device_total_time.values()))


class _HeartBeatLogger(object):
  # How often to print the heartbeat on flush().
  _PRINT_INTERVAL = 30.0

  def __init__(self):
    """A file-like class for keeping the buildbot alive."""
    self._len = 0
    self._tick = time.time()
    self._stopped = threading.Event()
    self._timer = threading.Thread(target=self._runner)
    self._timer.start()

  def _runner(self):
    while not self._stopped.is_set():
      self.flush()
      self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)

  def write(self, data):
    self._len += len(data)

  def flush(self):
    now = time.time()
    if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:
      self._tick = now
      print '--single-step output length %d' % self._len
      sys.stdout.flush()

  def stop(self):
    self._stopped.set()
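
  # Note: when --single-step is used, _LaunchPerfTest below passes an instance
  # of this class (instead of sys.stdout) as the logfile for
  # cmd_helper.GetCmdStatusAndOutputWithTimeout, so the periodic flush() output
  # keeps the buildbot from timing out on an otherwise silent step.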


class TestRunner(base_test_runner.BaseTestRunner):
  def __init__(self, test_options, device, shard_index, max_shard, tests,
               flaky_tests):
    """A TestRunner instance runs a perf test on a single device.

    Args:
      test_options: A PerfOptions object.
      device: Device to run the tests.
      shard_index: the index of this device.
      max_shard: the total number of shards.
      tests: a dict mapping test_name to command.
      flaky_tests: a list of flaky test_name.
    """
    super(TestRunner, self).__init__(device, None, 'Release')
    self._options = test_options
    self._shard_index = shard_index
    self._max_shard = max_shard
    self._tests = tests
    self._flaky_tests = flaky_tests
    self._output_dir = None

  @staticmethod
  def _IsBetter(result):
    """Returns True if |result| should overwrite the previously saved result.

    A passing run, or a run with no previously saved result, always wins;
    otherwise the run with the lower actual exit code is kept.
    """
    if result['actual_exit_code'] == 0:
      return True
    pickled = os.path.join(constants.PERF_OUTPUT_DIR,
                           result['name'])
    if not os.path.exists(pickled):
      return True
    with file(pickled, 'r') as f:
      previous = pickle.loads(f.read())
    return result['actual_exit_code'] < previous['actual_exit_code']

  @staticmethod
  def _SaveResult(result):
    if TestRunner._IsBetter(result):
      with file(os.path.join(constants.PERF_OUTPUT_DIR,
                             result['name']), 'w') as f:
        f.write(pickle.dumps(result))

  def _CheckDeviceAffinity(self, test_name):
    """Returns True if test_name has affinity for this shard."""
    affinity = (self._tests['steps'][test_name]['device_affinity'] %
                self._max_shard)
    if self._shard_index == affinity:
      return True
    logging.info('Skipping %s on %s (affinity is %s, device is %s)',
                 test_name, self.device_serial, affinity, self._shard_index)
    return False
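
  # Example: with three attached devices (shard indices 0-2), a step declaring
  # "device_affinity": 7 maps to shard 7 % 3 == 1, so only the runner with
  # shard_index 1 executes it; runners on the other shards skip the step and
  # report it as PASS (see _LaunchPerfTest).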

  def _CleanupOutputDirectory(self):
    if self._output_dir:
      shutil.rmtree(self._output_dir, ignore_errors=True)
      self._output_dir = None

  def _ReadChartjsonOutput(self):
    if not self._output_dir:
      return ''

    json_output_path = os.path.join(self._output_dir, 'results-chart.json')
    with open(json_output_path) as f:
      return f.read()

  def _LaunchPerfTest(self, test_name):
    """Runs a perf test.

    Args:
      test_name: the name of the test to be executed.

    Returns:
      A tuple containing (Output, base_test_result.ResultType)
    """
    if not self._CheckDeviceAffinity(test_name):
      return '', base_test_result.ResultType.PASS

    try:
      logging.warning('Unmapping device ports')
      forwarder.Forwarder.UnmapAllDevicePorts(self.device)
      self.device.old_interface.RestartAdbdOnDevice()
    except Exception as e:
      logging.error('Exception when tearing down device %s', e)

    cmd = ('%s --device %s' %
           (self._tests['steps'][test_name]['cmd'],
            self.device_serial))

    if self._options.collect_chartjson_data:
      self._output_dir = tempfile.mkdtemp()
      cmd = cmd + ' --output-dir=%s' % self._output_dir

    logging.info('%s : %s', test_name, cmd)
    start_time = datetime.datetime.now()

    timeout = self._tests['steps'][test_name].get('timeout', 5400)
    if self._options.no_timeout:
      timeout = None
    logging.info('Timeout for %s test: %s', test_name, timeout)
    full_cmd = cmd
    if self._options.dry_run:
      full_cmd = 'echo %s' % cmd

    logfile = sys.stdout
    if self._options.single_step:
      # Just print a heart-beat so that the outer buildbot scripts won't
      # time out without response.
      logfile = _HeartBeatLogger()
    cwd = os.path.abspath(constants.DIR_SOURCE_ROOT)
    if full_cmd.startswith('src/'):
      cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir))
    try:
      exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(
          full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile)
      json_output = self._ReadChartjsonOutput()
    except cmd_helper.TimeoutError as e:
      exit_code = -1
      output = str(e)
      json_output = ''
    finally:
      self._CleanupOutputDirectory()
      if self._options.single_step:
        logfile.stop()
    end_time = datetime.datetime.now()
    if exit_code is None:
      exit_code = -1
    logging.info('%s : exit_code=%d in %d secs at %s',
                 test_name, exit_code, (end_time - start_time).seconds,
                 self.device_serial)

    if exit_code == 0:
      result_type = base_test_result.ResultType.PASS
    else:
      result_type = base_test_result.ResultType.FAIL
      # Since perf tests use device affinity, give the device a chance to
      # recover if it is offline after a failure. Otherwise, the master sharder
      # will remove it from the pool and future tests on this device will fail.
      try:
        self.device.WaitUntilFullyBooted(timeout=120)
      except device_errors.CommandTimeoutError as e:
        logging.error('Device failed to return after %s: %s' % (test_name, e))

    actual_exit_code = exit_code
    if test_name in self._flaky_tests:
      # The exit_code is used at the second stage when printing the
      # test output. If the test is flaky, force it to "0" to get that step
      # green whilst still gathering data for the perf dashboards.
      # The result_type is used by the test_dispatcher to retry the test.
      exit_code = 0

    persisted_result = {
        'name': test_name,
        'output': output,
        'chartjson': json_output,
        'exit_code': exit_code,
        'actual_exit_code': actual_exit_code,
        'result_type': result_type,
        'total_time': (end_time - start_time).seconds,
        'device': self.device_serial,
        'cmd': cmd,
    }
    self._SaveResult(persisted_result)

    return (output, result_type)

  def RunTest(self, test_name):
    """Run a perf test on the device.

    Args:
      test_name: String to use for logging the test result.

    Returns:
      A tuple of (TestRunResults, retry).
    """
    _, result_type = self._LaunchPerfTest(test_name)
    results = base_test_result.TestRunResults()
    results.AddResult(base_test_result.BaseTestResult(test_name, result_type))
    retry = None
    if not results.DidRunPass():
      retry = test_name
    return results, retry