tools/bisect-perf-regression_test.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import unittest
   6
   7 from auto_bisect import source_control as source_control_module
   8
   9 # Special import necessary because filename contains dash characters.
  10 bisect_perf_module = __import__('bisect-perf-regression')
  11
  12
  13 class BisectPerfRegressionTest(unittest.TestCase):
  14   """Test case for other functions and classes in bisect-perf-regression.py."""
  15
  16   def _AssertConfidence(self, score, bad_values, good_values):
  17     """Checks whether the given sets of values have a given confidence score.
  18
  19     The score represents our confidence that the two sets of values wouldn't
  20     be as different as they are just by chance; that is, that some real change
  21     occurred between the two sets of values.
  22
  23     Args:
  24       score: Expected confidence score.
  25       bad_values: First list of numbers.
  26       good_values: Second list of numbers.
  27     """
  28     # ConfidenceScore takes a list of lists but these lists are flattened
  29     # inside the function.
  30     confidence = bisect_perf_module.ConfidenceScore(
  31         [[v] for v in bad_values],
  32         [[v] for v in good_values])
  33     self.assertEqual(score, confidence)
  34
  35   def testConfidenceScore_ZeroConfidence(self):
  36     # The good and bad sets contain the same values, so the confidence that
  37     # they're different should be zero.
  38     self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])
  39
  40   def testConfidenceScore_MediumConfidence(self):
  41     self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])
  42
  43   def testConfidenceScore_HighConfidence(self):
  44     self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])
  45
  46   def testConfidenceScore_VeryHighConfidence(self):
  47     # Confidence is high if the two sets of values have no internal variance.
  48     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
  49     self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])
  50
  51   def testConfidenceScore_UnbalancedSampleSize(self):
  52     # The second set of numbers only contains one number, so confidence is 0.
  53     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])
  54
  55   def testConfidenceScore_EmptySample(self):
  56     # Confidence is zero if either or both samples are empty.
  57     self._AssertConfidence(0.0, [], [])
  58     self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
  59     self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])
  60
  61   def testConfidenceScore_FunctionalTestResults(self):
  62     self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
  63     self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])
  64
  65   def testConfidenceScore_RealWorldCases(self):
  66     """This method contains a set of data from actual bisect results.
  67
  68     The confidence scores asserted below were all copied from the actual
  69     results, so the purpose of this test method is mainly to show what the
  70     results for real cases are, and compare when we change the confidence
  71     score function in the future.
  72     """
  73     self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
  74     self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
  75     self._AssertConfidence(80, [67, 68], [65, 65, 67])
  76     self._AssertConfidence(0, [514], [514])
  77     self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
  78     self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
  79     self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
  80     self._AssertConfidence(0, [1999004, 1999627], [223355])
  81     self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
  82     self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])
  83
  84   def testParseDEPSStringManually(self):
  85     """Tests DEPS parsing."""
  86     deps_file_contents = """
  87     vars = {
  88         'ffmpeg_hash':
  89              '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
  90         'webkit_url':
  91              'https://chromium.googlesource.com/chromium/blink.git',
  92         'git_url':
  93              'https://chromium.googlesource.com',
  94         'webkit_rev':
  95              '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
  96         'angle_revision':
  97              '74697cf2064c0a2c0d7e1b1b28db439286766a05'
  98     }"""
  99
 100     # Should only expect SVN/git revisions to come through, and URLs should be
 101     # filtered out.
 102     expected_vars_dict = {
 103         'ffmpeg_hash': '@ac4a9f31fe2610bd146857bbd55d7a260003a888',
 104         'webkit_rev': '@e01ac0a267d1017288bc67fa3c366b10469d8a24',
 105         'angle_revision': '74697cf2064c0a2c0d7e1b1b28db439286766a05'
 106     }
 107     # Testing private function.
 108     # pylint: disable=W0212
 109     vars_dict = bisect_perf_module._ParseRevisionsFromDEPSFileManually(
 110         deps_file_contents)
 111     self.assertEqual(vars_dict, expected_vars_dict)
 112
 113   def _AssertParseResult(self, expected_values, result_string):
 114     """Asserts some values are parsed from a RESULT line."""
 115     results_template = ('RESULT other_chart: other_trace= 123 count\n'
 116                         'RESULT my_chart: my_trace= %(value)s\n')
 117     results = results_template % {'value': result_string}
 118     metric = ['my_chart', 'my_trace']
 119     # Testing private function.
 120     # pylint: disable=W0212
 121     values = bisect_perf_module._TryParseResultValuesFromOutput(metric, results)
 122     self.assertEqual(expected_values, values)
 123
 124   def testTryParseResultValuesFromOutput_WithSingleValue(self):
 125     """Tests result pattern <*>RESULT <graph>: <trace>= <value>"""
 126     self._AssertParseResult([66.88], '66.88 kb')
 127     self._AssertParseResult([66.88], '66.88 ')
 128     self._AssertParseResult([-66.88], '-66.88 kb')
 129     self._AssertParseResult([66], '66 kb')
 130     self._AssertParseResult([0.66], '.66 kb')
 131     self._AssertParseResult([], '. kb')
 132     self._AssertParseResult([], 'aaa kb')
 133
 134   def testTryParseResultValuesFromOutput_WithMultiValue(self):
 135     """Tests result pattern <*>RESULT <graph>: <trace>= [<value>,<value>, ..]"""
 136     self._AssertParseResult([66.88], '[66.88] kb')
 137     self._AssertParseResult([66.88, 99.44], '[66.88, 99.44]kb')
 138     self._AssertParseResult([66.88, 99.44], '[ 66.88, 99.44 ]')
 139     self._AssertParseResult([-66.88, 99.44], '[-66.88, 99.44] kb')
 140     self._AssertParseResult([-66, 99], '[-66,99] kb')
 141     self._AssertParseResult([-66, 99], '[-66,99,] kb')
 142     self._AssertParseResult([-66, 0.99], '[-66,.99] kb')
 143     self._AssertParseResult([], '[] kb')
 144     self._AssertParseResult([], '[-66,abc] kb')
 145
 146   def testTryParseResultValuesFromOutputWithMeanStd(self):
 147     """Tests result pattern <*>RESULT <graph>: <trace>= {<mean, std}"""
 148     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 149     self._AssertParseResult([33.22], '{33.22, 3.6} kb')
 150     self._AssertParseResult([33.22], '{33.22,3.6}kb')
 151     self._AssertParseResult([33.22], '{33.22,3.6} kb')
 152     self._AssertParseResult([33.22], '{ 33.22,3.6 }kb')
 153     self._AssertParseResult([-33.22], '{-33.22,3.6}kb')
 154     self._AssertParseResult([22], '{22,6}kb')
 155     self._AssertParseResult([.22], '{.22,6}kb')
 156     self._AssertParseResult([], '{.22,6, 44}kb')
 157     self._AssertParseResult([], '{}kb')
 158     self._AssertParseResult([], '{XYZ}kb')
 159
 160   def _AssertCompatibleCommand(
 161       self, expected_command, original_command, revision, target_platform):
 162     """Tests the modification of the command that might be done.
 163
 164     This modification to the command is done in order to get a Telemetry
 165     command that works; before some revisions, the browser name that Telemetry
 166     expects is different in some cases, but we want it to work anyway.
 167
 168     Specifically, only for android:
 169       After r276628, only android-chrome-shell works.
 170       Prior to r274857, only android-chromium-testshell works.
 171       In the range [274857, 276628], both work.
 172     """
 173     bisect_options = bisect_perf_module.BisectOptions()
 174     bisect_options.output_buildbot_annotations = None
 175     source_control = source_control_module.DetermineAndCreateSourceControl(
 176         bisect_options)
 177     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
 178         source_control, bisect_options)
 179     bisect_instance.opts.target_platform = target_platform
 180     git_revision = bisect_instance.source_control.ResolveToRevision(
 181         revision, 'chromium', bisect_perf_module.DEPOT_DEPS_NAME, 100)
 182     depot = 'chromium'
 183     command = bisect_instance.GetCompatibleCommand(
 184         original_command, git_revision, depot)
 185     self.assertEqual(expected_command, command)
 186
 187   def testGetCompatibleCommand_ChangeToTestShell(self):
 188     # For revisions <= r274857, only android-chromium-testshell is used.
 189     self._AssertCompatibleCommand(
 190         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 191         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 192         274857, 'android')
 193
 194   def testGetCompatibleCommand_ChangeToShell(self):
 195     # For revisions >= r276728, only android-chrome-shell can be used.
 196     self._AssertCompatibleCommand(
 197         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 198         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 199         276628, 'android')
 200
 201   def testGetCompatibleCommand_NoChange(self):
 202     # For revisions < r276728, android-chromium-testshell can be used.
 203     self._AssertCompatibleCommand(
 204         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 205         'tools/perf/run_benchmark -v --browser=android-chromium-testshell foo',
 206         274858, 'android')
 207     # For revisions > r274857, android-chrome-shell can be used.
 208     self._AssertCompatibleCommand(
 209         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 210         'tools/perf/run_benchmark -v --browser=android-chrome-shell foo',
 211         274858, 'android')
 212
 213   def testGetCompatibleCommand_NonAndroidPlatform(self):
 214     # In most cases, there's no need to change Telemetry command.
 215     # For revisions >= r276728, only android-chrome-shell can be used.
 216     self._AssertCompatibleCommand(
 217         'tools/perf/run_benchmark -v --browser=release foo',
 218         'tools/perf/run_benchmark -v --browser=release foo',
 219         276628, 'chromium')
 220
 221   # This method doesn't reference self; it fails if an error is thrown.
 222   # pylint: disable=R0201
 223   def testDryRun(self):
 224     """Does a dry run of the bisect script.
 225
 226     This serves as a smoke test to catch errors in the basic execution of the
 227     script.
 228     """
 229     options_dict = {
 230       'debug_ignore_build': True,
 231       'debug_ignore_sync': True,
 232       'debug_ignore_perf_test': True,
 233       'command': 'fake_command',
 234       'metric': 'fake/metric',
 235       'good_revision': 280000,
 236       'bad_revision': 280005,
 237     }
 238     bisect_options = bisect_perf_module.BisectOptions.FromDict(options_dict)
 239     source_control = source_control_module.DetermineAndCreateSourceControl(
 240         bisect_options)
 241     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
 242         source_control, bisect_options)
 243     results = bisect_instance.Run(bisect_options.command,
 244                                   bisect_options.bad_revision,
 245                                   bisect_options.good_revision,
 246                                   bisect_options.metric)
 247     bisect_instance.FormatAndPrintResults(results)
 248
 249
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()