Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / tools / auto_bisect / bisect_results_test.py
blob25359c5e17c3d472178af0020ff931623896d7bf
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5 import os
6 import unittest
8 from bisect_results import BisectResults
9 import source_control
class MockDepotRegistry(object):
  """Stand-in depot registry whose directory change is a no-op."""

  def ChangeToDepotDir(self, depot):
    """Accepts a depot name and does nothing.

    Args:
      depot: Depot name; ignored.
    """
    pass
class MockRevisionState(object):
  """Plain attribute holder describing one revision in a mock bisect."""

  def __init__(self, revision, index, depot='chromium', value=None,
               perf_time=0, build_time=0, passed='?', external=None):
    """Stores every argument unchanged on the instance.

    Args:
      revision: Revision identifier (the tests in this file use short strings).
      index: Position of the revision in the list of revision states.
      depot: Depot name; defaults to 'chromium'.
      value: Test result; the tests in this file use {'values': [...]} dicts.
      perf_time: Stored as-is; defaults to 0.
      build_time: Stored as-is; defaults to 0.
      passed: Pass/fail marker; defaults to '?'. The tests in this file also
          use 0, 1 and 'Skipped'.
      external: Stored as-is; defaults to None.
    """
    self.depot = depot
    self.revision = revision
    self.index = index
    self.value = value
    self.perf_time = perf_time
    self.build_time = build_time
    self.passed = passed
    self.external = external

  def __repr__(self):
    # Makes failed assertEqual() calls on mock states readable instead of
    # printing a bare object address.
    return 'MockRevisionState(revision=%r, index=%r, depot=%r, passed=%r)' % (
        self.revision, self.index, self.depot, self.passed)
class MockBisectState(object):
  """Bisect state pre-populated with five mock revisions.

  Revisions 'a' and 'b' (indices 0-1) are created with passed=0 and the values
  [100, 105, 95]; revisions 'c', 'd' and 'e' (indices 2-4) are created with
  passed=1 and the values [1, 2, 3].
  """

  def __init__(self):
    self.mock_revision_states = []

    # Every revision in a group shares the same value dict object, so tests
    # should assign a new dict to .value rather than mutate it in place.
    mock_bad_val = {'values': [100, 105, 95]}
    for i, rev in enumerate(['a', 'b']):
      mock_rev_state = MockRevisionState(rev, i, value=mock_bad_val, passed=0)
      self.mock_revision_states.append(mock_rev_state)

    mock_good_val = {'values': [1, 2, 3]}
    # start=2 continues the index numbering after the first two revisions.
    for i, rev in enumerate(['c', 'd', 'e'], start=2):
      mock_rev_state = MockRevisionState(rev, i, value=mock_good_val, passed=1)
      self.mock_revision_states.append(mock_rev_state)

  def GetRevisionStates(self):
    """Returns the list of mock revision states (the list itself, no copy)."""
    return self.mock_revision_states
class MockBisectOptions(object):
  """Minimal options object exposing only repeat_test_count."""

  def __init__(self):
    # Tests lower this to 1 to provoke the "retry limit too low" warning.
    self.repeat_test_count = 3
class BisectResultsTest(unittest.TestCase):
  """Unit tests for BisectResults, driven by the mock objects in this file.

  setUp replaces os.getcwd, os.chdir and source_control.QueryRevisionInfo with
  in-memory fakes and tearDown restores the originals.
  """

  def setUp(self):
    self.mock_bisect_state = MockBisectState()
    self.mock_depot_registry = MockDepotRegistry()
    self.mock_opts = MockBisectOptions()
    self.mock_warnings = []

    # Keep the real functions so that tearDown can put them back.
    self.original_getcwd = os.getcwd
    self.original_chdir = os.chdir
    self.original_query_revision_info = source_control.QueryRevisionInfo

    os.getcwd = lambda: '/path'
    os.chdir = lambda _: None

    # Only revisions 'b' and 'c' have canned info; any other revision raises
    # KeyError from the fake below.
    revision_infos = {'b': {'test': 'b'}, 'c': {'test': 'c'}}
    source_control.QueryRevisionInfo = lambda rev: revision_infos[rev]

  def tearDown(self):
    os.getcwd = self.original_getcwd
    os.chdir = self.original_chdir
    source_control.QueryRevisionInfo = self.original_query_revision_info

  def _NewBisectResults(self):
    """Builds a BisectResults from the current mock state, options, warnings."""
    return BisectResults(self.mock_bisect_state, self.mock_depot_registry,
                         self.mock_opts, self.mock_warnings)

  def _AssertConfidence(self, score, bad_values, good_values):
    """Checks whether the given sets of values have a given confidence score.

    The score represents our confidence that the two sets of values wouldn't
    be as different as they are just by chance; that is, that some real change
    occurred between the two sets of values.

    Args:
      score: Expected confidence score.
      bad_values: First list of numbers.
      good_values: Second list of numbers.
    """
    confidence = BisectResults.ConfidenceScore(bad_values, good_values)
    self.assertEqual(score, confidence)

  def testConfidenceScoreIsZeroOnTooFewLists(self):
    # An empty or single-element sample on either side yields zero confidence.
    self._AssertConfidence(0.0, [], [1, 2])
    self._AssertConfidence(0.0, [1, 2], [])
    self._AssertConfidence(0.0, [1], [1, 2])
    self._AssertConfidence(0.0, [1, 2], [1])

  def testConfidenceScore_ZeroConfidence(self):
    # The good and bad sets contain the same values, so the confidence that
    # they're different should be zero.
    self._AssertConfidence(0.0, [4, 5, 7, 6, 8, 7], [8, 7, 6, 7, 5, 4])

  def testConfidenceScore_MediumConfidence(self):
    self._AssertConfidence(80.0, [0, 1, 1, 1, 2, 2], [1, 1, 1, 3, 3, 4])

  def testConfidenceScore_HighConfidence(self):
    self._AssertConfidence(95.0, [0, 1, 1, 1, 2, 2], [1, 2, 2, 3, 3, 4])

  def testConfidenceScore_VeryHighConfidence(self):
    # Confidence is high if the two sets of values have no internal variance.
    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.2, 1.2, 1.2, 1.2])
    self._AssertConfidence(99.9, [1, 1, 1, 1], [1.01, 1.01, 1.01, 1.01])

  def testConfidenceScore_UnbalancedSampleSize(self):
    # The second set of numbers only contains one number, so confidence is 0.
    self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2], [1.4])

  def testConfidenceScore_EmptySample(self):
    # Confidence is zero if either or both samples are empty.
    self._AssertConfidence(0.0, [], [])
    self._AssertConfidence(0.0, [], [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3])
    self._AssertConfidence(0.0, [1.1, 1.2, 1.1, 1.2, 1.0, 1.3, 1.2, 1.3], [])

  def testConfidenceScore_FunctionalTestResults(self):
    # Samples made up only of 0s and 1s.
    self._AssertConfidence(80.0, [1, 1, 0, 1, 1, 1, 0, 1], [0, 0, 1, 0, 1, 0])
    self._AssertConfidence(99.9, [1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0])

  def testConfidenceScore_RealWorldCases(self):
    """This method contains a set of data from actual bisect results.

    The confidence scores asserted below were all copied from the actual
    results, so the purpose of this test method is mainly to show what the
    results for real cases are, and compare when we change the confidence
    score function in the future.
    """
    self._AssertConfidence(80, [133, 130, 132, 132, 130, 129], [129, 129, 125])
    self._AssertConfidence(99.5, [668, 667], [498, 498, 499])
    self._AssertConfidence(80, [67, 68], [65, 65, 67])
    self._AssertConfidence(0, [514], [514])
    self._AssertConfidence(90, [616, 613, 607, 615], [617, 619, 619, 617])
    self._AssertConfidence(0, [3.5, 5.8, 4.7, 3.5, 3.6], [2.8])
    self._AssertConfidence(90, [3, 3, 3], [2, 2, 2, 3])
    self._AssertConfidence(0, [1999004, 1999627], [223355])
    self._AssertConfidence(90, [1040, 934, 961], [876, 875, 789])
    self._AssertConfidence(90, [309, 305, 304], [302, 302, 299, 303, 298])

  def testCorrectlyFindsBreakingRange(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    revision_states[0].passed = 0
    revision_states[1].passed = 0
    revision_states[2].passed = 1
    revision_states[3].passed = 1
    revision_states[4].passed = 1

    results = self._NewBisectResults()
    # The boundary lies between index 1 (passed=0) and index 2 (passed=1).
    self.assertEqual(revision_states[2], results.first_working_revision)
    self.assertEqual(revision_states[1], results.last_broken_revision)

  def testCorrectlyComputesRegressionStatistics(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    revision_states[0].passed = 0
    revision_states[0].value = {'values': [1000, 999, 998]}
    revision_states[1].passed = 0
    revision_states[1].value = {'values': [980, 1000, 999]}
    revision_states[2].passed = 1
    revision_states[2].value = {'values': [50, 45, 55]}
    revision_states[3].passed = 1
    revision_states[3].value = {'values': [45, 56, 45]}
    revision_states[4].passed = 1
    revision_states[4].value = {'values': [51, 41, 58]}

    results = self._NewBisectResults()
    self.assertAlmostEqual(99.9, results.confidence)
    self.assertAlmostEqual(1909.86547085, results.regression_size)
    self.assertAlmostEqual(7.16625904, results.regression_std_err)

  def testFindsCulpritRevisions(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    revision_states[1].depot = 'chromium'
    revision_states[2].depot = 'webkit'

    results = self._NewBisectResults()

    # The revision info dict comes from the QueryRevisionInfo fake in setUp.
    self.assertEqual(1, len(results.culprit_revisions))
    self.assertEqual(('b', {'test': 'b'}, 'chromium'),
                     results.culprit_revisions[0])

  def testFindsOtherRegressions(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    revision_states[0].passed = 0
    revision_states[0].value = {'values': [100, 100, 100]}
    revision_states[1].passed = 0
    revision_states[1].value = {'values': [100, 100, 100]}
    revision_states[2].passed = 1
    revision_states[2].value = {'values': [10, 10, 10]}
    revision_states[3].passed = 1
    revision_states[3].value = {'values': [100, 100, 100]}
    revision_states[4].passed = 1
    revision_states[4].value = {'values': [60, 60, 60]}

    results = self._NewBisectResults()
    # Each entry is [revision state, preceding revision state, confidence].
    expected_regressions = [[revision_states[2], revision_states[1], 99.9],
                            [revision_states[4], revision_states[3], 80.0]]
    self.assertEqual(expected_regressions, results.other_regressions)

  def testNoResultBasedWarningsForNormalState(self):
    results = self._NewBisectResults()
    self.assertEqual(0, len(results.warnings))

  def testWarningForMultipleCulpritRevisions(self):
    # A 'Skipped' revision sits between the last passed=0 and the next
    # passed=1 revision; exactly one warning is expected.
    self.mock_bisect_state.mock_revision_states[2].passed = 'Skipped'
    results = self._NewBisectResults()
    self.assertEqual(1, len(results.warnings))

  def testWarningForTooLowRetryLimit(self):
    self.mock_opts.repeat_test_count = 1
    results = self._NewBisectResults()
    self.assertEqual(1, len(results.warnings))

  def testWarningForTooLowConfidence(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    # Slightly lower than the [100, 105, 95] values of revisions 'a' and 'b'.
    revision_states[2].value = {'values': [95, 100, 90]}
    revision_states[3].value = {'values': [95, 100, 90]}
    revision_states[4].value = {'values': [95, 100, 90]}
    results = self._NewBisectResults()
    self.assertGreater(results.confidence, 0)
    self.assertEqual(1, len(results.warnings))

  def testWarningForZeroConfidence(self):
    revision_states = self.mock_bisect_state.mock_revision_states
    # Identical to the [100, 105, 95] values of revisions 'a' and 'b'.
    revision_states[2].value = {'values': [100, 105, 95]}
    revision_states[3].value = {'values': [100, 105, 95]}
    revision_states[4].value = {'values': [100, 105, 95]}
    results = self._NewBisectResults()
    self.assertEqual(0, results.confidence)
    self.assertEqual(1, len(results.warnings))
if __name__ == '__main__':
  # Run every test in this module when the file is executed as a script.
  unittest.main()