# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import math
import os

import bisect_utils
import math_utils
import source_control
import ttest

from bisect_state import RevisionState


class BisectResults(object):
  """Contains results of the completed bisect.

  Properties:
    error: Error message if the bisect failed.

    If the error is None, the following properties are present:
    warnings: List of warnings from the bisect run.
    state: BisectState object from which these results were generated.
    first_working_revision: First good revision.
    last_broken_revision: Last bad revision.

    If both of the above revisions are not None, the following properties are
    present:
    culprit_revisions: A list of revisions which contain the bad change
        introducing the failure.
    other_regressions: A list of tuples representing other regressions which
        may have occurred elsewhere in the revision range.
    regression_size: For performance bisects, this is the relative change of
        the mean metric value. For other bisects this field always contains
        'zero-to-nonzero'.
    regression_std_err: For performance bisects, this is the pooled standard
        error for the groups of good and bad runs. Not used for other bisects.
    confidence: For performance bisects, this is the confidence that the good
        and bad runs are distinct groups. Not used for non-performance
        bisects.
  """

  def __init__(self, bisect_state=None, depot_registry=None, opts=None,
               runtime_warnings=None, error=None, abort_reason=None):
    """Computes final bisect results after a bisect run is complete.

    This constructor should be called in one of the following ways:
      BisectResults(state, depot_registry, opts, runtime_warnings)
      BisectResults(error=error)

    The first option creates an object representing successful bisect results,
    while the second option creates an error result.

    Args:
      bisect_state: BisectState object representing the latest bisect state.
      depot_registry: DepotDirectoryRegistry object with information on each
          repository in the bisect_state.
      opts: Options passed to the bisect run.
      runtime_warnings: A list of warnings from the bisect run.
      error: Error message. When error is not None, all other arguments are
          ignored.
      abort_reason: Message explaining why the bisect was aborted, if any.
    """
    self.error = error
    self.abort_reason = abort_reason
    if error is not None or abort_reason is not None:
      return

    assert (bisect_state is not None and depot_registry is not None and
            opts is not None and runtime_warnings is not None), (
        'Incorrect use of the BisectResults constructor. '
        'When error is None, all other arguments are required.')

    self.state = bisect_state

    rev_states = bisect_state.GetRevisionStates()
    first_working_rev, last_broken_rev = self.FindBreakingRevRange(rev_states)
    self.first_working_revision = first_working_rev
    self.last_broken_revision = last_broken_rev

    self.warnings = runtime_warnings

    self.retest_results_tot = None
    self.retest_results_reverted = None
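
    # The statistics below are only meaningful when the bisect found both
    # border revisions: a last known good and a first known bad one.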
    if first_working_rev is not None and last_broken_rev is not None:
      statistics = self._ComputeRegressionStatistics(
          rev_states, first_working_rev, last_broken_rev)

      self.regression_size = statistics['regression_size']
      self.regression_std_err = statistics['regression_std_err']
      self.confidence = statistics['confidence']

      self.culprit_revisions = self._FindCulpritRevisions(
          rev_states, depot_registry, first_working_rev, last_broken_rev)

      self.other_regressions = self._FindOtherRegressions(
          rev_states, statistics['bad_greater_than_good'])

      self.warnings += self._GetResultBasedWarnings(
          self.culprit_revisions, opts, self.confidence)
    elif first_working_rev is not None:
      # Set these attributes so that the bisect printer does not break when
      # the regression cannot be reproduced (no broken revision was found).
      self.regression_size = 0
      self.regression_std_err = 0
      self.confidence = 0
      self.culprit_revisions = []
      self.other_regressions = []

  def AddRetestResults(self, results_tot, results_reverted):
    """Records the results of re-testing ToT with and without the culprit.

    Args:
      results_tot: Test results at tip-of-tree (ToT).
      results_reverted: Test results at ToT with the culprit CL reverted.
    """
    if not results_tot or not results_reverted:
      self.warnings.append(
          'Failed to re-test reverted culprit CL against ToT.')
      return

    confidence_params = (results_reverted[0]['values'],
                         results_tot[0]['values'])
    confidence = BisectResults.ConfidenceScore(*confidence_params)

    self.retest_results_tot = RevisionState('ToT', 'n/a', 0)
    self.retest_results_tot.value = results_tot[0]

    self.retest_results_reverted = RevisionState('Reverted', 'n/a', 0)
    self.retest_results_reverted.value = results_reverted[0]
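
    # bisect_utils.HIGH_CONFIDENCE is assumed to be a percentage threshold
    # (e.g. 95) at or below which a confidence score is not considered high.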
    if confidence <= bisect_utils.HIGH_CONFIDENCE:
      self.warnings.append(
          'Confidence of re-test with reverted CL is not high.'
          ' Check that the regression hasn\'t already recovered. '
          ' There\'s still a chance this is a regression, as performance of'
          ' local builds may not match official builds.')

  @staticmethod
  def _GetResultBasedWarnings(culprit_revisions, opts, confidence):
    warnings = []
    if len(culprit_revisions) > 1:
      warnings.append('Due to build errors, regression range could '
                      'not be narrowed down to a single commit.')
    if opts.repeat_test_count == 1:
      warnings.append('Tests were only set to run once. This may '
                      'be insufficient to get meaningful results.')
    if 0 < confidence < bisect_utils.HIGH_CONFIDENCE:
      warnings.append('Confidence is not high. Try bisecting again '
                      'with increased repeat_count, larger range, or '
                      'on another metric.')
    if not confidence:
      warnings.append('Confidence score is 0%. Try bisecting again on '
                      'another platform or another metric.')
    return warnings

  @staticmethod
  def ConfidenceScore(sample1, sample2, accept_single_bad_or_good=False):
    """Calculates a confidence score.

    This score is a percentage which represents our degree of confidence in
    the proposition that the good results and bad results are distinct
    groups, and that their differences aren't due to chance alone.

    Args:
      sample1: A flat list of "good" result numbers.
      sample2: A flat list of "bad" result numbers.
      accept_single_bad_or_good: If True, computes confidence even if there
          is just one bad or good revision; otherwise a single good or bad
          revision always yields 0.0 confidence. This flag will probably go
          away once the bisect range is expanded by one more revision for
          such cases.

    Returns:
      A number in the range [0, 100].
    """
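    # Illustrative behavior (approximate, since the score depends on
    # ttest.WelchsTTest): ConfidenceScore([10.0, 10.1, 10.2],
    # [20.0, 20.2, 20.1]) is close to 100, because the p-value for clearly
    # separated samples is near zero; ConfidenceScore([10.0], [20.0]) is 0.0,
    # because single-element samples are rejected unless
    # accept_single_bad_or_good is True.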

    # If there's only one item in either list, this means only one revision
    # was classified good or bad; this isn't good enough evidence to make a
    # decision. If an empty list was passed, that also implies zero
    # confidence.
    if not accept_single_bad_or_good:
      if len(sample1) <= 1 or len(sample2) <= 1:
        return 0.0

    # If either list is empty (this is unexpected and normally shouldn't
    # happen), we also want to return 0.
    if not sample1 or not sample2:
      return 0.0

    # The p-value is approximately the probability of obtaining the given
    # sets of good and bad values just by chance.
    _, _, p_value = ttest.WelchsTTest(sample1, sample2)
    return 100.0 * (1.0 - p_value)

  @classmethod
  def _FindOtherRegressions(cls, revision_states, bad_greater_than_good):
    """Compiles a list of other possible regressions from the revision data.

    Args:
      revision_states: Sorted list of RevisionState objects.
      bad_greater_than_good: Whether the result value at the "bad" revision
          is numerically greater than the result value at the "good"
          revision.

    Returns:
      A list of [current_rev, previous_rev, confidence] for other places
      where there may have been a regression.
    """
    other_regressions = []
    previous_values = []
    prev_state = None
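
    # Walk the revisions (newest first), comparing each revision's values
    # against the accumulated values of the newer revisions already seen.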
    for revision_state in revision_states:
      if revision_state.value:
        current_values = revision_state.value['values']
        if previous_values:
          confidence_params = (sum(previous_values, []),
                               sum([current_values], []))
          confidence = cls.ConfidenceScore(*confidence_params,
                                           accept_single_bad_or_good=True)
          mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
          mean_of_current_runs = math_utils.Mean(current_values)

          # Check that the potential regression is in the same direction as
          # the overall regression: previous_values hold runs from newer
          # revisions than current_values, so when the overall "bad" mean is
          # greater than the "good" mean, a local regression in the same
          # direction has the greater mean in the previous (newer) runs.
          prev_greater_than_current = mean_of_prev_runs > mean_of_current_runs
          if bad_greater_than_good:
            is_same_direction = prev_greater_than_current
          else:
            is_same_direction = not prev_greater_than_current

          # Only report potential regressions with high confidence.
          if is_same_direction and confidence > 50:
            other_regressions.append([revision_state, prev_state, confidence])
        previous_values.append(current_values)
        prev_state = revision_state
    return other_regressions

  @staticmethod
  def FindBreakingRevRange(revision_states):
    """Finds the last known good and first known bad revisions.

    Note that since revision_states is expected to be in reverse chronological
    order, the last known good revision is the first revision in the list that
    has the passed property set to 1, hence the name
    `first_working_revision`. The inverse applies to `last_broken_revision`.

    Args:
      revision_states: A list of RevisionState instances.

    Returns:
      A tuple containing the two revision states at the border (last
      known good and first known bad).
    """
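    # Illustration: for states ordered newest to oldest with passed flags
    # [0, 0, 1, 1], the state at index 2 (the newest passing revision)
    # becomes first_working_revision, and the state at index 1 becomes
    # last_broken_revision.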
    first_working_revision = None
    last_broken_revision = None

    for revision_state in revision_states:
      if revision_state.passed == 1 and not first_working_revision:
        first_working_revision = revision_state

      if not revision_state.passed:
        last_broken_revision = revision_state

    return first_working_revision, last_broken_revision

  @staticmethod
  def _FindCulpritRevisions(revision_states, depot_registry,
                            first_working_rev, last_broken_rev):
    cwd = os.getcwd()

    culprit_revisions = []
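    # revision_states is ordered newest first, so the culprit candidates run
    # from the last broken revision up to, but not including, the first
    # working one; e.g. indices 3 and 6 yield candidates at 3, 4 and 5.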
    for i in xrange(last_broken_rev.index, first_working_rev.index):
      depot_registry.ChangeToDepotDir(revision_states[i].depot)
      info = source_control.QueryRevisionInfo(revision_states[i].revision)
      culprit_revisions.append((revision_states[i].revision, info,
                                revision_states[i].depot))

    # Restore the working directory changed by ChangeToDepotDir above.
    os.chdir(cwd)
    return culprit_revisions

  @classmethod
  def _ComputeRegressionStatistics(cls, rev_states, first_working_rev,
                                   last_broken_rev):
    # TODO(sergiyb): We assume that value has a "values" key, which may not
    # be the case for failure-bisects, where there is a single value only.
    broken_means = [state.value['values']
                    for state in rev_states[:last_broken_rev.index + 1]
                    if state.value]

    working_means = [state.value['values']
                     for state in rev_states[first_working_rev.index:]
                     if state.value]

    # Flatten the lists to calculate the mean of all values.
    working_mean = sum(working_means, [])
    broken_mean = sum(broken_means, [])
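    # Note: sum(list_of_lists, []) flattens one level of nesting, e.g.
    # sum([[1, 2], [3]], []) == [1, 2, 3].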

    # Calculate the approximate size of the regression.
    mean_of_bad_runs = math_utils.Mean(broken_mean)
    mean_of_good_runs = math_utils.Mean(working_mean)

    regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
                                                      mean_of_bad_runs)
    if math.isnan(regression_size):
      regression_size = 'zero-to-nonzero'
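    # Worked example (assuming math_utils.RelativeChange returns the change
    # relative to its first argument): a good mean of 100.0 and a bad mean of
    # 120.0 give a relative change of 0.2, i.e. a regression_size of 20
    # (percent). A good mean of zero with a nonzero bad mean yields NaN,
    # which is reported as 'zero-to-nonzero'.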

    regression_std_err = math.fabs(math_utils.PooledStandardError(
        [working_mean, broken_mean]) /
        max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

    # Give a "confidence" in the bisect result. Currently we consider the
    # values of only the revisions at the breaking range (last known good
    # and first known bad); see the note in the docstring for
    # FindBreakingRevRange.
    confidence_params = (
        sum([first_working_rev.value['values']], []),
        sum([last_broken_rev.value['values']], []))
    confidence = cls.ConfidenceScore(*confidence_params)

    bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs

    return {'regression_size': regression_size,
            'regression_std_err': regression_std_err,
            'confidence': confidence,
            'bad_greater_than_good': bad_greater_than_good}