# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import math
import os

import bisect_utils
import math_utils
import source_control
import ttest

from bisect_state import RevisionState


class BisectResults(object):
  """Contains results of the completed bisect.

  Properties:
    error: Error message if the bisect failed.

    If the error is None, the following properties are present:
    warnings: List of warnings from the bisect run.
    state: BisectState object from which these results were generated.
    first_working_revision: First good revision.
    last_broken_revision: Last bad revision.

    If both of the above revisions are not None, the following properties are
    present:
    culprit_revisions: A list of revisions which contain the bad change
        introducing the failure.
    other_regressions: A list of tuples representing other regressions, which
        may have occurred.
    regression_size: For performance bisects, this is the relative change of
        the mean metric value. For other bisects this field always contains
        'zero-to-nonzero'.
    regression_std_err: For performance bisects, it is a pooled standard error
        for groups of good and bad runs. Not used for other bisects.
    confidence: For performance bisects, it is a confidence that the good and
        bad runs are distinct groups. Not used for non-performance bisects.
  """

  def __init__(self, bisect_state=None, depot_registry=None, opts=None,
               runtime_warnings=None, error=None, abort_reason=None):
    """Computes final bisect results after a bisect run is complete.

    This constructor should be called in one of the following ways:
      BisectResults(state, depot_registry, opts, runtime_warnings)
      BisectResults(error=error)

    The first option creates an object representing successful bisect results,
    while the second one creates an error result.

    Args:
      bisect_state: BisectState object representing the latest bisect state.
      depot_registry: DepotDirectoryRegistry object with information on each
          repository in the bisect_state.
      opts: Options passed to the bisect run.
      runtime_warnings: A list of warnings from the bisect run.
      error: Error message. When error is not None, other arguments are
          ignored.
    """
    self.error = error
    self.abort_reason = abort_reason
    if error is not None or abort_reason is not None:
      return

    assert (bisect_state is not None and depot_registry is not None and
            opts is not None and runtime_warnings is not None), (
                'Incorrect use of the BisectResults constructor. When error '
                'is None, all other arguments are required.')

    self.state = bisect_state

    rev_states = bisect_state.GetRevisionStates()
    first_working_rev, last_broken_rev = self.FindBreakingRevRange(rev_states)
    self.first_working_revision = first_working_rev
    self.last_broken_revision = last_broken_rev

    self.warnings = runtime_warnings

    self.retest_results_tot = None
    self.retest_results_reverted = None

    if first_working_rev is not None and last_broken_rev is not None:
      statistics = self._ComputeRegressionStatistics(
          rev_states, first_working_rev, last_broken_rev)

      self.regression_size = statistics['regression_size']
      self.regression_std_err = statistics['regression_std_err']
      self.confidence = statistics['confidence']

      self.culprit_revisions = self._FindCulpritRevisions(
          rev_states, depot_registry, first_working_rev, last_broken_rev)

      self.other_regressions = self._FindOtherRegressions(
          rev_states, statistics['bad_greater_than_good'])

      self.warnings += self._GetResultBasedWarnings(
          self.culprit_revisions, opts, self.confidence)
    elif first_working_rev is not None:
      # Set these attributes so that the bisect printer does not break when
      # the regression cannot be reproduced (no broken revision was found).
      self.regression_size = 0
      self.regression_std_err = 0
      self.confidence = 0
      self.culprit_revisions = []
      self.other_regressions = []

  def AddRetestResults(self, results_tot, results_reverted):
    if not results_tot or not results_reverted:
      self.warnings.append(
          'Failed to re-test reverted culprit CL against ToT.')
      return

    confidence_params = (results_reverted[0]['values'],
                         results_tot[0]['values'])
    confidence = BisectResults.ConfidenceScore(*confidence_params)

    self.retest_results_tot = RevisionState('ToT', 'n/a', 0)
    self.retest_results_tot.value = results_tot[0]

    self.retest_results_reverted = RevisionState('Reverted', 'n/a', 0)
    self.retest_results_reverted.value = results_reverted[0]

    if confidence <= bisect_utils.HIGH_CONFIDENCE:
      self.warnings.append(
          'Confidence of re-test with reverted CL is not high.'
          ' Check that the regression hasn\'t already recovered. '
          ' There\'s still a chance this is a regression, as performance of'
          ' local builds may not match official builds.')
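
  # Sketch of the expected input shape (inferred from the code above, not a
  # documented contract): both arguments are lists whose first element is a
  # result dict with a 'values' key, e.g.:
  #   results_tot = [{'values': [221.0, 223.5, 222.8]}]
  #   results_reverted = [{'values': [240.1, 239.5, 241.0]}]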

  @staticmethod
  def _GetResultBasedWarnings(culprit_revisions, opts, confidence):
    warnings = []
    if len(culprit_revisions) > 1:
      warnings.append('Due to build errors, regression range could '
                      'not be narrowed down to a single commit.')
    if opts.repeat_test_count == 1:
      warnings.append('Tests were only set to run once. This may '
                      'be insufficient to get meaningful results.')
    if 0 < confidence < bisect_utils.HIGH_CONFIDENCE:
      warnings.append('Confidence is not high. Try bisecting again '
                      'with increased repeat_count, larger range, or '
                      'on another metric.')
    if not confidence:
      warnings.append('Confidence score is 0%. Try bisecting again on '
                      'another platform or another metric.')
    return warnings
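
  # For example (assuming bisect_utils.HIGH_CONFIDENCE is 95): a confidence
  # of 80 produces the "not high" warning, a confidence of 0 produces the 0%
  # warning, and a confidence of 97 produces neither.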

  @staticmethod
  def ConfidenceScore(sample1, sample2,
                      accept_single_bad_or_good=False):
    """Calculates a confidence score.

    This score is a percentage which represents our degree of confidence in
    the proposition that the good results and bad results are distinct
    groups, and their differences aren't due to chance alone.

    Args:
      sample1: A flat list of "good" result numbers.
      sample2: A flat list of "bad" result numbers.
      accept_single_bad_or_good: If True, computes confidence even if there is
          just one bad or good revision; otherwise a single good or bad
          revision always returns 0.0 confidence. This flag will probably go
          away once we implement expanding the bisect range by one more
          revision for such cases.

    Returns:
      A number in the range [0, 100].
    """
    # If there's only one item in either list, this means only one revision
    # was classified good or bad; this isn't good enough evidence to make a
    # decision. If an empty list was passed, that also implies zero
    # confidence.
    if not accept_single_bad_or_good:
      if len(sample1) <= 1 or len(sample2) <= 1:
        return 0.0

    # If either of the lists is empty (this is unexpected and normally
    # shouldn't happen), then we also want to return 0.
    if not sample1 or not sample2:
      return 0.0

    # The p-value is approximately the probability of obtaining the given set
    # of good and bad values just by chance.
    _, _, p_value = ttest.WelchsTTest(sample1, sample2)
    return 100.0 * (1.0 - p_value)
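
  # Illustrative sketch (hypothetical numbers, not from any real run): two
  # clearly separated samples should score close to 100, while heavily
  # overlapping samples should score low:
  #   BisectResults.ConfidenceScore([10.0, 10.1, 9.9], [12.1, 12.0, 11.8])
  #   BisectResults.ConfidenceScore([10.0, 10.5, 11.0], [10.2, 10.6, 10.9])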

  @classmethod
  def _FindOtherRegressions(cls, revision_states, bad_greater_than_good):
    """Compiles a list of other possible regressions from the revision data.

    Args:
      revision_states: Sorted list of RevisionState objects.
      bad_greater_than_good: Whether the result value at the "bad" revision
          is numerically greater than the result value at the "good"
          revision.

    Returns:
      A list of [current_rev, previous_rev, confidence] for other places
      where there may have been a regression.
    """
    other_regressions = []
    previous_values = []
    prev_state = None
    for revision_state in revision_states:
      if revision_state.value:
        current_values = revision_state.value['values']
        if previous_values:
          confidence_params = (sum(previous_values, []),
                               sum([current_values], []))
          confidence = cls.ConfidenceScore(*confidence_params,
                                           accept_single_bad_or_good=True)
          mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
          mean_of_current_runs = math_utils.Mean(current_values)

          # Check that the potential regression is in the same direction as
          # the overall regression. If the mean of the previous runs < the
          # mean of the current runs, this local regression is in the same
          # direction.
          prev_greater_than_current = mean_of_prev_runs > mean_of_current_runs
          is_same_direction = (prev_greater_than_current if
                               bad_greater_than_good
                               else not prev_greater_than_current)

          # Only report potential regressions with high confidence.
          if is_same_direction and confidence > 50:
            other_regressions.append([revision_state, prev_state, confidence])
        previous_values.append(current_values)
        prev_state = revision_state
    return other_regressions
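
  # Note on the direction check (inferred from the surrounding code):
  # revision_states is in reverse chronological order, so previous_values
  # come from newer revisions. A local change counts only when it points the
  # same way as the overall good-to-bad change, e.g. (hypothetical means) if
  # the bad mean exceeds the good mean, a newer mean of 12 against an older
  # mean of 10 is the same direction; 10 against 12 is not.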

  @staticmethod
  def FindBreakingRevRange(revision_states):
    """Finds the last known good and first known bad revisions.

    Note that since revision_states is expected to be in reverse chronological
    order, the last known good revision is the first revision in the list that
    has the passed property set to 1, hence the name `first_working_revision`.
    The inverse applies to `last_broken_revision`.

    Args:
      revision_states: A list of RevisionState instances.

    Returns:
      A tuple containing the two revision states at the border (last
      known good and first known bad).
    """
    first_working_revision = None
    last_broken_revision = None

    for revision_state in revision_states:
      if revision_state.passed == 1 and not first_working_revision:
        first_working_revision = revision_state

      if not revision_state.passed:
        last_broken_revision = revision_state

    return first_working_revision, last_broken_revision
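
  # Minimal sketch (hypothetical states): for reverse-chronological states
  # with passed flags [0, 0, 1, 1], the state at index 2 (the newest passing
  # revision) is returned as first_working_revision and the state at index 1
  # (the oldest failing revision) as last_broken_revision.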

  @staticmethod
  def _FindCulpritRevisions(revision_states, depot_registry, first_working_rev,
                            last_broken_rev):
    cwd = os.getcwd()

    culprit_revisions = []
    for i in xrange(last_broken_rev.index, first_working_rev.index):
      depot_registry.ChangeToDepotDir(revision_states[i].depot)
      info = source_control.QueryRevisionInfo(revision_states[i].revision)
      culprit_revisions.append((revision_states[i].revision, info,
                                revision_states[i].depot))

    # Restore the original working directory after changing into depot dirs.
    os.chdir(cwd)
    return culprit_revisions
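
  # Example (hypothetical indices): with last_broken_rev.index == 2 and
  # first_working_rev.index == 5, the revisions at indices 2, 3 and 4 (the
  # first known bad revision plus any untested revisions between it and the
  # last known good one) are returned as culprit candidates.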

  @classmethod
  def _ComputeRegressionStatistics(cls, rev_states, first_working_rev,
                                   last_broken_rev):
    # TODO(sergiyb): We assume that value has a "values" key, which may not be
    # the case for failure-bisects, where there is a single value only.
    broken_means = [state.value['values']
                    for state in rev_states[:last_broken_rev.index + 1]
                    if state.value]

    working_means = [state.value['values']
                     for state in rev_states[first_working_rev.index:]
                     if state.value]

    # Flatten the lists to calculate the mean of all values.
    working_mean = sum(working_means, [])
    broken_mean = sum(broken_means, [])

    # Calculate the approximate size of the regression.
    mean_of_bad_runs = math_utils.Mean(broken_mean)
    mean_of_good_runs = math_utils.Mean(working_mean)

    regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
                                                      mean_of_bad_runs)
    if math.isnan(regression_size):
      regression_size = 'zero-to-nonzero'

    regression_std_err = math.fabs(math_utils.PooledStandardError(
        [working_mean, broken_mean]) /
        max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

    # Give a "confidence" in the bisect. Currently, we consider only the
    # values of the revisions at the breaking range (last known good and
    # first known bad); see the note in the docstring for
    # FindBreakingRevRange.
    confidence_params = (
        sum([first_working_rev.value['values']], []),
        sum([last_broken_rev.value['values']], []))
    confidence = cls.ConfidenceScore(*confidence_params)

    bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs

    return {'regression_size': regression_size,
            'regression_std_err': regression_std_err,
            'confidence': confidence,
            'bad_greater_than_good': bad_greater_than_good}
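
  # Worked example for regression_size (hypothetical numbers, assuming
  # math_utils.RelativeChange returns the absolute difference relative to its
  # first argument): a good-run mean of 100 and a bad-run mean of 110 give
  # 100 * |110 - 100| / 100 = 10 (percent). When the good mean is 0, the
  # relative change is NaN and 'zero-to-nonzero' is reported instead.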