Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / auto_bisect / math_utils.py
blobeef7f0936d3da7a3f9c379683b91c90056a7e680
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """General statistical or mathematical functions."""
7 import math
def TruncatedMean(data_set, truncate_proportion):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded.

  NOTE: If there's not much benefit from this keeping and weighting
  partial values, it might be better to use a simplified truncated mean
  function without weighting.

  Args:
    data_set: Non-empty list of values.
    truncate_proportion: How much of the upper and lower portions of the data
        set to discard, expressed as a value in the range [0, 1].
        Note: a value of 0.5 or greater would be meaningless. Truncation is
        only applied when the data set holds more than two values.

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty, or nothing was left after discarding
        values.
  """
  count = len(data_set)
  if count == 0:
    raise TypeError('Cannot calculate the mean of an empty data set.')

  if count <= 2:
    # Too few values to truncate anything; this is just the plain mean.
    return sum(float(v) for v in data_set) / count

  discard_num_float = count * truncate_proportion
  discard_num_int = int(math.floor(discard_num_float))
  kept_weight = count - (discard_num_float * 2)

  ordered = sorted(data_set)
  kept = [float(v) for v in ordered[discard_num_int:count - discard_num_int]]
  if not kept or kept_weight <= 0:
    raise TypeError('The data set was empty after discarding values.')

  if len(kept) == 1:
    # A lone surviving value is the mean no matter how much of its weight is
    # left. Treating it as both the lowest and the highest kept value would
    # count it twice.
    return kept[0]

  # Fraction of each of the two boundary values that is still kept.
  weight_left = 1.0 - (discard_num_float - discard_num_int)
  if weight_left < 1:
    # If the % to discard leaves a fractional portion, need to weight those
    # values.
    kept = [kept[0] * weight_left, kept[-1] * weight_left] + kept[1:-1]

  return sum(kept) / kept_weight
def Mean(values):
  """Returns the arithmetic mean of a list of values.

  Computed as the truncated mean with nothing discarded from either end.
  """
  no_truncation = 0.0
  return TruncatedMean(values, no_truncation)
def Variance(values):
  """Returns the sample variance of a list of values.

  The sum of squared deviations from the mean is divided by (n - 1). A list
  holding a single value has a variance of 0.0.
  """
  count = len(values)
  if count == 1:
    return 0.0

  center = Mean(values)
  deviations = [float(value) - center for value in values]
  sum_of_squares = sum(float(d * d) for d in deviations)
  return sum_of_squares / (count - 1)
def StandardDeviation(values):
  """Returns the sample standard deviation: the square root of Variance."""
  sample_variance = Variance(values)
  return math.sqrt(sample_variance)
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    If |before| is zero and |after| is not, the change is undefined and
    NaN is returned.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert to float before dividing so that integer inputs are not subject
  # to floor division (which would report e.g. 10 -> 11 as no change).
  difference = float(after - before)
  return math.fabs(difference / before)
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Args:
    work_sets: A collection of collections of numbers. Empty collections
        contribute nothing and are skipped.

  Returns:
    Pooled sample standard error. This is 0.0 when no sample holds more
    than one value.
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    if set_size == 0:
      # Skip before calling StandardDeviation: an empty sample has no
      # deviation to compute and must not reduce the degrees of freedom.
      continue
    std_dev = StandardDeviation(current_set)
    # Each sample's variance is weighted by its degrees of freedom.
    numerator += (set_size - 1) * std_dev ** 2
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1 == 0:
    return 0.0

  return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
129 # Redefining built-in 'StandardError'
130 # pylint: disable=W0622
def StandardError(values):
  """Returns the standard error of a list of values.

  This is the sample standard deviation divided by the square root of the
  number of values.
  """
  sample_size = len(values)
  if sample_size > 1:
    return StandardDeviation(values) / math.sqrt(sample_size)
  # NOTE: Returning 0.0 for an empty list is inconsistent with Variance and
  # StandardDeviation, which do not special-case empty input.
  return 0.0