tools/auto_bisect/math_utils.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 """General statistical or mathematical functions."""
   6
   7 import math
   8
   9
  10 def TruncatedMean(data_set, truncate_percent):
  11   """Calculates the truncated mean of a set of values.
  12
  13   Note that this isn't just the mean of the set of values with the highest
  14   and lowest values discarded; the non-discarded values are also weighted
  15   differently depending how many values are discarded.
  16
  17   Args:
  18     data_set: Non-empty list of values.
  19     truncate_percent: How much of the upper and lower portions of the data set
  20         to discard, expressed as a value in [0, 1].
  21
  22   Returns:
  23     The truncated mean as a float.
  24
  25   Raises:
  26     TypeError: The data set was empty after discarding values.
  27   """
  28   if len(data_set) > 2:
  29     data_set = sorted(data_set)
  30
  31     discard_num_float = len(data_set) * truncate_percent
  32     discard_num_int = int(math.floor(discard_num_float))
  33     kept_weight = len(data_set) - discard_num_float * 2
  34
  35     data_set = data_set[discard_num_int:len(data_set)-discard_num_int]
  36
  37     weight_left = 1.0 - (discard_num_float - discard_num_int)
  38
  39     if weight_left < 1:
  40       # If the % to discard leaves a fractional portion, need to weight those
  41       # values.
  42       unweighted_vals = data_set[1:len(data_set)-1]
  43       weighted_vals = [data_set[0], data_set[len(data_set)-1]]
  44       weighted_vals = [w * weight_left for w in weighted_vals]
  45       data_set = weighted_vals + unweighted_vals
  46   else:
  47     kept_weight = len(data_set)
  48
  49   truncated_mean = reduce(lambda x, y: float(x) + float(y),
  50                           data_set) / kept_weight
  51
  52   return truncated_mean
  53
  54
  55 def Mean(values):
  56   """Calculates the arithmetic mean of a list of values."""
  57   return TruncatedMean(values, 0.0)
  58
  59
  60 def Variance(values):
  61   """Calculates the sample variance."""
  62   if len(values) == 1:
  63     return 0.0
  64   mean = Mean(values)
  65   differences_from_mean = [float(x) - mean for x in values]
  66   squared_differences = [float(x * x) for x in differences_from_mean]
  67   variance = sum(squared_differences) / (len(values) - 1)
  68   return variance
  69
  70
  71 def StandardDeviation(values):
  72   """Calculates the sample standard deviation of the given list of values."""
  73   return math.sqrt(Variance(values))
  74
  75
  76 def RelativeChange(before, after):
  77   """Returns the relative change of before and after, relative to before.
  78
  79   There are several different ways to define relative difference between
  80   two numbers; sometimes it is defined as relative to the smaller number,
  81   or to the mean of the two numbers. This version returns the difference
  82   relative to the first of the two numbers.
  83
  84   Args:
  85     before: A number representing an earlier value.
  86     after: Another number, representing a later value.
  87
  88   Returns:
  89     A non-negative floating point number; 0.1 represents a 10% change.
  90   """
  91   if before == after:
  92     return 0.0
  93   if before == 0:
  94     return float('nan')
  95   difference = after - before
  96   return math.fabs(difference / before)
  97
  98
  99 def PooledStandardError(work_sets):
 100   """Calculates the pooled sample standard error for a set of samples.
 101
 102   Args:
 103     work_sets: A collection of collections of numbers.
 104
 105   Returns:
 106     Pooled sample standard error.
 107   """
 108   numerator = 0.0
 109   denominator1 = 0.0
 110   denominator2 = 0.0
 111
 112   for current_set in work_sets:
 113     std_dev = StandardDeviation(current_set)
 114     numerator += (len(current_set) - 1) * std_dev ** 2
 115     denominator1 += len(current_set) - 1
 116     if len(current_set) > 0:
 117       denominator2 += 1.0 / len(current_set)
 118
 119   if denominator1 == 0:
 120     return 0.0
 121
 122   return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
 123
 124
 125 # Redefining built-in 'StandardError'
 126 # pylint: disable=W0622
 127 def StandardError(values):
 128   """Calculates the standard error of a list of values."""
 129   if len(values) <= 1:
 130     return 0.0
 131   std_dev = StandardDeviation(values)
 132   return std_dev / math.sqrt(len(values))