components/variations/entropy_provider_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "components/variations/entropy_provider.h"
   6
   7 #include <cmath>
   8 #include <limits>
   9 #include <numeric>
  10
  11 #include "base/basictypes.h"
  12 #include "base/guid.h"
  13 #include "base/memory/scoped_ptr.h"
  14 #include "base/rand_util.h"
  15 #include "base/strings/string_number_conversions.h"
  16 #include "components/variations/metrics_util.h"
  17 #include "testing/gtest/include/gtest/gtest.h"
  18
  19 namespace metrics {
  20
  21 namespace {
  22
  23 // Size of the low entropy source to use for the permuted entropy provider
  24 // in tests.
  25 const size_t kMaxLowEntropySize = 8000;
  26
  27 // Field trial names used in unit tests.
  28 const char* const kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
  29                                         "NewTabButton" };
  30
  31 // Computes the Chi-Square statistic for |values| assuming they follow a uniform
  32 // distribution, where each entry has expected value |expected_value|.
  33 //
  34 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
  35 // value and E is the expected value.
  36 double ComputeChiSquare(const std::vector<int>& values,
  37                         double expected_value) {
  38   double sum = 0;
  39   for (size_t i = 0; i < values.size(); ++i) {
  40     const double delta = values[i] - expected_value;
  41     sum += (delta * delta) / expected_value;
  42   }
  43   return sum;
  44 }
  45
  46 // Computes SHA1-based entropy for the given |trial_name| based on
  47 // |entropy_source|
  48 double GenerateSHA1Entropy(const std::string& entropy_source,
  49                            const std::string& trial_name) {
  50   SHA1EntropyProvider sha1_provider(entropy_source);
  51   return sha1_provider.GetEntropyForTrial(trial_name, 0);
  52 }
  53
  54 // Generates permutation-based entropy for the given |trial_name| based on
  55 // |entropy_source| which must be in the range [0, entropy_max).
  56 double GeneratePermutedEntropy(uint16 entropy_source,
  57                                size_t entropy_max,
  58                                const std::string& trial_name) {
  59   PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
  60   return permuted_provider.GetEntropyForTrial(trial_name, 0);
  61 }
  62
  63 // Helper interface for testing used to generate entropy values for a given
  64 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source
  65 // value constant and generates entropy for different trial names, instances
  66 // of TrialEntropyGenerator keep the trial name constant and generate low/high
  67 // entropy source values internally to produce each output entropy value.
  68 class TrialEntropyGenerator {
  69  public:
  70   virtual ~TrialEntropyGenerator() {}
  71   virtual double GenerateEntropyValue() const = 0;
  72 };
  73
  74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
  75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
  76 // entropy corresponding to a low entropy source).
  77 class SHA1EntropyGenerator : public TrialEntropyGenerator {
  78  public:
  79   explicit SHA1EntropyGenerator(const std::string& trial_name)
  80       : trial_name_(trial_name) {
  81   }
  82
  83   ~SHA1EntropyGenerator() override {}
  84
  85   double GenerateEntropyValue() const override {
  86     // Use a random GUID + 13 additional bits of entropy to match how the
  87     // SHA1EntropyProvider is used in metrics_service.cc.
  88     const int low_entropy_source =
  89         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
  90     const std::string high_entropy_source =
  91         base::GenerateGUID() + base::IntToString(low_entropy_source);
  92     return GenerateSHA1Entropy(high_entropy_source, trial_name_);
  93   }
  94
  95  private:
  96   std::string trial_name_;
  97
  98   DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
  99 };
 100
 101 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
 102 // using 13-bit low entropy source values.
 103 class PermutedEntropyGenerator : public TrialEntropyGenerator {
 104  public:
 105   explicit PermutedEntropyGenerator(const std::string& trial_name)
 106       : mapping_(kMaxLowEntropySize) {
 107     // Note: Given a trial name, the computed mapping will be the same.
 108     // As a performance optimization, pre-compute the mapping once per trial
 109     // name and index into it for each entropy value.
 110     const uint32 randomization_seed = HashName(trial_name);
 111     internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
 112                                                    &mapping_);
 113   }
 114
 115   ~PermutedEntropyGenerator() override {}
 116
 117   double GenerateEntropyValue() const override {
 118     const int low_entropy_source =
 119         static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
 120     return mapping_[low_entropy_source] /
 121            static_cast<double>(kMaxLowEntropySize);
 122   }
 123
 124  private:
 125   std::vector<uint16> mapping_;
 126
 127   DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
 128 };
 129
 130 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
 131 // of Fit Test.
 132 void PerformEntropyUniformityTest(
 133     const std::string& trial_name,
 134     const TrialEntropyGenerator& entropy_generator) {
 135   // Number of buckets in the simulated field trials.
 136   const size_t kBucketCount = 20;
 137   // Max number of iterations to perform before giving up and failing.
 138   const size_t kMaxIterationCount = 100000;
 139   // The number of iterations to perform before each time the statistical
 140   // significance of the results is checked.
 141   const size_t kCheckIterationCount = 10000;
 142   // This is the Chi-Square threshold from the Chi-Square statistic table for
 143   // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
 144   // level. See: http://www.medcalc.org/manual/chi-square-table.php
 145   const double kChiSquareThreshold = 43.82;
 146
 147   std::vector<int> distribution(kBucketCount);
 148
 149   for (size_t i = 1; i <= kMaxIterationCount; ++i) {
 150     const double entropy_value = entropy_generator.GenerateEntropyValue();
 151     const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
 152     ASSERT_LT(bucket, kBucketCount);
 153     distribution[bucket] += 1;
 154
 155     // After |kCheckIterationCount| iterations, compute the Chi-Square
 156     // statistic of the distribution. If the resulting statistic is greater
 157     // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
 158     // that the observed samples do not follow a uniform distribution.
 159     //
 160     // However, since 99.9% would still result in a false negative every
 161     // 1000 runs of the test, do not treat it as a failure (else the test
 162     // will be flaky). Instead, perform additional iterations to determine
 163     // if the distribution will converge, up to |kMaxIterationCount|.
 164     if ((i % kCheckIterationCount) == 0) {
 165       const double expected_value_per_bucket =
 166           static_cast<double>(i) / kBucketCount;
 167       const double chi_square =
 168           ComputeChiSquare(distribution, expected_value_per_bucket);
 169       if (chi_square < kChiSquareThreshold)
 170         break;
 171
 172       // If |i == kMaxIterationCount|, the Chi-Square statistic did not
 173       // converge after |kMaxIterationCount|.
 174       EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
 175           trial_name << " with chi_square = " << chi_square <<
 176           " after " << kMaxIterationCount << " iterations.";
 177     }
 178   }
 179 }
 180
 181 }  // namespace
 182
 183 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
 184   // Simply asserts that two trials using one-time randomization
 185   // that have different names, normally generate different results.
 186   //
 187   // Note that depending on the one-time random initialization, they
 188   // _might_ actually give the same result, but we know that given
 189   // the particular client_id we use for unit tests they won't.
 190   base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
 191   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
 192   scoped_refptr<base::FieldTrial> trials[] = {
 193       base::FieldTrialList::FactoryGetFieldTrial(
 194           "one", 100, "default", kNoExpirationYear, 1, 1,
 195           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
 196       base::FieldTrialList::FactoryGetFieldTrial(
 197           "two", 100, "default", kNoExpirationYear, 1, 1,
 198           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
 199   };
 200
 201   for (size_t i = 0; i < arraysize(trials); ++i) {
 202     for (int j = 0; j < 100; ++j)
 203       trials[i]->AppendGroup(std::string(), 1);
 204   }
 205
 206   // The trials are most likely to give different results since they have
 207   // different names.
 208   EXPECT_NE(trials[0]->group(), trials[1]->group());
 209   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
 210 }
 211
 212 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
 213   // Simply asserts that two trials using one-time randomization
 214   // that have different names, normally generate different results.
 215   //
 216   // Note that depending on the one-time random initialization, they
 217   // _might_ actually give the same result, but we know that given
 218   // the particular client_id we use for unit tests they won't.
 219   base::FieldTrialList field_trial_list(
 220       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
 221   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
 222   scoped_refptr<base::FieldTrial> trials[] = {
 223       base::FieldTrialList::FactoryGetFieldTrial(
 224           "one", 100, "default", kNoExpirationYear, 1, 1,
 225           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
 226       base::FieldTrialList::FactoryGetFieldTrial(
 227           "two", 100, "default", kNoExpirationYear, 1, 1,
 228           base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
 229   };
 230
 231   for (size_t i = 0; i < arraysize(trials); ++i) {
 232     for (int j = 0; j < 100; ++j)
 233       trials[i]->AppendGroup(std::string(), 1);
 234   }
 235
 236   // The trials are most likely to give different results since they have
 237   // different names.
 238   EXPECT_NE(trials[0]->group(), trials[1]->group());
 239   EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
 240 }
 241
 242 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
 243   // Ensures that two trials with different names but the same custom seed used
 244   // for one time randomization produce the same group assignments.
 245   base::FieldTrialList field_trial_list(
 246       new PermutedEntropyProvider(1234, kMaxLowEntropySize));
 247   const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
 248   const uint32 kCustomSeed = 9001;
 249   scoped_refptr<base::FieldTrial> trials[] = {
 250       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
 251           "one", 100, "default", kNoExpirationYear, 1, 1,
 252           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
 253       base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
 254           "two", 100, "default", kNoExpirationYear, 1, 1,
 255           base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
 256   };
 257
 258   for (size_t i = 0; i < arraysize(trials); ++i) {
 259     for (int j = 0; j < 100; ++j)
 260       trials[i]->AppendGroup(std::string(), 1);
 261   }
 262
 263   // Normally, these trials should produce different groups, but if the same
 264   // custom seed is used, they should produce the same group assignment.
 265   EXPECT_EQ(trials[0]->group(), trials[1]->group());
 266   EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
 267 }
 268
 269 TEST(EntropyProviderTest, SHA1Entropy) {
 270   const double results[] = { GenerateSHA1Entropy("hi", "1"),
 271                              GenerateSHA1Entropy("there", "1") };
 272
 273   EXPECT_NE(results[0], results[1]);
 274   for (size_t i = 0; i < arraysize(results); ++i) {
 275     EXPECT_LE(0.0, results[i]);
 276     EXPECT_GT(1.0, results[i]);
 277   }
 278
 279   EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
 280             GenerateSHA1Entropy("yo", "1"));
 281   EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
 282             GenerateSHA1Entropy("yo", "else"));
 283 }
 284
 285 TEST(EntropyProviderTest, PermutedEntropy) {
 286   const double results[] = {
 287       GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
 288       GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
 289
 290   EXPECT_NE(results[0], results[1]);
 291   for (size_t i = 0; i < arraysize(results); ++i) {
 292     EXPECT_LE(0.0, results[i]);
 293     EXPECT_GT(1.0, results[i]);
 294   }
 295
 296   EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
 297             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
 298   EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
 299             GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
 300 }
 301
 302 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
 303   // Verifies that PermutedEntropyProvider produces expected results. This
 304   // ensures that the results are the same between platforms and ensures that
 305   // changes to the implementation do not regress this accidentally.
 306
 307   EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
 308                    GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
 309   EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
 310                    GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
 311   EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
 312                    GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
 313 }
 314
 315 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
 316   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
 317     SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
 318     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
 319   }
 320 }
 321
 322 TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
 323   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
 324     PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
 325     PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
 326   }
 327 }
 328
 329 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
 330   // Verifies that SeededRandGenerator has a uniform distribution.
 331   //
 332   // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
 333
 334   const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
 335   const uint32 kExpectedAverage = kTopOfRange / 2ULL;
 336   const uint32 kAllowedVariance = kExpectedAverage / 50ULL;  // +/- 2%
 337   const int kMinAttempts = 1000;
 338   const int kMaxAttempts = 1000000;
 339
 340   for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
 341     const uint32 seed = HashName(kTestTrialNames[i]);
 342     internal::SeededRandGenerator rand_generator(seed);
 343
 344     double cumulative_average = 0.0;
 345     int count = 0;
 346     while (count < kMaxAttempts) {
 347       uint32 value = rand_generator(kTopOfRange);
 348       cumulative_average = (count * cumulative_average + value) / (count + 1);
 349
 350       // Don't quit too quickly for things to start converging, or we may have
 351       // a false positive.
 352       if (count > kMinAttempts &&
 353           kExpectedAverage - kAllowedVariance < cumulative_average &&
 354           cumulative_average < kExpectedAverage + kAllowedVariance) {
 355         break;
 356       }
 357
 358       ++count;
 359     }
 360
 361     ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
 362         kExpectedAverage << ", average ended at " << cumulative_average <<
 363         ", for trial " << kTestTrialNames[i];
 364   }
 365 }
 366
 367 }  // namespace metrics