Roll src/third_party/WebKit 5dc6c7a:f688398 (svn 201993:201994)
[chromium-blink-merge.git] / components / variations / entropy_provider_unittest.cc
blob652ab44e64b8a8f2d17bc4ced71fba748e64752f
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/variations/entropy_provider.h"
7 #include <cmath>
8 #include <limits>
9 #include <numeric>
11 #include "base/basictypes.h"
12 #include "base/guid.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/rand_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "components/variations/metrics_util.h"
17 #include "testing/gtest/include/gtest/gtest.h"
19 namespace metrics {
21 namespace {
// Number of distinct low entropy source values used with the permuted entropy
// provider throughout these tests.
const size_t kMaxLowEntropySize = 8000;

// Names of the field trials exercised by the unit tests in this file.
const char* const kTestTrialNames[] = {
  "TestTrial",
  "AnotherTestTrial",
  "NewTabButton",
};
// Returns the Chi-Square statistic of |values| under the hypothesis that they
// are uniformly distributed, i.e. that every entry shares the same expected
// value |expected_value|.
//
// The statistic is Sum((O - E)^2 / E), with O being the observed value of an
// entry and E its expected value. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (const int observed : values) {
    const double deviation = observed - expected_value;
    chi_square += (deviation * deviation) / expected_value;
  }
  return chi_square;
}
46 // Computes SHA1-based entropy for the given |trial_name| based on
47 // |entropy_source|
48 double GenerateSHA1Entropy(const std::string& entropy_source,
49 const std::string& trial_name) {
50 SHA1EntropyProvider sha1_provider(entropy_source);
51 return sha1_provider.GetEntropyForTrial(trial_name, 0);
54 // Generates permutation-based entropy for the given |trial_name| based on
55 // |entropy_source| which must be in the range [0, entropy_max).
56 double GeneratePermutedEntropy(uint16 entropy_source,
57 size_t entropy_max,
58 const std::string& trial_name) {
59 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
60 return permuted_provider.GetEntropyForTrial(trial_name, 0);
// Test-only interface that produces entropy values for one fixed field trial.
//
// An EntropyProvider holds the low/high entropy source constant and is asked
// about different trial names. A TrialEntropyGenerator does the opposite: the
// trial name stays constant and the implementation picks a low/high entropy
// source value internally for every entropy value it hands out.
class TrialEntropyGenerator {
 public:
  virtual ~TrialEntropyGenerator() {}

  // Returns one entropy value for the generator's trial.
  virtual double GenerateEntropyValue() const = 0;
};
74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
76 // entropy corresponding to a low entropy source).
77 class SHA1EntropyGenerator : public TrialEntropyGenerator {
78 public:
79 explicit SHA1EntropyGenerator(const std::string& trial_name)
80 : trial_name_(trial_name) {
83 ~SHA1EntropyGenerator() override {}
85 double GenerateEntropyValue() const override {
86 // Use a random GUID + 13 additional bits of entropy to match how the
87 // SHA1EntropyProvider is used in metrics_service.cc.
88 const int low_entropy_source =
89 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
90 const std::string high_entropy_source =
91 base::GenerateGUID() + base::IntToString(low_entropy_source);
92 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
95 private:
96 std::string trial_name_;
98 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
101 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
102 // using 13-bit low entropy source values.
103 class PermutedEntropyGenerator : public TrialEntropyGenerator {
104 public:
105 explicit PermutedEntropyGenerator(const std::string& trial_name)
106 : mapping_(kMaxLowEntropySize) {
107 // Note: Given a trial name, the computed mapping will be the same.
108 // As a performance optimization, pre-compute the mapping once per trial
109 // name and index into it for each entropy value.
110 const uint32 randomization_seed = HashName(trial_name);
111 internal::PermuteMappingUsingRandomizationSeed(randomization_seed,
112 &mapping_);
115 ~PermutedEntropyGenerator() override {}
117 double GenerateEntropyValue() const override {
118 const int low_entropy_source =
119 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
120 return mapping_[low_entropy_source] /
121 static_cast<double>(kMaxLowEntropySize);
124 private:
125 std::vector<uint16> mapping_;
127 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
130 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
131 // of Fit Test.
132 void PerformEntropyUniformityTest(
133 const std::string& trial_name,
134 const TrialEntropyGenerator& entropy_generator) {
135 // Number of buckets in the simulated field trials.
136 const size_t kBucketCount = 20;
137 // Max number of iterations to perform before giving up and failing.
138 const size_t kMaxIterationCount = 100000;
139 // The number of iterations to perform before each time the statistical
140 // significance of the results is checked.
141 const size_t kCheckIterationCount = 10000;
142 // This is the Chi-Square threshold from the Chi-Square statistic table for
143 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
144 // level. See: http://www.medcalc.org/manual/chi-square-table.php
145 const double kChiSquareThreshold = 43.82;
147 std::vector<int> distribution(kBucketCount);
149 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
150 const double entropy_value = entropy_generator.GenerateEntropyValue();
151 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
152 ASSERT_LT(bucket, kBucketCount);
153 distribution[bucket] += 1;
155 // After |kCheckIterationCount| iterations, compute the Chi-Square
156 // statistic of the distribution. If the resulting statistic is greater
157 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
158 // that the observed samples do not follow a uniform distribution.
160 // However, since 99.9% would still result in a false negative every
161 // 1000 runs of the test, do not treat it as a failure (else the test
162 // will be flaky). Instead, perform additional iterations to determine
163 // if the distribution will converge, up to |kMaxIterationCount|.
164 if ((i % kCheckIterationCount) == 0) {
165 const double expected_value_per_bucket =
166 static_cast<double>(i) / kBucketCount;
167 const double chi_square =
168 ComputeChiSquare(distribution, expected_value_per_bucket);
169 if (chi_square < kChiSquareThreshold)
170 break;
172 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
173 // converge after |kMaxIterationCount|.
174 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
175 trial_name << " with chi_square = " << chi_square <<
176 " after " << kMaxIterationCount << " iterations.";
181 } // namespace
183 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
184 // Simply asserts that two trials using one-time randomization
185 // that have different names, normally generate different results.
187 // Note that depending on the one-time random initialization, they
188 // _might_ actually give the same result, but we know that given
189 // the particular client_id we use for unit tests they won't.
190 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
191 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
192 scoped_refptr<base::FieldTrial> trials[] = {
193 base::FieldTrialList::FactoryGetFieldTrial(
194 "one", 100, "default", kNoExpirationYear, 1, 1,
195 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
196 base::FieldTrialList::FactoryGetFieldTrial(
197 "two", 100, "default", kNoExpirationYear, 1, 1,
198 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
201 for (size_t i = 0; i < arraysize(trials); ++i) {
202 for (int j = 0; j < 100; ++j)
203 trials[i]->AppendGroup(std::string(), 1);
206 // The trials are most likely to give different results since they have
207 // different names.
208 EXPECT_NE(trials[0]->group(), trials[1]->group());
209 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
212 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
213 // Simply asserts that two trials using one-time randomization
214 // that have different names, normally generate different results.
216 // Note that depending on the one-time random initialization, they
217 // _might_ actually give the same result, but we know that given
218 // the particular client_id we use for unit tests they won't.
219 base::FieldTrialList field_trial_list(
220 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
221 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
222 scoped_refptr<base::FieldTrial> trials[] = {
223 base::FieldTrialList::FactoryGetFieldTrial(
224 "one", 100, "default", kNoExpirationYear, 1, 1,
225 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
226 base::FieldTrialList::FactoryGetFieldTrial(
227 "two", 100, "default", kNoExpirationYear, 1, 1,
228 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL),
231 for (size_t i = 0; i < arraysize(trials); ++i) {
232 for (int j = 0; j < 100; ++j)
233 trials[i]->AppendGroup(std::string(), 1);
236 // The trials are most likely to give different results since they have
237 // different names.
238 EXPECT_NE(trials[0]->group(), trials[1]->group());
239 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
242 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
243 // Ensures that two trials with different names but the same custom seed used
244 // for one time randomization produce the same group assignments.
245 base::FieldTrialList field_trial_list(
246 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
247 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
248 const uint32 kCustomSeed = 9001;
249 scoped_refptr<base::FieldTrial> trials[] = {
250 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
251 "one", 100, "default", kNoExpirationYear, 1, 1,
252 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
253 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
254 "two", 100, "default", kNoExpirationYear, 1, 1,
255 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL),
258 for (size_t i = 0; i < arraysize(trials); ++i) {
259 for (int j = 0; j < 100; ++j)
260 trials[i]->AppendGroup(std::string(), 1);
263 // Normally, these trials should produce different groups, but if the same
264 // custom seed is used, they should produce the same group assignment.
265 EXPECT_EQ(trials[0]->group(), trials[1]->group());
266 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
269 TEST(EntropyProviderTest, SHA1Entropy) {
270 const double results[] = { GenerateSHA1Entropy("hi", "1"),
271 GenerateSHA1Entropy("there", "1") };
273 EXPECT_NE(results[0], results[1]);
274 for (size_t i = 0; i < arraysize(results); ++i) {
275 EXPECT_LE(0.0, results[i]);
276 EXPECT_GT(1.0, results[i]);
279 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
280 GenerateSHA1Entropy("yo", "1"));
281 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
282 GenerateSHA1Entropy("yo", "else"));
285 TEST(EntropyProviderTest, PermutedEntropy) {
286 const double results[] = {
287 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
288 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
290 EXPECT_NE(results[0], results[1]);
291 for (size_t i = 0; i < arraysize(results); ++i) {
292 EXPECT_LE(0.0, results[i]);
293 EXPECT_GT(1.0, results[i]);
296 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
297 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
298 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
299 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
302 TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
303 // Verifies that PermutedEntropyProvider produces expected results. This
304 // ensures that the results are the same between platforms and ensures that
305 // changes to the implementation do not regress this accidentally.
307 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
308 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
309 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
310 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
311 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
312 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
315 TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
316 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
317 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
318 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
322 TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
323 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
324 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
325 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
329 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) {
330 // Verifies that SeededRandGenerator has a uniform distribution.
332 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
334 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
335 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
336 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
337 const int kMinAttempts = 1000;
338 const int kMaxAttempts = 1000000;
340 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
341 const uint32 seed = HashName(kTestTrialNames[i]);
342 internal::SeededRandGenerator rand_generator(seed);
344 double cumulative_average = 0.0;
345 int count = 0;
346 while (count < kMaxAttempts) {
347 uint32 value = rand_generator(kTopOfRange);
348 cumulative_average = (count * cumulative_average + value) / (count + 1);
350 // Don't quit too quickly for things to start converging, or we may have
351 // a false positive.
352 if (count > kMinAttempts &&
353 kExpectedAverage - kAllowedVariance < cumulative_average &&
354 cumulative_average < kExpectedAverage + kAllowedVariance) {
355 break;
358 ++count;
361 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
362 kExpectedAverage << ", average ended at " << cumulative_average <<
363 ", for trial " << kTestTrialNames[i];
367 } // namespace metrics