1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/variations/entropy_provider.h"
11 #include "base/basictypes.h"
12 #include "base/guid.h"
13 #include "base/memory/scoped_ptr.h"
14 #include "base/rand_util.h"
15 #include "base/strings/string_number_conversions.h"
16 #include "components/variations/metrics_util.h"
17 #include "testing/gtest/include/gtest/gtest.h"
23 // Size of the low entropy source to use for the permuted entropy provider
25 const size_t kMaxLowEntropySize
= 8000;
27 // Field trial names used in unit tests.
28 const char* const kTestTrialNames
[] = { "TestTrial", "AnotherTestTrial",
31 // Computes the Chi-Square statistic for |values| assuming they follow a uniform
32 // distribution, where each entry has expected value |expected_value|.
34 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
35 // value and E is the expected value.
36 double ComputeChiSquare(const std::vector
<int>& values
,
37 double expected_value
) {
39 for (size_t i
= 0; i
< values
.size(); ++i
) {
40 const double delta
= values
[i
] - expected_value
;
41 sum
+= (delta
* delta
) / expected_value
;
46 // Computes SHA1-based entropy for the given |trial_name| based on
48 double GenerateSHA1Entropy(const std::string
& entropy_source
,
49 const std::string
& trial_name
) {
50 SHA1EntropyProvider
sha1_provider(entropy_source
);
51 return sha1_provider
.GetEntropyForTrial(trial_name
, 0);
54 // Generates permutation-based entropy for the given |trial_name| based on
55 // |entropy_source| which must be in the range [0, entropy_max).
56 double GeneratePermutedEntropy(uint16 entropy_source
,
58 const std::string
& trial_name
) {
59 PermutedEntropyProvider
permuted_provider(entropy_source
, entropy_max
);
60 return permuted_provider
.GetEntropyForTrial(trial_name
, 0);
63 // Helper interface for testing used to generate entropy values for a given
64 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source
65 // value constant and generates entropy for different trial names, instances
66 // of TrialEntropyGenerator keep the trial name constant and generate low/high
67 // entropy source values internally to produce each output entropy value.
68 class TrialEntropyGenerator
{
70 virtual ~TrialEntropyGenerator() {}
71 virtual double GenerateEntropyValue() const = 0;
74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
76 // entropy corresponding to a low entropy source).
77 class SHA1EntropyGenerator
: public TrialEntropyGenerator
{
79 explicit SHA1EntropyGenerator(const std::string
& trial_name
)
80 : trial_name_(trial_name
) {
83 virtual ~SHA1EntropyGenerator() {
86 virtual double GenerateEntropyValue() const OVERRIDE
{
87 // Use a random GUID + 13 additional bits of entropy to match how the
88 // SHA1EntropyProvider is used in metrics_service.cc.
89 const int low_entropy_source
=
90 static_cast<uint16
>(base::RandInt(0, kMaxLowEntropySize
- 1));
91 const std::string high_entropy_source
=
92 base::GenerateGUID() + base::IntToString(low_entropy_source
);
93 return GenerateSHA1Entropy(high_entropy_source
, trial_name_
);
97 std::string trial_name_
;
99 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator
);
102 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
103 // using 13-bit low entropy source values.
104 class PermutedEntropyGenerator
: public TrialEntropyGenerator
{
106 explicit PermutedEntropyGenerator(const std::string
& trial_name
)
107 : mapping_(kMaxLowEntropySize
) {
108 // Note: Given a trial name, the computed mapping will be the same.
109 // As a performance optimization, pre-compute the mapping once per trial
110 // name and index into it for each entropy value.
111 const uint32 randomization_seed
= HashName(trial_name
);
112 internal::PermuteMappingUsingRandomizationSeed(randomization_seed
,
116 virtual ~PermutedEntropyGenerator() {
119 virtual double GenerateEntropyValue() const OVERRIDE
{
120 const int low_entropy_source
=
121 static_cast<uint16
>(base::RandInt(0, kMaxLowEntropySize
- 1));
122 return mapping_
[low_entropy_source
] /
123 static_cast<double>(kMaxLowEntropySize
);
127 std::vector
<uint16
> mapping_
;
129 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator
);
132 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
134 void PerformEntropyUniformityTest(
135 const std::string
& trial_name
,
136 const TrialEntropyGenerator
& entropy_generator
) {
137 // Number of buckets in the simulated field trials.
138 const size_t kBucketCount
= 20;
139 // Max number of iterations to perform before giving up and failing.
140 const size_t kMaxIterationCount
= 100000;
141 // The number of iterations to perform before each time the statistical
142 // significance of the results is checked.
143 const size_t kCheckIterationCount
= 10000;
144 // This is the Chi-Square threshold from the Chi-Square statistic table for
145 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
146 // level. See: http://www.medcalc.org/manual/chi-square-table.php
147 const double kChiSquareThreshold
= 43.82;
149 std::vector
<int> distribution(kBucketCount
);
151 for (size_t i
= 1; i
<= kMaxIterationCount
; ++i
) {
152 const double entropy_value
= entropy_generator
.GenerateEntropyValue();
153 const size_t bucket
= static_cast<size_t>(kBucketCount
* entropy_value
);
154 ASSERT_LT(bucket
, kBucketCount
);
155 distribution
[bucket
] += 1;
157 // After |kCheckIterationCount| iterations, compute the Chi-Square
158 // statistic of the distribution. If the resulting statistic is greater
159 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
160 // that the observed samples do not follow a uniform distribution.
162 // However, since 99.9% would still result in a false negative every
163 // 1000 runs of the test, do not treat it as a failure (else the test
164 // will be flaky). Instead, perform additional iterations to determine
165 // if the distribution will converge, up to |kMaxIterationCount|.
166 if ((i
% kCheckIterationCount
) == 0) {
167 const double expected_value_per_bucket
=
168 static_cast<double>(i
) / kBucketCount
;
169 const double chi_square
=
170 ComputeChiSquare(distribution
, expected_value_per_bucket
);
171 if (chi_square
< kChiSquareThreshold
)
174 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
175 // converge after |kMaxIterationCount|.
176 EXPECT_NE(i
, kMaxIterationCount
) << "Failed for trial " <<
177 trial_name
<< " with chi_square = " << chi_square
<<
178 " after " << kMaxIterationCount
<< " iterations.";
185 TEST(EntropyProviderTest
, UseOneTimeRandomizationSHA1
) {
186 // Simply asserts that two trials using one-time randomization
187 // that have different names, normally generate different results.
189 // Note that depending on the one-time random initialization, they
190 // _might_ actually give the same result, but we know that given
191 // the particular client_id we use for unit tests they won't.
192 base::FieldTrialList
field_trial_list(new SHA1EntropyProvider("client_id"));
193 const int kNoExpirationYear
= base::FieldTrialList::kNoExpirationYear
;
194 scoped_refptr
<base::FieldTrial
> trials
[] = {
195 base::FieldTrialList::FactoryGetFieldTrial(
196 "one", 100, "default", kNoExpirationYear
, 1, 1,
197 base::FieldTrial::ONE_TIME_RANDOMIZED
, NULL
),
198 base::FieldTrialList::FactoryGetFieldTrial(
199 "two", 100, "default", kNoExpirationYear
, 1, 1,
200 base::FieldTrial::ONE_TIME_RANDOMIZED
, NULL
),
203 for (size_t i
= 0; i
< arraysize(trials
); ++i
) {
204 for (int j
= 0; j
< 100; ++j
)
205 trials
[i
]->AppendGroup(std::string(), 1);
208 // The trials are most likely to give different results since they have
210 EXPECT_NE(trials
[0]->group(), trials
[1]->group());
211 EXPECT_NE(trials
[0]->group_name(), trials
[1]->group_name());
214 TEST(EntropyProviderTest
, UseOneTimeRandomizationPermuted
) {
215 // Simply asserts that two trials using one-time randomization
216 // that have different names, normally generate different results.
218 // Note that depending on the one-time random initialization, they
219 // _might_ actually give the same result, but we know that given
220 // the particular client_id we use for unit tests they won't.
221 base::FieldTrialList
field_trial_list(
222 new PermutedEntropyProvider(1234, kMaxLowEntropySize
));
223 const int kNoExpirationYear
= base::FieldTrialList::kNoExpirationYear
;
224 scoped_refptr
<base::FieldTrial
> trials
[] = {
225 base::FieldTrialList::FactoryGetFieldTrial(
226 "one", 100, "default", kNoExpirationYear
, 1, 1,
227 base::FieldTrial::ONE_TIME_RANDOMIZED
, NULL
),
228 base::FieldTrialList::FactoryGetFieldTrial(
229 "two", 100, "default", kNoExpirationYear
, 1, 1,
230 base::FieldTrial::ONE_TIME_RANDOMIZED
, NULL
),
233 for (size_t i
= 0; i
< arraysize(trials
); ++i
) {
234 for (int j
= 0; j
< 100; ++j
)
235 trials
[i
]->AppendGroup(std::string(), 1);
238 // The trials are most likely to give different results since they have
240 EXPECT_NE(trials
[0]->group(), trials
[1]->group());
241 EXPECT_NE(trials
[0]->group_name(), trials
[1]->group_name());
244 TEST(EntropyProviderTest
, UseOneTimeRandomizationWithCustomSeedPermuted
) {
245 // Ensures that two trials with different names but the same custom seed used
246 // for one time randomization produce the same group assignments.
247 base::FieldTrialList
field_trial_list(
248 new PermutedEntropyProvider(1234, kMaxLowEntropySize
));
249 const int kNoExpirationYear
= base::FieldTrialList::kNoExpirationYear
;
250 const uint32 kCustomSeed
= 9001;
251 scoped_refptr
<base::FieldTrial
> trials
[] = {
252 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
253 "one", 100, "default", kNoExpirationYear
, 1, 1,
254 base::FieldTrial::ONE_TIME_RANDOMIZED
, kCustomSeed
, NULL
),
255 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
256 "two", 100, "default", kNoExpirationYear
, 1, 1,
257 base::FieldTrial::ONE_TIME_RANDOMIZED
, kCustomSeed
, NULL
),
260 for (size_t i
= 0; i
< arraysize(trials
); ++i
) {
261 for (int j
= 0; j
< 100; ++j
)
262 trials
[i
]->AppendGroup(std::string(), 1);
265 // Normally, these trials should produce different groups, but if the same
266 // custom seed is used, they should produce the same group assignment.
267 EXPECT_EQ(trials
[0]->group(), trials
[1]->group());
268 EXPECT_EQ(trials
[0]->group_name(), trials
[1]->group_name());
271 TEST(EntropyProviderTest
, SHA1Entropy
) {
272 const double results
[] = { GenerateSHA1Entropy("hi", "1"),
273 GenerateSHA1Entropy("there", "1") };
275 EXPECT_NE(results
[0], results
[1]);
276 for (size_t i
= 0; i
< arraysize(results
); ++i
) {
277 EXPECT_LE(0.0, results
[i
]);
278 EXPECT_GT(1.0, results
[i
]);
281 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
282 GenerateSHA1Entropy("yo", "1"));
283 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
284 GenerateSHA1Entropy("yo", "else"));
287 TEST(EntropyProviderTest
, PermutedEntropy
) {
288 const double results
[] = {
289 GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "1"),
290 GeneratePermutedEntropy(4321, kMaxLowEntropySize
, "1") };
292 EXPECT_NE(results
[0], results
[1]);
293 for (size_t i
= 0; i
< arraysize(results
); ++i
) {
294 EXPECT_LE(0.0, results
[i
]);
295 EXPECT_GT(1.0, results
[i
]);
298 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "1"),
299 GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "1"));
300 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "something"),
301 GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "else"));
304 TEST(EntropyProviderTest
, PermutedEntropyProviderResults
) {
305 // Verifies that PermutedEntropyProvider produces expected results. This
306 // ensures that the results are the same between platforms and ensures that
307 // changes to the implementation do not regress this accidentally.
309 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize
),
310 GeneratePermutedEntropy(1234, kMaxLowEntropySize
, "XYZ"));
311 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize
),
312 GeneratePermutedEntropy(1, kMaxLowEntropySize
, "Test"));
313 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize
),
314 GeneratePermutedEntropy(5000, kMaxLowEntropySize
, "Foo"));
317 TEST(EntropyProviderTest
, SHA1EntropyIsUniform
) {
318 for (size_t i
= 0; i
< arraysize(kTestTrialNames
); ++i
) {
319 SHA1EntropyGenerator
entropy_generator(kTestTrialNames
[i
]);
320 PerformEntropyUniformityTest(kTestTrialNames
[i
], entropy_generator
);
324 TEST(EntropyProviderTest
, PermutedEntropyIsUniform
) {
325 for (size_t i
= 0; i
< arraysize(kTestTrialNames
); ++i
) {
326 PermutedEntropyGenerator
entropy_generator(kTestTrialNames
[i
]);
327 PerformEntropyUniformityTest(kTestTrialNames
[i
], entropy_generator
);
331 TEST(EntropyProviderTest
, SeededRandGeneratorIsUniform
) {
332 // Verifies that SeededRandGenerator has a uniform distribution.
334 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
336 const uint32 kTopOfRange
= (std::numeric_limits
<uint32
>::max() / 4ULL) * 3ULL;
337 const uint32 kExpectedAverage
= kTopOfRange
/ 2ULL;
338 const uint32 kAllowedVariance
= kExpectedAverage
/ 50ULL; // +/- 2%
339 const int kMinAttempts
= 1000;
340 const int kMaxAttempts
= 1000000;
342 for (size_t i
= 0; i
< arraysize(kTestTrialNames
); ++i
) {
343 const uint32 seed
= HashName(kTestTrialNames
[i
]);
344 internal::SeededRandGenerator
rand_generator(seed
);
346 double cumulative_average
= 0.0;
348 while (count
< kMaxAttempts
) {
349 uint32 value
= rand_generator(kTopOfRange
);
350 cumulative_average
= (count
* cumulative_average
+ value
) / (count
+ 1);
352 // Don't quit too quickly for things to start converging, or we may have
354 if (count
> kMinAttempts
&&
355 kExpectedAverage
- kAllowedVariance
< cumulative_average
&&
356 cumulative_average
< kExpectedAverage
+ kAllowedVariance
) {
363 ASSERT_LT(count
, kMaxAttempts
) << "Expected average was " <<
364 kExpectedAverage
<< ", average ended at " << cumulative_average
<<
365 ", for trial " << kTestTrialNames
[i
];
369 } // namespace metrics