Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / base / i18n / streaming_utf8_validator_perftest.cc
blobac2eb0820bac01a3a172fb89236d92a9763562e9
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // All data that is passed through a WebSocket with type "Text" needs to be
6 // validated as UTF8. Since this is done on the IO thread, it needs to be
7 // reasonably fast.
9 // We are only interested in the performance on valid UTF8. Invalid UTF8 will
10 // result in a connection failure, so is unlikely to become a source of
11 // performance issues.
13 #include "base/i18n/streaming_utf8_validator.h"
15 #include <string>
17 #include "base/basictypes.h"
18 #include "base/bind.h"
19 #include "base/callback.h"
20 #include "base/strings/string_util.h"
21 #include "base/strings/stringprintf.h"
22 #include "base/test/perf_time_logger.h"
23 #include "testing/gtest/include/gtest/gtest.h"
25 namespace base {
26 namespace {
// We want to test ranges of valid UTF-8 sequences. These ranges are inclusive.
// They are intended to be large enough that the validator needs to do
// meaningful work while being in some sense "realistic" (eg. control characters
// are not included).
//
// Each constant is a NUL-terminated byte string holding exactly one encoded
// code point; the Start/End pair of a given width delimits one range.
const char kOneByteSeqRangeStart[] = " ";  // U+0020
const char kOneByteSeqRangeEnd[] = "~";    // U+007E

const char kTwoByteSeqRangeStart[] = "\xc2\xa0";  // U+00A0 non-breaking space
const char kTwoByteSeqRangeEnd[] = "\xc9\x8f";    // U+024F small y with stroke

const char kThreeByteSeqRangeStart[] = "\xe3\x81\x82";  // U+3042 Hiragana "a"
const char kThreeByteSeqRangeEnd[] = "\xe9\xbf\x83";    // U+9FC3 "to blink"

const char kFourByteSeqRangeStart[] = "\xf0\xa0\x80\x8b";  // U+2000B
const char kFourByteSeqRangeEnd[] = "\xf0\xaa\x9a\xb2";    // U+2A6B2
// The different lengths of strings to test, in bytes. These are minimum
// lengths: the string builders in this file produce strings that equal or
// exceed the requested length.
const size_t kTestLengths[] = {1, 32, 256, 32768, 1 << 20};
// Simplest possible byte-at-a-time validator, to provide a baseline
// for comparison. This is only tried on 1-byte UTF-8 sequences, as
// the results will not be meaningful with sequences containing
// top-bit-set bytes.
//
// Returns true iff no byte of |s| has its top bit (0x80) set. An empty string
// yields true.
bool IsString7Bit(const std::string& s) {
  for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
    if (*it & 0x80)
      return false;
  }
  return true;
}
59 // Assumes that |previous| is a valid UTF-8 sequence, and attempts to return
60 // the next one. Is just barely smart enough to iterate through the ranges
61 // defined about.
62 std::string NextUtf8Sequence(const std::string& previous) {
63 DCHECK(StreamingUtf8Validator::Validate(previous));
64 std::string next = previous;
65 for (int i = static_cast<int>(previous.length() - 1); i >= 0; --i) {
66 // All bytes in a UTF-8 sequence except the first one are
67 // constrained to the range 0x80 to 0xbf, inclusive. When we
68 // increment past 0xbf, we carry into the previous byte.
69 if (i > 0 && next[i] == '\xbf') {
70 next[i] = '\x80';
71 continue; // carry
73 ++next[i];
74 break; // no carry
76 DCHECK(StreamingUtf8Validator::Validate(next))
77 << "Result \"" << next << "\" failed validation";
78 return next;
// Signature shared by every validator under test: takes the string to check
// and returns a bool verdict.
typedef bool (*TestTargetType)(const std::string&);
83 // Run fuction |target| over |test_string| |times| times, and report the results
84 // using |description|.
85 bool RunTest(const std::string& description,
86 TestTargetType target,
87 const std::string& test_string,
88 int times) {
89 base::PerfTimeLogger timer(description.c_str());
90 bool result = true;
91 for (int i = 0; i < times; ++i) {
92 result = target(test_string) && result;
94 timer.Done();
95 return result;
// Construct a string by repeating |input| enough times to equal or exceed
// |length|.
//
// The result always consists of a whole number of copies of |input| (at least
// one, even when |length| is 0), so multi-byte sequences are never split. An
// empty |input| can never reach a non-zero |length|; in that case an empty
// string is returned instead of looping forever.
std::string ConstructRepeatedTestString(const std::string& input,
                                        size_t length) {
  if (input.empty())
    return std::string();

  const size_t unit = input.length();
  // Number of whole copies needed, rounded up, with a minimum of one.
  size_t copies = length / unit + (length % unit != 0 ? 1 : 0);
  if (copies == 0)
    copies = 1;
  const size_t target = copies * unit;

  std::string output = input;
  // Reserving up front means the self-appends below never reallocate.
  output.reserve(target);
  while (output.length() < target) {
    // Double the string each round, clipped to what is still missing. Both
    // quantities are multiples of |unit|, so only whole copies are appended.
    const size_t remaining = target - output.length();
    const size_t chunk =
        remaining < output.length() ? remaining : output.length();
    output.append(output, 0, chunk);
  }
  return output;
}
112 // Construct a string by expanding the range of UTF-8 sequences
113 // between |input_start| and |input_end|, inclusive, and then
114 // repeating the resulting string until it equals or exceeds |length|
115 // bytes. |input_start| and |input_end| must be valid UTF-8
116 // sequences.
117 std::string ConstructRangedTestString(const std::string& input_start,
118 const std::string& input_end,
119 size_t length) {
120 std::string output = input_start;
121 std::string input = input_start;
122 while (output.length() < length && input != input_end) {
123 input = NextUtf8Sequence(input);
124 output += input;
126 if (output.length() < length) {
127 output = ConstructRepeatedTestString(output, length);
129 return output;
132 struct TestFunctionDescription {
133 TestTargetType function;
134 const char* function_name;
137 // IsString7Bit is intentionally placed last so it can be excluded easily.
138 const TestFunctionDescription kTestFunctions[] = {
139 {&StreamingUtf8Validator::Validate, "StreamingUtf8Validator"},
140 {&IsStringUTF8, "IsStringUTF8"}, {&IsString7Bit, "IsString7Bit"}};
142 // Construct a test string from |construct_test_string| for each of the lengths
143 // in |kTestLengths| in turn. For each string, run each test in |test_functions|
144 // for a number of iterations such that the total number of bytes validated
145 // is around 16MB.
146 void RunSomeTests(
147 const char format[],
148 base::Callback<std::string(size_t length)> construct_test_string,
149 const TestFunctionDescription* test_functions,
150 size_t test_count) {
151 for (size_t i = 0; i < arraysize(kTestLengths); ++i) {
152 const size_t length = kTestLengths[i];
153 const std::string test_string = construct_test_string.Run(length);
154 const int real_length = static_cast<int>(test_string.length());
155 const int times = (1 << 24) / real_length;
156 for (size_t test_index = 0; test_index < test_count; ++test_index) {
157 EXPECT_TRUE(RunTest(StringPrintf(format,
158 test_functions[test_index].function_name,
159 real_length,
160 times),
161 test_functions[test_index].function,
162 test_string,
163 times));
168 TEST(StreamingUtf8ValidatorPerfTest, OneByteRepeated) {
169 RunSomeTests("%s: bytes=1 repeated length=%d repeat=%d",
170 base::Bind(ConstructRepeatedTestString, kOneByteSeqRangeStart),
171 kTestFunctions,
175 TEST(StreamingUtf8ValidatorPerfTest, OneByteRange) {
176 RunSomeTests("%s: bytes=1 ranged length=%d repeat=%d",
177 base::Bind(ConstructRangedTestString,
178 kOneByteSeqRangeStart,
179 kOneByteSeqRangeEnd),
180 kTestFunctions,
184 TEST(StreamingUtf8ValidatorPerfTest, TwoByteRepeated) {
185 RunSomeTests("%s: bytes=2 repeated length=%d repeat=%d",
186 base::Bind(ConstructRepeatedTestString, kTwoByteSeqRangeStart),
187 kTestFunctions,
191 TEST(StreamingUtf8ValidatorPerfTest, TwoByteRange) {
192 RunSomeTests("%s: bytes=2 ranged length=%d repeat=%d",
193 base::Bind(ConstructRangedTestString,
194 kTwoByteSeqRangeStart,
195 kTwoByteSeqRangeEnd),
196 kTestFunctions,
200 TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRepeated) {
201 RunSomeTests(
202 "%s: bytes=3 repeated length=%d repeat=%d",
203 base::Bind(ConstructRepeatedTestString, kThreeByteSeqRangeStart),
204 kTestFunctions,
208 TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRange) {
209 RunSomeTests("%s: bytes=3 ranged length=%d repeat=%d",
210 base::Bind(ConstructRangedTestString,
211 kThreeByteSeqRangeStart,
212 kThreeByteSeqRangeEnd),
213 kTestFunctions,
217 TEST(StreamingUtf8ValidatorPerfTest, FourByteRepeated) {
218 RunSomeTests("%s: bytes=4 repeated length=%d repeat=%d",
219 base::Bind(ConstructRepeatedTestString, kFourByteSeqRangeStart),
220 kTestFunctions,
224 TEST(StreamingUtf8ValidatorPerfTest, FourByteRange) {
225 RunSomeTests("%s: bytes=4 ranged length=%d repeat=%d",
226 base::Bind(ConstructRangedTestString,
227 kFourByteSeqRangeStart,
228 kFourByteSeqRangeEnd),
229 kTestFunctions,
233 } // namespace
234 } // namespace base