Move AsyncPolicyProvider, etc. to components/policy/.
[chromium-blink-merge.git] / net / base / escape_unittest.cc
blobe7e435c08ef0c0877238a248ac085eaf77d38c3a
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include <algorithm>
#include <string>
#include <vector>

#include "net/base/escape.h"

#include "base/basictypes.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
17 namespace net {
18 namespace {
20 const size_t kNpos = base::string16::npos;
// One escaping test vector: a raw string and the escaped string the test
// expects to get back for it.
struct EscapeCase {
  const char* input;   // String handed to the escaping function.
  const char* output;  // Escaped string the test expects in return.
};
27 struct UnescapeURLCase {
28 const wchar_t* input;
29 UnescapeRule::Type rules;
30 const wchar_t* output;
33 struct UnescapeURLCaseASCII {
34 const char* input;
35 UnescapeRule::Type rules;
36 const char* output;
// One test vector for the combined unescape-and-decode test: a single input
// and the three results expected from the three calls the test makes on it.
struct UnescapeAndDecodeCase {
  const char* input;

  // The expected output of UnescapeURLComponent() with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // The expected output of UnescapeURLComponent() with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // The expected output of UnescapeAndDecodeUTF8URLComponent() with
  // UnescapeRule::NORMAL.
  const wchar_t* decoded;
};
// One offset-adjustment test vector: an escaped input, an offset into that
// input, and the offset expected after unescaping and decoding.
struct AdjustOffsetCase {
  const char* input;     // Escaped text passed to the decoder.
  size_t input_offset;   // Offset value handed in through the out-parameter.
  size_t output_offset;  // Offset value expected after the call returns.
};
// One HTML escaping/unescaping test vector: the text fed to the function
// under test and the text expected back.
struct EscapeForHTMLCase {
  const char* input;            // Text passed to the function under test.
  const char* expected_output;  // Text the test expects in return.
};
63 TEST(EscapeTest, EscapeTextForFormSubmission) {
64 const EscapeCase escape_cases[] = {
65 {"foo", "foo"},
66 {"foo bar", "foo+bar"},
67 {"foo++", "foo%2B%2B"}
69 for (size_t i = 0; i < arraysize(escape_cases); ++i) {
70 EscapeCase value = escape_cases[i];
71 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true));
74 const EscapeCase escape_cases_no_plus[] = {
75 {"foo", "foo"},
76 {"foo bar", "foo%20bar"},
77 {"foo++", "foo%2B%2B"}
79 for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
80 EscapeCase value = escape_cases_no_plus[i];
81 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false));
84 // Test all the values in we're supposed to be escaping.
85 const std::string no_escape(
86 "abcdefghijklmnopqrstuvwxyz"
87 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
88 "0123456789"
89 "!'()*-._~");
90 for (int i = 0; i < 256; ++i) {
91 std::string in;
92 in.push_back(i);
93 std::string out = EscapeQueryParamValue(in, true);
94 if (0 == i) {
95 EXPECT_EQ(out, std::string("%00"));
96 } else if (32 == i) {
97 // Spaces are plus escaped like web forms.
98 EXPECT_EQ(out, std::string("+"));
99 } else if (no_escape.find(in) == std::string::npos) {
100 // Check %hex escaping
101 std::string expected = base::StringPrintf("%%%02X", i);
102 EXPECT_EQ(expected, out);
103 } else {
104 // No change for things in the no_escape list.
105 EXPECT_EQ(out, in);
110 TEST(EscapeTest, EscapePath) {
111 ASSERT_EQ(
112 // Most of the character space we care about, un-escaped
113 EscapePath(
114 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
115 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
116 "[\\]^_`abcdefghijklmnopqrstuvwxyz"
117 "{|}~\x7f\x80\xff"),
118 // Escaped
119 "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
120 "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
121 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
122 "%7B%7C%7D~%7F%80%FF");
125 TEST(EscapeTest, EscapeUrlEncodedData) {
126 ASSERT_EQ(
127 // Most of the character space we care about, un-escaped
128 EscapeUrlEncodedData(
129 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
130 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
131 "[\\]^_`abcdefghijklmnopqrstuvwxyz"
132 "{|}~\x7f\x80\xff", true),
133 // Escaped
134 "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
135 "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
136 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
137 "%7B%7C%7D~%7F%80%FF");
140 TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
141 ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
142 ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
145 TEST(EscapeTest, UnescapeURLComponentASCII) {
146 const UnescapeURLCaseASCII unescape_cases[] = {
147 {"", UnescapeRule::NORMAL, ""},
148 {"%2", UnescapeRule::NORMAL, "%2"},
149 {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
150 {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
151 {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
152 {"Some%20random text %25%2dOK", UnescapeRule::NONE,
153 "Some%20random text %25%2dOK"},
154 {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
155 "Some%20random text %25-OK"},
156 {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
157 "Some random text %25-OK"},
158 {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
159 "Some%20random text %-OK"},
160 {"Some%20random text %25%2dOK",
161 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
162 "Some random text %-OK"},
163 {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
164 {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
165 // Certain URL-sensitive characters should not be unescaped unless asked.
166 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
167 "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
168 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
169 UnescapeRule::URL_SPECIAL_CHARS,
170 "Hello%20%13%10world ## ?? == && %% ++"},
171 // We can neither escape nor unescape '@' since some websites expect it to
172 // be preserved as either '@' or "%40".
173 // See http://b/996720 and http://crbug.com/23933 .
174 {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
175 // Control characters.
176 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
177 "%01%02%03%04%05%06%07%08%09 %"},
178 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
179 "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
180 {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
181 {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
184 for (size_t i = 0; i < arraysize(unescape_cases); i++) {
185 std::string str(unescape_cases[i].input);
186 EXPECT_EQ(std::string(unescape_cases[i].output),
187 UnescapeURLComponent(str, unescape_cases[i].rules));
190 // Test the NULL character unescaping (which wouldn't work above since those
191 // are just char pointers).
192 std::string input("Null");
193 input.push_back(0); // Also have a NULL in the input.
194 input.append("%00%39Test");
196 // When we're unescaping NULLs
197 std::string expected("Null");
198 expected.push_back(0);
199 expected.push_back(0);
200 expected.append("9Test");
201 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
203 // When we're not unescaping NULLs.
204 expected = "Null";
205 expected.push_back(0);
206 expected.append("%009Test");
207 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
210 TEST(EscapeTest, UnescapeURLComponent) {
211 const UnescapeURLCase unescape_cases[] = {
212 {L"", UnescapeRule::NORMAL, L""},
213 {L"%2", UnescapeRule::NORMAL, L"%2"},
214 {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
215 {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
216 {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
217 {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
218 L"Some%20random text %25%2dOK"},
219 {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
220 L"Some%20random text %25-OK"},
221 {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
222 L"Some random text %25-OK"},
223 {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
224 L"Some%20random text %-OK"},
225 {L"Some%20random text %25%2dOK",
226 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
227 L"Some random text %-OK"},
228 {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
229 {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
230 // Certain URL-sensitive characters should not be unescaped unless asked.
231 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
232 L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
233 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
234 UnescapeRule::URL_SPECIAL_CHARS,
235 L"Hello%20%13%10world ## ?? == && %% ++"},
236 // We can neither escape nor unescape '@' since some websites expect it to
237 // be preserved as either '@' or "%40".
238 // See http://b/996720 and http://crbug.com/23933 .
239 {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
240 // Control characters.
241 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
242 L"%01%02%03%04%05%06%07%08%09 %"},
243 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
244 L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
245 {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
246 {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
247 L"Hello%20\x13\x10\x02"},
248 {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
249 L"Hello\x9824\x9827"},
252 for (size_t i = 0; i < arraysize(unescape_cases); i++) {
253 base::string16 str(WideToUTF16(unescape_cases[i].input));
254 EXPECT_EQ(WideToUTF16(unescape_cases[i].output),
255 UnescapeURLComponent(str, unescape_cases[i].rules));
258 // Test the NULL character unescaping (which wouldn't work above since those
259 // are just char pointers).
260 base::string16 input(WideToUTF16(L"Null"));
261 input.push_back(0); // Also have a NULL in the input.
262 input.append(WideToUTF16(L"%00%39Test"));
264 // When we're unescaping NULLs
265 base::string16 expected(WideToUTF16(L"Null"));
266 expected.push_back(0);
267 expected.push_back(0);
268 expected.append(ASCIIToUTF16("9Test"));
269 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
271 // When we're not unescaping NULLs.
272 expected = WideToUTF16(L"Null");
273 expected.push_back(0);
274 expected.append(WideToUTF16(L"%009Test"));
275 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
278 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
279 const UnescapeAndDecodeCase unescape_cases[] = {
280 { "%",
281 "%",
282 "%",
283 L"%"},
284 { "+",
285 "+",
286 " ",
287 L"+"},
288 { "%2+",
289 "%2+",
290 "%2 ",
291 L"%2+"},
292 { "+%%%+%%%",
293 "+%%%+%%%",
294 " %%% %%%",
295 L"+%%%+%%%"},
296 { "Don't escape anything",
297 "Don't escape anything",
298 "Don't escape anything",
299 L"Don't escape anything"},
300 { "+Invalid %escape %2+",
301 "+Invalid %escape %2+",
302 " Invalid %escape %2 ",
303 L"+Invalid %escape %2+"},
304 { "Some random text %25%2dOK",
305 "Some random text %25-OK",
306 "Some random text %25-OK",
307 L"Some random text %25-OK"},
308 { "%01%02%03%04%05%06%07%08%09",
309 "%01%02%03%04%05%06%07%08%09",
310 "%01%02%03%04%05%06%07%08%09",
311 L"%01%02%03%04%05%06%07%08%09"},
312 { "%E4%BD%A0+%E5%A5%BD",
313 "\xE4\xBD\xA0+\xE5\xA5\xBD",
314 "\xE4\xBD\xA0 \xE5\xA5\xBD",
315 L"\x4f60+\x597d"},
316 { "%ED%ED", // Invalid UTF-8.
317 "\xED\xED",
318 "\xED\xED",
319 L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped.
322 for (size_t i = 0; i < arraysize(unescape_cases); i++) {
323 std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
324 UnescapeRule::NORMAL);
325 EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
327 unescaped = UnescapeURLComponent(unescape_cases[i].input,
328 UnescapeRule::REPLACE_PLUS_WITH_SPACE);
329 EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
331 // TODO: Need to test unescape_spaces and unescape_percent.
332 base::string16 decoded = UnescapeAndDecodeUTF8URLComponent(
333 unescape_cases[i].input, UnescapeRule::NORMAL, NULL);
334 EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded);
338 TEST(EscapeTest, AdjustOffset) {
339 const AdjustOffsetCase adjust_cases[] = {
340 {"", 0, 0},
341 {"", 1, std::string::npos},
342 {"test", 0, 0},
343 {"test", 2, 2},
344 {"test", 4, 4},
345 {"test", 5, std::string::npos},
346 {"test", std::string::npos, std::string::npos},
347 {"%2dtest", 6, 4},
348 {"%2dtest", 2, std::string::npos},
349 {"test%2d", 2, 2},
350 {"%E4%BD%A0+%E5%A5%BD", 9, 1},
351 {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
352 {"%ED%B0%80+%E5%A5%BD", 6, 6},
355 for (size_t i = 0; i < arraysize(adjust_cases); i++) {
356 size_t offset = adjust_cases[i].input_offset;
357 UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
358 UnescapeRule::NORMAL, &offset);
359 EXPECT_EQ(adjust_cases[i].output_offset, offset);
363 TEST(EscapeTest, EscapeForHTML) {
364 const EscapeForHTMLCase tests[] = {
365 { "hello", "hello" },
366 { "<hello>", "&lt;hello&gt;" },
367 { "don\'t mess with me", "don&#39;t mess with me" },
369 for (size_t i = 0; i < arraysize(tests); ++i) {
370 std::string result = EscapeForHTML(std::string(tests[i].input));
371 EXPECT_EQ(std::string(tests[i].expected_output), result);
375 TEST(EscapeTest, UnescapeForHTML) {
376 const EscapeForHTMLCase tests[] = {
377 { "", "" },
378 { "&lt;hello&gt;", "<hello>" },
379 { "don&#39;t mess with me", "don\'t mess with me" },
380 { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
381 { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
382 { "&amp;", "&" },
383 { "&quot;", "\"" },
384 { "&#39;", "'" },
385 { "&lt;", "<" },
386 { "&gt;", ">" },
387 { "&amp; &", "& &" },
389 for (size_t i = 0; i < arraysize(tests); ++i) {
390 base::string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input));
391 EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
395 TEST(EscapeTest, AdjustEncodingOffset) {
396 // Imagine we have strings as shown in the following cases where the
397 // %XX's represent encoded characters
399 // 1: abc%ECdef ==> abcXdef
400 std::vector<size_t> offsets;
401 for (size_t t = 0; t < 9; ++t)
402 offsets.push_back(t);
403 internal::AdjustEncodingOffset::Adjustments adjustments;
404 adjustments.push_back(3);
405 std::for_each(offsets.begin(), offsets.end(),
406 internal::AdjustEncodingOffset(adjustments));
407 size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
408 EXPECT_EQ(offsets.size(), arraysize(expected_1));
409 for (size_t i = 0; i < arraysize(expected_1); ++i)
410 EXPECT_EQ(expected_1[i], offsets[i]);
413 // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
414 offsets.clear();
415 for (size_t t = 0; t < 18; ++t)
416 offsets.push_back(t);
417 adjustments.clear();
418 adjustments.push_back(0);
419 adjustments.push_back(6);
420 adjustments.push_back(9);
421 adjustments.push_back(15);
422 std::for_each(offsets.begin(), offsets.end(),
423 internal::AdjustEncodingOffset(adjustments));
424 size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
425 kNpos, 6, 7, 8, 9, kNpos, kNpos};
426 EXPECT_EQ(offsets.size(), arraysize(expected_2));
427 for (size_t i = 0; i < arraysize(expected_2); ++i)
428 EXPECT_EQ(expected_2[i], offsets[i]);
431 } // namespace
432 } // namespace net