1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
8 #include "net/base/escape.h"
10 #include "base/basictypes.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "testing/gtest/include/gtest/gtest.h"
24 struct UnescapeURLCase
{
26 UnescapeRule::Type rules
;
27 const wchar_t* output
;
30 struct UnescapeURLCaseASCII
{
32 UnescapeRule::Type rules
;
// One row of the UnescapeAndDecodeUTF8URLComponent table. A single escaped
// input is paired with the result expected from each unescaping call that the
// test makes on it.
struct UnescapeAndDecodeCase {
  // The escaped text fed to every call below.
  const char* input;

  // The expected output of UnescapeURLComponent with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // The expected output of UnescapeURLComponent with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // The expected output of UnescapeAndDecodeUTF8URLComponent with
  // UnescapeRule::NORMAL (converted with base::WideToUTF16 before comparing).
  const wchar_t* decoded;
};
// One row of the AdjustOffset table: an escaped input, an offset into that
// input, and the offset expected after the adjustments recorded while
// unescaping are applied. std::string::npos is used as the expected value in
// rows where the input offset has no counterpart in the output.
struct AdjustOffsetCase {
  const char* input;
  size_t input_offset;
  size_t output_offset;
};
// One row shared by the EscapeForHTML and UnescapeForHTML tables: the text
// passed to the function under test and the text expected back.
struct EscapeForHTMLCase {
  const char* input;
  const char* expected_output;
};
60 TEST(EscapeTest
, EscapeTextForFormSubmission
) {
61 const EscapeCase escape_cases
[] = {
63 {"foo bar", "foo+bar"},
64 {"foo++", "foo%2B%2B"}
66 for (size_t i
= 0; i
< arraysize(escape_cases
); ++i
) {
67 EscapeCase value
= escape_cases
[i
];
68 EXPECT_EQ(value
.output
, EscapeQueryParamValue(value
.input
, true));
71 const EscapeCase escape_cases_no_plus
[] = {
73 {"foo bar", "foo%20bar"},
74 {"foo++", "foo%2B%2B"}
76 for (size_t i
= 0; i
< arraysize(escape_cases_no_plus
); ++i
) {
77 EscapeCase value
= escape_cases_no_plus
[i
];
78 EXPECT_EQ(value
.output
, EscapeQueryParamValue(value
.input
, false));
81 // Test all the values we're supposed to be escaping.
82 const std::string
no_escape(
83 "abcdefghijklmnopqrstuvwxyz"
84 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
87 for (int i
= 0; i
< 256; ++i
) {
90 std::string out
= EscapeQueryParamValue(in
, true);
92 EXPECT_EQ(out
, std::string("%00"));
94 // Spaces are plus escaped like web forms.
95 EXPECT_EQ(out
, std::string("+"));
96 } else if (no_escape
.find(in
) == std::string::npos
) {
97 // Check %hex escaping
98 std::string expected
= base::StringPrintf("%%%02X", i
);
99 EXPECT_EQ(expected
, out
);
101 // No change for things in the no_escape list.
107 TEST(EscapeTest
, EscapePath
) {
109 // Most of the character space we care about, un-escaped
111 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
112 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
113 "[\\]^_`abcdefghijklmnopqrstuvwxyz"
116 "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
117 "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
118 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
119 "%7B%7C%7D~%7F%80%FF");
122 TEST(EscapeTest
, DataURLWithAccentedCharacters
) {
123 const std::string url
=
124 "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
125 "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";
127 base::OffsetAdjuster::Adjustments adjustments
;
128 net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(
129 url
, UnescapeRule::SPACES
, &adjustments
);
132 TEST(EscapeTest
, EscapeUrlEncodedData
) {
134 // Most of the character space we care about, un-escaped
135 EscapeUrlEncodedData(
136 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
137 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
138 "[\\]^_`abcdefghijklmnopqrstuvwxyz"
139 "{|}~\x7f\x80\xff", true),
141 "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
142 "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
143 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
144 "%7B%7C%7D~%7F%80%FF");
147 TEST(EscapeTest
, EscapeUrlEncodedDataSpace
) {
148 ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
149 ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
152 TEST(EscapeTest
, UnescapeURLComponentASCII
) {
153 const UnescapeURLCaseASCII unescape_cases
[] = {
154 {"", UnescapeRule::NORMAL
, ""},
155 {"%2", UnescapeRule::NORMAL
, "%2"},
156 {"%%%%%%", UnescapeRule::NORMAL
, "%%%%%%"},
157 {"Don't escape anything", UnescapeRule::NORMAL
, "Don't escape anything"},
158 {"Invalid %escape %2", UnescapeRule::NORMAL
, "Invalid %escape %2"},
159 {"Some%20random text %25%2dOK", UnescapeRule::NONE
,
160 "Some%20random text %25%2dOK"},
161 {"Some%20random text %25%2dOK", UnescapeRule::NORMAL
,
162 "Some%20random text %25-OK"},
163 {"Some%20random text %25%2dOK", UnescapeRule::SPACES
,
164 "Some random text %25-OK"},
165 {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS
,
166 "Some%20random text %-OK"},
167 {"Some%20random text %25%2dOK",
168 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
,
169 "Some random text %-OK"},
170 {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL
, "\xA0\xB1\xC2\xD3\xE4\xF5"},
171 {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL
, "\xAa\xBb\xCc\xDd\xEe\xFf"},
172 // Certain URL-sensitive characters should not be unescaped unless asked.
173 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES
,
174 "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
175 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
176 UnescapeRule::URL_SPECIAL_CHARS
,
177 "Hello%20%13%10world ## ?? == && %% ++"},
178 // We can neither escape nor unescape '@' since some websites expect it to
179 // be preserved as either '@' or "%40".
180 // See http://b/996720 and http://crbug.com/23933 .
181 {"me@my%40example", UnescapeRule::NORMAL
, "me@my%40example"},
182 // Control characters.
183 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS
,
184 "%01%02%03%04%05%06%07%08%09 %"},
185 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS
,
186 "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
187 {"Hello%20%13%10%02", UnescapeRule::SPACES
, "Hello %13%10%02"},
188 {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS
, "Hello%20\x13\x10\x02"},
191 for (size_t i
= 0; i
< arraysize(unescape_cases
); i
++) {
192 std::string
str(unescape_cases
[i
].input
);
193 EXPECT_EQ(std::string(unescape_cases
[i
].output
),
194 UnescapeURLComponent(str
, unescape_cases
[i
].rules
));
197 // Test the NULL character unescaping (which wouldn't work above since those
198 // are just char pointers).
199 std::string
input("Null");
200 input
.push_back(0); // Also have a NULL in the input.
201 input
.append("%00%39Test");
203 // When we're unescaping NULLs
204 std::string
expected("Null");
205 expected
.push_back(0);
206 expected
.push_back(0);
207 expected
.append("9Test");
208 EXPECT_EQ(expected
, UnescapeURLComponent(input
, UnescapeRule::CONTROL_CHARS
));
210 // When we're not unescaping NULLs.
212 expected
.push_back(0);
213 expected
.append("%009Test");
214 EXPECT_EQ(expected
, UnescapeURLComponent(input
, UnescapeRule::NORMAL
));
217 TEST(EscapeTest
, UnescapeURLComponent
) {
218 const UnescapeURLCase unescape_cases
[] = {
219 {L
"", UnescapeRule::NORMAL
, L
""},
220 {L
"%2", UnescapeRule::NORMAL
, L
"%2"},
221 {L
"%%%%%%", UnescapeRule::NORMAL
, L
"%%%%%%"},
222 {L
"Don't escape anything", UnescapeRule::NORMAL
, L
"Don't escape anything"},
223 {L
"Invalid %escape %2", UnescapeRule::NORMAL
, L
"Invalid %escape %2"},
224 {L
"Some%20random text %25%2dOK", UnescapeRule::NONE
,
225 L
"Some%20random text %25%2dOK"},
226 {L
"Some%20random text %25%2dOK", UnescapeRule::NORMAL
,
227 L
"Some%20random text %25-OK"},
228 {L
"Some%20random text %25%E2%80", UnescapeRule::NORMAL
,
229 L
"Some%20random text %25\xE2\x80"},
230 {L
"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL
,
231 L
"Some%20random text %25\xE2\x80OK"},
232 {L
"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL
,
233 L
"Some%20random text %25\xE2\x80\x84OK"},
235 // BiDi Control characters should not be unescaped.
236 {L
"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL
,
237 L
"Some%20random text %25%D8%9COK"},
238 {L
"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL
,
239 L
"Some%20random text %25%E2%80%8EOK"},
240 {L
"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL
,
241 L
"Some%20random text %25%E2%80%8FOK"},
242 {L
"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL
,
243 L
"Some%20random text %25%E2%80%AAOK"},
244 {L
"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL
,
245 L
"Some%20random text %25%E2%80%ABOK"},
246 {L
"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL
,
247 L
"Some%20random text %25%E2%80%AEOK"},
248 {L
"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL
,
249 L
"Some%20random text %25%E2%81%A6OK"},
250 {L
"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL
,
251 L
"Some%20random text %25%E2%81%A9OK"},
253 {L
"Some%20random text %25%2dOK", UnescapeRule::SPACES
,
254 L
"Some random text %25-OK"},
255 {L
"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS
,
256 L
"Some%20random text %-OK"},
257 {L
"Some%20random text %25%2dOK",
258 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
,
259 L
"Some random text %-OK"},
260 {L
"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL
, L
"\xA0\xB1\xC2\xD3\xE4\xF5"},
261 {L
"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL
, L
"\xAa\xBb\xCc\xDd\xEe\xFf"},
262 // Certain URL-sensitive characters should not be unescaped unless asked.
263 {L
"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES
,
264 L
"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
265 {L
"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
266 UnescapeRule::URL_SPECIAL_CHARS
,
267 L
"Hello%20%13%10world ## ?? == && %% ++"},
268 // We can neither escape nor unescape '@' since some websites expect it to
269 // be preserved as either '@' or "%40".
270 // See http://b/996720 and http://crbug.com/23933 .
271 {L
"me@my%40example", UnescapeRule::NORMAL
, L
"me@my%40example"},
272 // Control characters.
273 {L
"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS
,
274 L
"%01%02%03%04%05%06%07%08%09 %"},
275 {L
"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS
,
276 L
"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
277 {L
"Hello%20%13%10%02", UnescapeRule::SPACES
, L
"Hello %13%10%02"},
278 {L
"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS
,
279 L
"Hello%20\x13\x10\x02"},
280 {L
"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS
,
281 L
"Hello\x9824\x9827"},
284 for (size_t i
= 0; i
< arraysize(unescape_cases
); i
++) {
285 base::string16
str(base::WideToUTF16(unescape_cases
[i
].input
));
286 EXPECT_EQ(base::WideToUTF16(unescape_cases
[i
].output
),
287 UnescapeURLComponent(str
, unescape_cases
[i
].rules
));
290 // Test the NULL character unescaping (which wouldn't work above since those
291 // are just char pointers).
292 base::string16
input(base::WideToUTF16(L
"Null"));
293 input
.push_back(0); // Also have a NULL in the input.
294 input
.append(base::WideToUTF16(L
"%00%39Test"));
296 // When we're unescaping NULLs
297 base::string16
expected(base::WideToUTF16(L
"Null"));
298 expected
.push_back(0);
299 expected
.push_back(0);
300 expected
.append(base::ASCIIToUTF16("9Test"));
301 EXPECT_EQ(expected
, UnescapeURLComponent(input
, UnescapeRule::CONTROL_CHARS
));
303 // When we're not unescaping NULLs.
304 expected
= base::WideToUTF16(L
"Null");
305 expected
.push_back(0);
306 expected
.append(base::WideToUTF16(L
"%009Test"));
307 EXPECT_EQ(expected
, UnescapeURLComponent(input
, UnescapeRule::NORMAL
));
310 TEST(EscapeTest
, UnescapeAndDecodeUTF8URLComponent
) {
311 const UnescapeAndDecodeCase unescape_cases
[] = {
328 { "Don't escape anything",
329 "Don't escape anything",
330 "Don't escape anything",
331 L
"Don't escape anything"},
332 { "+Invalid %escape %2+",
333 "+Invalid %escape %2+",
334 " Invalid %escape %2 ",
335 L
"+Invalid %escape %2+"},
336 { "Some random text %25%2dOK",
337 "Some random text %25-OK",
338 "Some random text %25-OK",
339 L
"Some random text %25-OK"},
340 { "%01%02%03%04%05%06%07%08%09",
341 "%01%02%03%04%05%06%07%08%09",
342 "%01%02%03%04%05%06%07%08%09",
343 L
"%01%02%03%04%05%06%07%08%09"},
344 { "%E4%BD%A0+%E5%A5%BD",
345 "\xE4\xBD\xA0+\xE5\xA5\xBD",
346 "\xE4\xBD\xA0 \xE5\xA5\xBD",
348 { "%ED%ED", // Invalid UTF-8.
351 L
"%ED%ED"}, // Invalid UTF-8 -> kept unescaped.
354 for (size_t i
= 0; i
< arraysize(unescape_cases
); i
++) {
355 std::string unescaped
= UnescapeURLComponent(unescape_cases
[i
].input
,
356 UnescapeRule::NORMAL
);
357 EXPECT_EQ(std::string(unescape_cases
[i
].url_unescaped
), unescaped
);
359 unescaped
= UnescapeURLComponent(unescape_cases
[i
].input
,
360 UnescapeRule::REPLACE_PLUS_WITH_SPACE
);
361 EXPECT_EQ(std::string(unescape_cases
[i
].query_unescaped
), unescaped
);
363 // TODO: Need to test unescape_spaces and unescape_percent.
364 base::string16 decoded
= UnescapeAndDecodeUTF8URLComponent(
365 unescape_cases
[i
].input
, UnescapeRule::NORMAL
);
366 EXPECT_EQ(base::WideToUTF16(unescape_cases
[i
].decoded
), decoded
);
370 TEST(EscapeTest
, AdjustOffset
) {
371 const AdjustOffsetCase adjust_cases
[] = {
376 {"test", std::string::npos
, std::string::npos
},
379 {"%2dtest", 2, std::string::npos
},
380 {"%2dtest", 1, std::string::npos
},
383 {"%E4%BD%A0+%E5%A5%BD", 9, 1},
384 {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos
},
385 {"%E4%BD%A0+%E5%A5%BD", 0, 0},
386 {"%E4%BD%A0+%E5%A5%BD", 10, 2},
387 {"%E4%BD%A0+%E5%A5%BD", 19, 3},
389 {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
390 {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos
},
391 {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
392 {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
393 {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
394 {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
395 {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
396 {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos
},
397 {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},
399 {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
400 {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos
},
401 {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
402 {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
403 {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
404 {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
405 {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos
},
406 {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
407 {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},
409 {"%ED%B0%80+%E5%A5%BD", 6, 6}, // not convertable to UTF-8
412 for (size_t i
= 0; i
< arraysize(adjust_cases
); i
++) {
413 size_t offset
= adjust_cases
[i
].input_offset
;
414 base::OffsetAdjuster::Adjustments adjustments
;
415 UnescapeAndDecodeUTF8URLComponentWithAdjustments(
416 adjust_cases
[i
].input
, UnescapeRule::NORMAL
, &adjustments
);
417 base::OffsetAdjuster::AdjustOffset(adjustments
, &offset
);
418 EXPECT_EQ(adjust_cases
[i
].output_offset
, offset
)
419 << "input=" << adjust_cases
[i
].input
420 << " offset=" << adjust_cases
[i
].input_offset
;
424 TEST(EscapeTest
, EscapeForHTML
) {
425 const EscapeForHTMLCase tests
[] = {
426 { "hello", "hello" },
427 { "<hello>", "<hello>" },
428 { "don\'t mess with me", "don't mess with me" },
430 for (size_t i
= 0; i
< arraysize(tests
); ++i
) {
431 std::string result
= EscapeForHTML(std::string(tests
[i
].input
));
432 EXPECT_EQ(std::string(tests
[i
].expected_output
), result
);
436 TEST(EscapeTest
, UnescapeForHTML
) {
437 const EscapeForHTMLCase tests
[] = {
439 { "<hello>", "<hello>" },
440 { "don't mess with me", "don\'t mess with me" },
441 { "<>&"'", "<>&\"'" },
442 { "& lt; & ; &; '", "& lt; & ; &; '" },
448 { "& &", "& &" },
450 for (size_t i
= 0; i
< arraysize(tests
); ++i
) {
451 base::string16 result
= UnescapeForHTML(base::ASCIIToUTF16(tests
[i
].input
));
452 EXPECT_EQ(base::ASCIIToUTF16(tests
[i
].expected_output
), result
);