1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/macros.h"
6 #include "testing/gtest/include/gtest/gtest.h"
7 #include "third_party/icu/source/common/unicode/ucnv.h"
8 #include "url/url_canon.h"
9 #include "url/url_canon_icu.h"
10 #include "url/url_canon_stdstring.h"
11 #include "url/url_test_utils.h"
13 // Some implementations of base/basictypes.h may define ARRAYSIZE.
14 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
15 // which is in our version of basictypes.h.
17 #define ARRAYSIZE ARRAYSIZE_UNSAFE
22 using test_utils::WStringToUTF16
;
26 // Wrapper around a UConverter object that manages creation and destruction.
// Opens a converter for |charset_name| with ucnv_open(). The UErrorCode that
// ucnv_open() fills in is not inspected in the lines visible here, so callers
// should check converter() for NULL before using it.
29 explicit UConvScoper(const char* charset_name
) {
30 UErrorCode err
= U_ZERO_ERROR
;
31 converter_
= ucnv_open(charset_name
, &err
);
// Releases the converter with ucnv_close(). NOTE(review): presumably this is
// the destructor body; the enclosing lines are not visible here - confirm.
36 ucnv_close(converter_
);
39 // Returns the converter object, may be NULL.
40 UConverter
* converter() const { return converter_
; }
// Handle assigned from ucnv_open() in the constructor above.
43 UConverter
* converter_
;
// Exercises ICUCharsetConverter::ConvertFromUTF16 in two ways: first against a
// table of (input, encoding, expected bytes) cases, then with inputs whose
// length straddles the fixed size of the output buffer.
46 TEST(URLCanonIcuTest
, ICUCharsetConverter
) {
// Each case is: wide-string input, encoding name, expected converted bytes
// (read below as .input, .encoding and .expected).
53 {L
"Hello, world", "utf-8", "Hello, world"},
54 {L
"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
// Surrogate-pair input; the expected output is a single 4-byte UTF-8 sequence.
56 {L
"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
// Same two characters as the second case, this time converted to big5.
58 {L
"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
59 // Unrepresentable character in the destination set.
// The expected output shows U+06DE replaced by %26%231758%3B, i.e. the
// percent-escaped text "&#1758;" (1758 == 0x06DE).
60 {L
"hello\x4f60\x06de\x597dworld", "big5",
61 "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
64 for (size_t i
= 0; i
< ARRAYSIZE(icu_cases
); i
++) {
// Open an ICU converter for this case's encoding; the ASSERT fails the test
// if the converter could not be opened.
65 UConvScoper
conv(icu_cases
[i
].encoding
);
66 ASSERT_TRUE(conv
.converter() != NULL
);
67 ICUCharsetConverter
converter(conv
.converter());
// The canon output is constructed over |str|, which is what gets compared
// against the expected bytes below.
70 StdStringCanonOutput
output(&str
);
72 base::string16
input_str(WStringToUTF16(icu_cases
[i
].input
));
73 int input_len
= static_cast<int>(input_str
.length());
74 converter
.ConvertFromUTF16(input_str
.c_str(), input_len
, &output
);
77 EXPECT_STREQ(icu_cases
[i
].expected
, str
.c_str());
80 // Test string sizes around the resize boundary for the output to make sure
81 // the converter resizes as needed.
// Lengths from static_size - 2 through static_size + 2 are tried, and each
// conversion's output length is expected to equal the input length.
82 const int static_size
= 16;
83 UConvScoper
conv("utf-8");
84 ASSERT_TRUE(conv
.converter());
85 ICUCharsetConverter
converter(conv
.converter());
86 for (int i
= static_size
- 2; i
<= static_size
+ 2; i
++) {
87 // Make a string with the appropriate length.
89 for (int ch
= 0; ch
< i
; ch
++)
92 RawCanonOutput
<static_size
> output
;
93 converter
.ConvertFromUTF16(input
.c_str(), static_cast<int>(input
.length()),
95 EXPECT_EQ(input
.length(), static_cast<size_t>(output
.length()));
// Runs CanonicalizeQuery with an ICUCharsetConverter for each case's encoding,
// once on the 8-bit input and once on the wide input, and compares the result
// with the expected canonical query string. A final check covers an input
// containing an embedded zero byte.
99 TEST(URLCanonIcuTest
, QueryWithConverter
) {
// Wide-string form of the query; converted with WStringToUTF16 before use and
// skipped when null.
102 const wchar_t* input16
;
// Charset name handed to UConvScoper; the assertion in the loop below accepts
// a null encoding.
103 const char* encoding
;
// Canonical output that both input forms are compared against.
104 const char* expected
;
106 // Regular ASCII case in some different encodings.
107 {"foo=bar", L
"foo=bar", "utf-8", "?foo=bar"},
108 {"foo=bar", L
"foo=bar", "shift_jis", "?foo=bar"},
109 {"foo=bar", L
"foo=bar", "gb2312", "?foo=bar"},
110 // Chinese input/output
111 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L
"q=\x4f60\x597d", "gb2312",
113 {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L
"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
114 // Unencodable character in the destination character set should be
115 // escaped. The escape sequence unescapes to be the entity name:
// "&#65319;" (65319 == 0xFF27), which appears percent-escaped in the expected
// string as %26%2365319%3B.
117 {"q=Chinese\xef\xbc\xa7", L
"q=Chinese\xff27", "iso-8859-1",
118 "?q=Chinese%26%2365319%3B"},
121 for (size_t i
= 0; i
< ARRAYSIZE(query_cases
); i
++) {
// Open a converter for this case. The assertion only requires a usable
// converter when an encoding name was supplied.
124 UConvScoper
conv(query_cases
[i
].encoding
);
125 ASSERT_TRUE(!query_cases
[i
].encoding
|| conv
.converter());
126 ICUCharsetConverter
converter(conv
.converter());
// 8-bit input path: the whole string (strlen bytes) is treated as one
// component starting at offset 0.
128 if (query_cases
[i
].input8
) {
129 int len
= static_cast<int>(strlen(query_cases
[i
].input8
));
130 Component
in_comp(0, len
);
133 StdStringCanonOutput
output(&out_str
);
134 CanonicalizeQuery(query_cases
[i
].input8
, in_comp
, &converter
, &output
,
138 EXPECT_EQ(query_cases
[i
].expected
, out_str
);
// Wide input path: the same check after converting the wide string to UTF-16.
141 if (query_cases
[i
].input16
) {
142 base::string16
input16(WStringToUTF16(query_cases
[i
].input16
));
143 int len
= static_cast<int>(input16
.length());
144 Component
in_comp(0, len
);
147 StdStringCanonOutput
output(&out_str
);
148 CanonicalizeQuery(input16
.c_str(), in_comp
, &converter
, &output
,
152 EXPECT_EQ(query_cases
[i
].expected
, out_str
);
156 // Extra test for input with an embedded NUL (zero) byte.
158 StdStringCanonOutput
output(&out_str
);
// No converter is passed here (NULL). Component(0, 5) spans all five input
// bytes, including the embedded zero byte, which is expected to come out as
// %00 rather than terminating the input.
160 CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL
, &output
, &out_comp
);
162 EXPECT_EQ("?a%20%00z%01", out_str
);