1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ppapi/tests/test_char_set.h"
7 #include "ppapi/c/dev/ppb_char_set_dev.h"
8 #include "ppapi/c/trusted/ppb_char_set_trusted.h"
9 #include "ppapi/cpp/dev/memory_dev.h"
10 #include "ppapi/cpp/module.h"
11 #include "ppapi/tests/testing_instance.h"
13 REGISTER_TEST_CASE(CharSet
);
15 TestCharSet::TestCharSet(TestingInstance
* instance
)
17 char_set_interface_(NULL
) {
20 bool TestCharSet::Init() {
21 char_set_interface_
= static_cast<const PPB_CharSet_Dev
*>(
22 pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE
));
23 char_set_trusted_interface_
= static_cast<const PPB_CharSet_Trusted
*>(
24 pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE
));
25 return char_set_interface_
&& char_set_trusted_interface_
;
28 void TestCharSet::RunTests(const std::string
& filter
) {
29 RUN_TEST(UTF16ToCharSetDeprecated
, filter
);
30 RUN_TEST(UTF16ToCharSet
, filter
);
31 RUN_TEST(CharSetToUTF16Deprecated
, filter
);
32 RUN_TEST(CharSetToUTF16
, filter
);
33 RUN_TEST(GetDefaultCharSet
, filter
);
36 // TODO(brettw) remove this when the old interface is removed.
37 std::string
TestCharSet::TestUTF16ToCharSetDeprecated() {
39 std::vector
<uint16_t> utf16
;
41 uint32_t utf8result_len
= 0;
42 pp::Memory_Dev memory
;
43 char* utf8result
= char_set_interface_
->UTF16ToCharSet(
44 instance_
->pp_instance(), &utf16
[0], 0, "latin1",
45 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf8result_len
);
46 ASSERT_TRUE(utf8result
);
47 ASSERT_TRUE(utf8result
[0] == 0);
48 ASSERT_TRUE(utf8result_len
== 0);
49 memory
.MemFree(utf8result
);
51 // Try round-tripping some English & Chinese from UTF-8 through UTF-16
52 std::string
utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
53 utf16
= UTF8ToUTF16(utf8source
);
54 utf8result
= char_set_interface_
->UTF16ToCharSet(
55 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
56 "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL
, &utf8result_len
);
57 ASSERT_TRUE(utf8source
== std::string(utf8result
, utf8result_len
));
58 memory
.MemFree(utf8result
);
60 // Test an un-encodable character with various modes.
61 utf16
= UTF8ToUTF16("h\xe4\xbd\xa0i");
64 utf8result_len
= 1234; // Test that this gets 0'ed on failure.
65 utf8result
= char_set_interface_
->UTF16ToCharSet(
66 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
67 "latin1", PP_CHARSET_CONVERSIONERROR_FAIL
, &utf8result_len
);
68 ASSERT_TRUE(utf8result_len
== 0);
69 ASSERT_TRUE(utf8result
== NULL
);
72 utf8result
= char_set_interface_
->UTF16ToCharSet(
73 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
74 "latin1", PP_CHARSET_CONVERSIONERROR_SKIP
, &utf8result_len
);
75 ASSERT_TRUE(utf8result_len
== 2);
76 ASSERT_TRUE(utf8result
[0] == 'h' && utf8result
[1] == 'i' &&
78 memory
.MemFree(utf8result
);
81 utf8result
= char_set_interface_
->UTF16ToCharSet(
82 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
83 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf8result_len
);
84 ASSERT_TRUE(utf8result_len
== 3);
85 ASSERT_TRUE(utf8result
[0] == 'h' && utf8result
[1] == '?' &&
86 utf8result
[2] == 'i' && utf8result
[3] == 0);
87 memory
.MemFree(utf8result
);
89 // Try some invalid input encoding.
91 utf16
.push_back(0xD800); // High surrogate.
92 utf16
.push_back('A'); // Not a low surrogate.
93 utf8result
= char_set_interface_
->UTF16ToCharSet(
94 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
95 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf8result_len
);
96 ASSERT_TRUE(utf8result_len
== 2);
97 ASSERT_TRUE(utf8result
[0] == '?' && utf8result
[1] == 'A' &&
99 memory
.MemFree(utf8result
);
101 // Invalid encoding name.
102 utf8result
= char_set_interface_
->UTF16ToCharSet(
103 instance_
->pp_instance(), &utf16
[0], static_cast<uint32_t>(utf16
.size()),
104 "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf8result_len
);
105 ASSERT_TRUE(!utf8result
);
106 ASSERT_TRUE(utf8result_len
== 0);
111 std::string
TestCharSet::TestUTF16ToCharSet() {
113 std::vector
<uint16_t> utf16
;
115 std::string output_buffer
;
116 uint32_t utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
117 PP_Bool result
= char_set_trusted_interface_
->UTF16ToCharSet(
118 &utf16
[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
119 &output_buffer
[0], &utf8result_len
);
120 ASSERT_TRUE(result
== PP_TRUE
);
121 ASSERT_TRUE(utf8result_len
== 0);
123 // No output buffer returns length of string.
124 utf16
= UTF8ToUTF16("hello");
126 result
= char_set_trusted_interface_
->UTF16ToCharSet(
127 &utf16
[0], static_cast<uint32_t>(utf16
.size()), "latin1",
128 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
, NULL
, &utf8result_len
);
129 ASSERT_TRUE(result
== PP_TRUE
);
130 ASSERT_TRUE(utf8result_len
== 5);
132 // Giving too small of a buffer just fills in that many items and gives us
134 output_buffer
.resize(100);
136 output_buffer
[utf8result_len
] = '$'; // Barrier character.
137 result
= char_set_trusted_interface_
->UTF16ToCharSet(
138 &utf16
[0], static_cast<uint32_t>(utf16
.size()), "latin1",
139 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
140 &output_buffer
[0], &utf8result_len
);
141 ASSERT_TRUE(result
== PP_TRUE
);
142 ASSERT_TRUE(utf8result_len
== 5);
143 ASSERT_TRUE(output_buffer
[0] == 'h' && output_buffer
[1] == 'e' &&
144 output_buffer
[2] == '$');
146 // Try round-tripping some English & Chinese from UTF-8 through UTF-16
147 std::string
utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd");
148 utf16
= UTF8ToUTF16(utf8source
);
149 output_buffer
.resize(100);
150 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
151 result
= char_set_trusted_interface_
->UTF16ToCharSet(
152 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
153 "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
,
154 &output_buffer
[0], &utf8result_len
);
155 ASSERT_TRUE(result
== PP_TRUE
);
156 output_buffer
.resize(utf8result_len
);
157 ASSERT_TRUE(utf8source
== output_buffer
);
159 // Test an un-encodable character with various modes.
160 utf16
= UTF8ToUTF16("h\xe4\xbd\xa0i");
162 // Fail mode, size should get 0'ed on failure.
163 output_buffer
.resize(100);
164 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
165 result
= char_set_trusted_interface_
->UTF16ToCharSet(
166 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
167 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
,
168 &output_buffer
[0], &utf8result_len
);
169 ASSERT_TRUE(result
== PP_FALSE
);
170 ASSERT_TRUE(utf8result_len
== 0);
173 output_buffer
.resize(100);
174 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
175 result
= char_set_trusted_interface_
->UTF16ToCharSet(
176 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
177 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP
,
178 &output_buffer
[0], &utf8result_len
);
179 ASSERT_TRUE(result
== PP_TRUE
);
180 ASSERT_TRUE(utf8result_len
== 2);
181 ASSERT_TRUE(output_buffer
[0] == 'h' && output_buffer
[1] == 'i');
184 output_buffer
.resize(100);
185 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
186 result
= char_set_trusted_interface_
->UTF16ToCharSet(
187 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
188 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
189 &output_buffer
[0], &utf8result_len
);
190 ASSERT_TRUE(result
== PP_TRUE
);
191 ASSERT_TRUE(utf8result_len
== 3);
192 output_buffer
.resize(utf8result_len
);
193 ASSERT_TRUE(output_buffer
== "h?i");
195 // Try some invalid input encoding.
196 output_buffer
.resize(100);
197 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
199 utf16
.push_back(0xD800); // High surrogate.
200 utf16
.push_back('A'); // Not a low surrogate.
201 result
= char_set_trusted_interface_
->UTF16ToCharSet(
202 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
203 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
204 &output_buffer
[0], &utf8result_len
);
205 ASSERT_TRUE(result
== PP_TRUE
);
206 ASSERT_TRUE(utf8result_len
== 2);
207 ASSERT_TRUE(output_buffer
[0] == '?' && output_buffer
[1] == 'A');
209 // Invalid encoding name.
210 output_buffer
.resize(100);
211 utf8result_len
= static_cast<uint32_t>(output_buffer
.size());
212 result
= char_set_trusted_interface_
->UTF16ToCharSet(
213 &utf16
[0], static_cast<uint32_t>(utf16
.size()),
214 "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
215 &output_buffer
[0], &utf8result_len
);
216 ASSERT_TRUE(result
== PP_FALSE
);
217 ASSERT_TRUE(utf8result_len
== 0);
222 // TODO(brettw) remove this when the old interface is removed.
223 std::string
TestCharSet::TestCharSetToUTF16Deprecated() {
224 pp::Memory_Dev memory
;
227 uint32_t utf16result_len
;
228 uint16_t* utf16result
= char_set_interface_
->CharSetToUTF16(
229 instance_
->pp_instance(), "", 0, "latin1",
230 PP_CHARSET_CONVERSIONERROR_FAIL
, &utf16result_len
);
231 ASSERT_TRUE(utf16result
);
232 ASSERT_TRUE(utf16result_len
== 0);
233 ASSERT_TRUE(utf16result
[0] == 0);
234 memory
.MemFree(utf16result
);
237 char latin1
[] = "H\xef";
238 utf16result
= char_set_interface_
->CharSetToUTF16(
239 instance_
->pp_instance(), latin1
, 2, "latin1",
240 PP_CHARSET_CONVERSIONERROR_FAIL
, &utf16result_len
);
241 ASSERT_TRUE(utf16result
);
242 ASSERT_TRUE(utf16result_len
== 2);
243 ASSERT_TRUE(utf16result
[0] == 'H' && utf16result
[1] == 0xef &&
244 utf16result
[2] == 0);
245 memory
.MemFree(utf16result
);
247 // Invalid input encoding with FAIL.
248 char badutf8
[] = "A\xe4Z";
249 utf16result
= char_set_interface_
->CharSetToUTF16(
250 instance_
->pp_instance(), badutf8
, 3, "utf8",
251 PP_CHARSET_CONVERSIONERROR_FAIL
, &utf16result_len
);
252 ASSERT_TRUE(!utf16result
);
253 ASSERT_TRUE(utf16result_len
== 0);
254 memory
.MemFree(utf16result
);
256 // Invalid input with SKIP.
257 utf16result
= char_set_interface_
->CharSetToUTF16(
258 instance_
->pp_instance(), badutf8
, 3, "utf8",
259 PP_CHARSET_CONVERSIONERROR_SKIP
, &utf16result_len
);
260 ASSERT_TRUE(utf16result
);
261 ASSERT_TRUE(utf16result_len
== 2);
262 ASSERT_TRUE(utf16result
[0] == 'A' && utf16result
[1] == 'Z' &&
263 utf16result
[2] == 0);
264 memory
.MemFree(utf16result
);
266 // Invalid input with SUBSTITUTE.
267 utf16result
= char_set_interface_
->CharSetToUTF16(
268 instance_
->pp_instance(), badutf8
, 3, "utf8",
269 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf16result_len
);
270 ASSERT_TRUE(utf16result
);
271 ASSERT_TRUE(utf16result_len
== 3);
272 ASSERT_TRUE(utf16result
[0] == 'A' && utf16result
[1] == 0xFFFD &&
273 utf16result
[2] == 'Z' && utf16result
[3] == 0);
274 memory
.MemFree(utf16result
);
276 // Invalid encoding name.
277 utf16result
= char_set_interface_
->CharSetToUTF16(
278 instance_
->pp_instance(), badutf8
, 3, "poopiepants",
279 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE
, &utf16result_len
);
280 ASSERT_TRUE(!utf16result
);
281 ASSERT_TRUE(utf16result_len
== 0);
282 memory
.MemFree(utf16result
);
287 std::string
TestCharSet::TestCharSetToUTF16() {
288 std::vector
<uint16_t> output_buffer
;
289 output_buffer
.resize(100);
292 output_buffer
.resize(100);
293 uint32_t utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
294 PP_Bool result
= char_set_trusted_interface_
->CharSetToUTF16(
295 "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
,
296 &output_buffer
[0], &utf16result_len
);
298 ASSERT_TRUE(utf16result_len
== 0);
299 ASSERT_TRUE(output_buffer
[0] == 0);
302 output_buffer
.resize(100);
303 utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
304 char latin1
[] = "H\xef";
305 result
= char_set_trusted_interface_
->CharSetToUTF16(
306 latin1
, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
,
307 &output_buffer
[0], &utf16result_len
);
309 ASSERT_TRUE(utf16result_len
== 2);
310 ASSERT_TRUE(output_buffer
[0] == 'H' && output_buffer
[1] == 0xef);
312 // Invalid input encoding with FAIL.
313 output_buffer
.resize(100);
314 utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
315 char badutf8
[] = "A\xe4Z";
316 result
= char_set_trusted_interface_
->CharSetToUTF16(
317 badutf8
, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL
,
318 &output_buffer
[0], &utf16result_len
);
319 ASSERT_TRUE(!result
);
320 ASSERT_TRUE(utf16result_len
== 0);
322 // Invalid input with SKIP.
323 output_buffer
.resize(100);
324 utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
325 result
= char_set_trusted_interface_
->CharSetToUTF16(
326 badutf8
, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP
,
327 &output_buffer
[0], &utf16result_len
);
329 ASSERT_TRUE(utf16result_len
== 2);
330 ASSERT_TRUE(output_buffer
[0] == 'A' && output_buffer
[1] == 'Z');
332 // Invalid input with SUBSTITUTE.
333 output_buffer
.resize(100);
334 utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
335 result
= char_set_trusted_interface_
->CharSetToUTF16(
336 badutf8
, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
337 &output_buffer
[0], &utf16result_len
);
339 ASSERT_TRUE(utf16result_len
== 3);
340 ASSERT_TRUE(output_buffer
[0] == 'A' && output_buffer
[1] == 0xFFFD &&
341 output_buffer
[2] == 'Z');
343 // Invalid encoding name.
344 output_buffer
.resize(100);
345 utf16result_len
= static_cast<uint32_t>(output_buffer
.size());
346 result
= char_set_trusted_interface_
->CharSetToUTF16(
347 badutf8
, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE
,
348 &output_buffer
[0], &utf16result_len
);
349 ASSERT_TRUE(!result
);
350 ASSERT_TRUE(utf16result_len
== 0);
355 std::string
TestCharSet::TestGetDefaultCharSet() {
356 // Test invalid instance.
357 pp::Var
result(pp::PASS_REF
, char_set_interface_
->GetDefaultCharSet(0));
358 ASSERT_TRUE(result
.is_undefined());
360 // Just make sure the default char set is a nonempty string.
361 result
= pp::Var(pp::PASS_REF
,
362 char_set_interface_
->GetDefaultCharSet(instance_
->pp_instance()));
363 ASSERT_TRUE(result
.is_string());
364 ASSERT_FALSE(result
.AsString().empty());
369 std::vector
<uint16_t> TestCharSet::UTF8ToUTF16(const std::string
& utf8
) {
370 uint32_t result_len
= 0;
371 uint16_t* result
= char_set_interface_
->CharSetToUTF16(
372 instance_
->pp_instance(), utf8
.c_str(),
373 static_cast<uint32_t>(utf8
.size()),
374 "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL
, &result_len
);
376 std::vector
<uint16_t> result_vector
;
378 return result_vector
;
380 result_vector
.assign(result
, &result
[result_len
]);
381 pp::Memory_Dev memory
;
382 memory
.MemFree(result
);
383 return result_vector
;