Revert "[libc] Use best-fit binary trie to make malloc logarithmic" (#117065)
[llvm-project.git] / libcxx / test / std / localization / codecvt_unicode.pass.cpp
blobe54c0c2a4610a739dcf065b59ef22adbb69003fc
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT
11 // Requires the fix in 390840f.
12 // XFAIL: using-built-library-before-llvm-18
14 #include <algorithm>
15 #include <cassert>
16 #include <codecvt>
17 #include <locale>
19 #include "test_macros.h"
// One test case for a conversion that is expected to succeed completely.
// The tests pass `in + in_size` as the end of the input range and use
// `out_size` both as the output limit and as the expected amount of output.
struct test_offsets_ok {
  size_t in_size;  // number of input code units handed to the facet
  size_t out_size; // number of output code units expected to be produced
};
// One test case for a conversion that is expected to stop early with
// `partial`. The first two members size the ranges given to the facet; the
// last two say where the facet's "next" pointers must end up, as offsets from
// the start of the input and output buffers.
struct test_offsets_partial {
  size_t in_size;           // number of input code units handed to the facet
  size_t out_size;          // number of output code units made available
  size_t expected_in_next;  // expected offset of the returned input position
  size_t expected_out_next; // expected offset of the returned output position
};
// One test case for a conversion that is expected to fail with `error`.
// Before converting, the tests overwrite the input unit at `replace_pos` with
// `replace_char`; the remaining members have the same meaning as in
// test_offsets_partial.
template <class CharT>
struct test_offsets_error {
  size_t in_size;           // number of input code units handed to the facet
  size_t out_size;          // number of output code units made available
  size_t expected_in_next;  // expected offset of the returned input position
  size_t expected_out_next; // expected offset of the returned output position
  CharT replace_char;       // value written into the input to corrupt it
  size_t replace_pos;       // index of the input unit that gets overwritten
};
// Element count of a built-in array: total byte size divided by the size of
// the first element. Only meaningful when `x` is an actual array.
#define array_size(x) (sizeof(x) / sizeof((x)[0]))

// Standard-library names that the tests below use unqualified.
using std::begin;
using std::end;
using std::copy;
using std::char_traits;
using std::codecvt_base;
50 template <class InternT, class ExternT>
51 void utf8_to_utf32_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
52 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
53 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
54 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
55 static_assert(array_size(input) == 11, "");
56 static_assert(array_size(expected) == 5, "");
58 ExternT in[array_size(input)];
59 InternT exp[array_size(expected)];
60 copy(begin(input), end(input), begin(in));
61 copy(begin(expected), end(expected), begin(exp));
62 assert(char_traits<ExternT>::length(in) == 10);
63 assert(char_traits<InternT>::length(exp) == 4);
64 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
65 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
66 test_offsets_ok t = *it;
67 InternT out[array_size(exp) - 1] = {};
68 assert(t.in_size <= array_size(in));
69 assert(t.out_size <= array_size(out));
70 mbstate_t state = {};
71 const ExternT* in_next = nullptr;
72 InternT* out_next = nullptr;
73 codecvt_base::result res = codecvt_base::ok;
75 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
76 assert(res == cvt.ok);
77 assert(in_next == in + t.in_size);
78 assert(out_next == out + t.out_size);
79 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
80 if (t.out_size < array_size(out))
81 assert(out[t.out_size] == 0);
83 state = mbstate_t();
84 int len = cvt.length(state, in, in + t.in_size, t.out_size);
85 assert(len >= 0);
86 assert(static_cast<size_t>(len) == t.in_size);
89 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
90 test_offsets_ok t = *it;
91 InternT out[array_size(exp)] = {};
92 assert(t.in_size <= array_size(in));
93 assert(t.out_size <= array_size(out));
94 mbstate_t state = {};
95 const ExternT* in_next = nullptr;
96 InternT* out_next = nullptr;
97 codecvt_base::result res = codecvt_base::ok;
99 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
100 assert(res == cvt.ok);
101 assert(in_next == in + t.in_size);
102 assert(out_next == out + t.out_size);
103 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
104 if (t.out_size < array_size(out))
105 assert(out[t.out_size] == 0);
107 state = mbstate_t();
108 int len = cvt.length(state, in, in + t.in_size, array_size(out));
109 assert(len >= 0);
110 assert(static_cast<size_t>(len) == t.in_size);
114 template <class InternT, class ExternT>
115 void utf8_to_utf32_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
116 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
117 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
118 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
119 static_assert(array_size(input) == 11, "");
120 static_assert(array_size(expected) == 5, "");
122 ExternT in[array_size(input)];
123 InternT exp[array_size(expected)];
124 copy(begin(input), end(input), begin(in));
125 copy(begin(expected), end(expected), begin(exp));
126 assert(char_traits<ExternT>::length(in) == 10);
127 assert(char_traits<InternT>::length(exp) == 4);
129 test_offsets_partial offsets[] = {
130 {1, 0, 0, 0}, // no space for first CP
132 {3, 1, 1, 1}, // no space for second CP
133 {2, 2, 1, 1}, // incomplete second CP
134 {2, 1, 1, 1}, // incomplete second CP, and no space for it
136 {6, 2, 3, 2}, // no space for third CP
137 {4, 3, 3, 2}, // incomplete third CP
138 {5, 3, 3, 2}, // incomplete third CP
139 {4, 2, 3, 2}, // incomplete third CP, and no space for it
140 {5, 2, 3, 2}, // incomplete third CP, and no space for it
142 {10, 3, 6, 3}, // no space for fourth CP
143 {7, 4, 6, 3}, // incomplete fourth CP
144 {8, 4, 6, 3}, // incomplete fourth CP
145 {9, 4, 6, 3}, // incomplete fourth CP
146 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
147 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
148 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
151 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
152 test_offsets_partial t = *it;
153 InternT out[array_size(exp) - 1] = {};
154 assert(t.in_size <= array_size(in));
155 assert(t.out_size <= array_size(out));
156 assert(t.expected_in_next <= t.in_size);
157 assert(t.expected_out_next <= t.out_size);
158 mbstate_t state = {};
159 const ExternT* in_next = nullptr;
160 InternT* out_next = nullptr;
161 codecvt_base::result res = codecvt_base::ok;
163 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
164 assert(res == cvt.partial);
165 assert(in_next == in + t.expected_in_next);
166 assert(out_next == out + t.expected_out_next);
167 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
168 if (t.expected_out_next < array_size(out))
169 assert(out[t.expected_out_next] == 0);
171 state = mbstate_t();
172 int len = cvt.length(state, in, in + t.in_size, t.out_size);
173 assert(len >= 0);
174 assert(static_cast<size_t>(len) == t.expected_in_next);
178 template <class InternT, class ExternT>
179 void utf8_to_utf32_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
180 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
181 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
182 const char32_t expected[] = {'b', 0x0448, 0xD700, 0x10AAAA, 0};
183 static_assert(array_size(input) == 11, "");
184 static_assert(array_size(expected) == 5, "");
186 ExternT in[array_size(input)];
187 InternT exp[array_size(expected)];
188 copy(begin(input), end(input), begin(in));
189 copy(begin(expected), end(expected), begin(exp));
190 assert(char_traits<ExternT>::length(in) == 10);
191 assert(char_traits<InternT>::length(exp) == 4);
193 // There are 5 classes of errors in UTF-8 decoding
194 // 1. Missing leading byte
195 // 2. Missing trailing byte
196 // 3. Surrogate CP
197 // 4. Overlong sequence
198 // 5. CP out of Unicode range
199 test_offsets_error<unsigned char> offsets[] = {
201 // 1. Missing leading byte. We will replace the leading byte with
202 // non-leading byte, such as a byte that is always invalid or a trailing
203 // byte.
205 // replace leading byte with invalid byte
206 {1, 4, 0, 0, 0xFF, 0},
207 {3, 4, 1, 1, 0xFF, 1},
208 {6, 4, 3, 2, 0xFF, 3},
209 {10, 4, 6, 3, 0xFF, 6},
211 // replace leading byte with trailing byte
212 {1, 4, 0, 0, 0b10101010, 0},
213 {3, 4, 1, 1, 0b10101010, 1},
214 {6, 4, 3, 2, 0b10101010, 3},
215 {10, 4, 6, 3, 0b10101010, 6},
217 // 2. Missing trailing byte. We will replace the trailing byte with
218 // non-trailing byte, such as a byte that is always invalid or a leading
219 // byte (simple ASCII byte in our case).
221 // replace first trailing byte with ASCII byte
222 {3, 4, 1, 1, 'z', 2},
223 {6, 4, 3, 2, 'z', 4},
224 {10, 4, 6, 3, 'z', 7},
226 // replace first trailing byte with invalid byte
227 {3, 4, 1, 1, 0xFF, 2},
228 {6, 4, 3, 2, 0xFF, 4},
229 {10, 4, 6, 3, 0xFF, 7},
231 // replace second trailing byte with ASCII byte
232 {6, 4, 3, 2, 'z', 5},
233 {10, 4, 6, 3, 'z', 8},
235 // replace second trailing byte with invalid byte
236 {6, 4, 3, 2, 0xFF, 5},
237 {10, 4, 6, 3, 0xFF, 8},
239 // replace third trailing byte
240 {10, 4, 6, 3, 'z', 9},
241 {10, 4, 6, 3, 0xFF, 9},
243 // 2.1 The following test-cases raise doubt whether error or partial should
244 // be returned. For example, we have 4-byte sequence with valid leading
245 // byte. If we hide the last byte we need to return partial. But, if the
246 // second or third byte, which are visible to the call to codecvt, are
247 // malformed then error should be returned.
249 // replace first trailing byte with ASCII byte, also incomplete at end
250 {5, 4, 3, 2, 'z', 4},
251 {8, 4, 6, 3, 'z', 7},
252 {9, 4, 6, 3, 'z', 7},
254 // replace first trailing byte with invalid byte, also incomplete at end
255 {5, 4, 3, 2, 0xFF, 4},
256 {8, 4, 6, 3, 0xFF, 7},
257 {9, 4, 6, 3, 0xFF, 7},
259 // replace second trailing byte with ASCII byte, also incomplete at end
260 {9, 4, 6, 3, 'z', 8},
262 // replace second trailing byte with invalid byte, also incomplete at end
263 {9, 4, 6, 3, 0xFF, 8},
265 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
266 // CP U+D700
267 {6, 4, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
268 {6, 4, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
269 {6, 4, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
270 {6, 4, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
272 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
273 // just the leading byte is enough to make them overlong, i.e. for the
274 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
275 // zeroes.
276 {3, 4, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
277 {3, 4, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
278 {6, 4, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
279 {10, 4, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
281 // 5. CP above range
282 // turn U+10AAAA into U+14AAAA by changing its leading byte
283 {10, 4, 6, 3, 0b11110101, 6},
284 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
285 {10, 4, 6, 3, 0b10011010, 7},
287 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
288 test_offsets_error<unsigned char> t = *it;
289 InternT out[array_size(exp) - 1] = {};
290 assert(t.in_size <= array_size(in));
291 assert(t.out_size <= array_size(out));
292 assert(t.expected_in_next <= t.in_size);
293 assert(t.expected_out_next <= t.out_size);
294 ExternT old_char = in[t.replace_pos];
295 in[t.replace_pos] = t.replace_char;
297 mbstate_t state = {};
298 const ExternT* in_next = nullptr;
299 InternT* out_next = nullptr;
300 codecvt_base::result res = codecvt_base::ok;
302 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
303 assert(res == cvt.error);
304 assert(in_next == in + t.expected_in_next);
305 assert(out_next == out + t.expected_out_next);
306 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
307 if (t.expected_out_next < array_size(out))
308 assert(out[t.expected_out_next] == 0);
310 state = mbstate_t();
311 int len = cvt.length(state, in, in + t.in_size, t.out_size);
312 assert(len >= 0);
313 assert(static_cast<size_t>(len) == t.expected_in_next);
315 in[t.replace_pos] = old_char;
// Runs all UTF-8 -> UTF-32 decoding checks against `cvt`: the successful
// cases first, then the partial ones, then the error ones.
template <class InternT, class ExternT>
void utf8_to_utf32_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in_ok(cvt);
  utf8_to_utf32_in_partial(cvt);
  utf8_to_utf32_in_error(cvt);
}
326 template <class InternT, class ExternT>
327 void utf32_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
328 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
329 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
330 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
331 static_assert(array_size(input) == 5, "");
332 static_assert(array_size(expected) == 11, "");
334 InternT in[array_size(input)];
335 ExternT exp[array_size(expected)];
336 copy(begin(input), end(input), begin(in));
337 copy(begin(expected), end(expected), begin(exp));
338 assert(char_traits<InternT>::length(in) == 4);
339 assert(char_traits<ExternT>::length(exp) == 10);
341 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
342 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
343 test_offsets_ok t = *it;
344 ExternT out[array_size(exp) - 1] = {};
345 assert(t.in_size <= array_size(in));
346 assert(t.out_size <= array_size(out));
347 mbstate_t state = {};
348 const InternT* in_next = nullptr;
349 ExternT* out_next = nullptr;
350 codecvt_base::result res = codecvt_base::ok;
352 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
353 assert(res == cvt.ok);
354 assert(in_next == in + t.in_size);
355 assert(out_next == out + t.out_size);
356 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
357 if (t.out_size < array_size(out))
358 assert(out[t.out_size] == 0);
362 template <class InternT, class ExternT>
363 void utf32_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
364 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
365 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
366 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
367 static_assert(array_size(input) == 5, "");
368 static_assert(array_size(expected) == 11, "");
370 InternT in[array_size(input)];
371 ExternT exp[array_size(expected)];
372 copy(begin(input), end(input), begin(in));
373 copy(begin(expected), end(expected), begin(exp));
374 assert(char_traits<InternT>::length(in) == 4);
375 assert(char_traits<ExternT>::length(exp) == 10);
377 test_offsets_partial offsets[] = {
378 {1, 0, 0, 0}, // no space for first CP
380 {2, 1, 1, 1}, // no space for second CP
381 {2, 2, 1, 1}, // no space for second CP
383 {3, 3, 2, 3}, // no space for third CP
384 {3, 4, 2, 3}, // no space for third CP
385 {3, 5, 2, 3}, // no space for third CP
387 {4, 6, 3, 6}, // no space for fourth CP
388 {4, 7, 3, 6}, // no space for fourth CP
389 {4, 8, 3, 6}, // no space for fourth CP
390 {4, 9, 3, 6}, // no space for fourth CP
392 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
393 test_offsets_partial t = *it;
394 ExternT out[array_size(exp) - 1] = {};
395 assert(t.in_size <= array_size(in));
396 assert(t.out_size <= array_size(out));
397 assert(t.expected_in_next <= t.in_size);
398 assert(t.expected_out_next <= t.out_size);
399 mbstate_t state = {};
400 const InternT* in_next = nullptr;
401 ExternT* out_next = nullptr;
402 codecvt_base::result res = codecvt_base::ok;
404 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
405 assert(res == cvt.partial);
406 assert(in_next == in + t.expected_in_next);
407 assert(out_next == out + t.expected_out_next);
408 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
409 if (t.expected_out_next < array_size(out))
410 assert(out[t.expected_out_next] == 0);
414 template <class InternT, class ExternT>
415 void utf32_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
416 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
417 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
418 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
419 static_assert(array_size(input) == 5, "");
420 static_assert(array_size(expected) == 11, "");
422 InternT in[array_size(input)];
423 ExternT exp[array_size(expected)];
424 copy(begin(input), end(input), begin(in));
425 copy(begin(expected), end(expected), begin(exp));
426 assert(char_traits<InternT>::length(in) == 4);
427 assert(char_traits<ExternT>::length(exp) == 10);
429 test_offsets_error<InternT> offsets[] = {
431 // Surrogate CP
432 {4, 10, 0, 0, 0xD800, 0},
433 {4, 10, 1, 1, 0xDBFF, 1},
434 {4, 10, 2, 3, 0xDC00, 2},
435 {4, 10, 3, 6, 0xDFFF, 3},
437 // CP out of range
438 {4, 10, 0, 0, 0x00110000, 0},
439 {4, 10, 1, 1, 0x00110000, 1},
440 {4, 10, 2, 3, 0x00110000, 2},
441 {4, 10, 3, 6, 0x00110000, 3}};
443 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
444 test_offsets_error<InternT> t = *it;
445 ExternT out[array_size(exp) - 1] = {};
446 assert(t.in_size <= array_size(in));
447 assert(t.out_size <= array_size(out));
448 assert(t.expected_in_next <= t.in_size);
449 assert(t.expected_out_next <= t.out_size);
450 InternT old_char = in[t.replace_pos];
451 in[t.replace_pos] = t.replace_char;
453 mbstate_t state = {};
454 const InternT* in_next = nullptr;
455 ExternT* out_next = nullptr;
456 codecvt_base::result res = codecvt_base::ok;
458 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
459 assert(res == cvt.error);
460 assert(in_next == in + t.expected_in_next);
461 assert(out_next == out + t.expected_out_next);
462 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
463 if (t.expected_out_next < array_size(out))
464 assert(out[t.expected_out_next] == 0);
466 in[t.replace_pos] = old_char;
// Runs all UTF-32 -> UTF-8 encoding checks against `cvt`: the successful
// cases first, then the partial ones, then the error ones.
template <class InternT, class ExternT>
void utf32_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf32_to_utf8_out_ok(cvt);
  utf32_to_utf8_out_partial(cvt);
  utf32_to_utf8_out_error(cvt);
}
// Entry point for a facet that converts between UTF-8 (external) and UTF-32
// (internal): checks the decoding direction, then the encoding direction.
template <class InternT, class ExternT>
void test_utf8_utf32_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in(cvt);
  utf32_to_utf8_out(cvt);
}
483 template <class InternT, class ExternT>
484 void utf8_to_utf16_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
485 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
486 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
487 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
488 static_assert(array_size(input) == 11, "");
489 static_assert(array_size(expected) == 6, "");
491 ExternT in[array_size(input)];
492 InternT exp[array_size(expected)];
493 copy(begin(input), end(input), begin(in));
494 copy(begin(expected), end(expected), begin(exp));
495 assert(char_traits<ExternT>::length(in) == 10);
496 assert(char_traits<InternT>::length(exp) == 5);
498 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
499 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
500 test_offsets_ok t = *it;
501 InternT out[array_size(exp) - 1] = {};
502 assert(t.in_size <= array_size(in));
503 assert(t.out_size <= array_size(out));
504 mbstate_t state = {};
505 const ExternT* in_next = nullptr;
506 InternT* out_next = nullptr;
507 codecvt_base::result res = codecvt_base::ok;
509 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
510 assert(res == cvt.ok);
511 assert(in_next == in + t.in_size);
512 assert(out_next == out + t.out_size);
513 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
514 if (t.out_size < array_size(out))
515 assert(out[t.out_size] == 0);
517 state = mbstate_t();
518 int len = cvt.length(state, in, in + t.in_size, t.out_size);
519 assert(len >= 0);
520 assert(static_cast<size_t>(len) == t.in_size);
523 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
524 test_offsets_ok t = *it;
525 InternT out[array_size(exp)] = {};
526 assert(t.in_size <= array_size(in));
527 assert(t.out_size <= array_size(out));
528 mbstate_t state = {};
529 const ExternT* in_next = nullptr;
530 InternT* out_next = nullptr;
531 codecvt_base::result res = codecvt_base::ok;
533 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
534 assert(res == cvt.ok);
535 assert(in_next == in + t.in_size);
536 assert(out_next == out + t.out_size);
537 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
538 if (t.out_size < array_size(out))
539 assert(out[t.out_size] == 0);
541 state = mbstate_t();
542 int len = cvt.length(state, in, in + t.in_size, array_size(out));
543 assert(len >= 0);
544 assert(static_cast<size_t>(len) == t.in_size);
548 template <class InternT, class ExternT>
549 void utf8_to_utf16_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
550 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
551 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
552 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
553 static_assert(array_size(input) == 11, "");
554 static_assert(array_size(expected) == 6, "");
556 ExternT in[array_size(input)];
557 InternT exp[array_size(expected)];
558 copy(begin(input), end(input), begin(in));
559 copy(begin(expected), end(expected), begin(exp));
560 assert(char_traits<ExternT>::length(in) == 10);
561 assert(char_traits<InternT>::length(exp) == 5);
563 test_offsets_partial offsets[] = {
564 {1, 0, 0, 0}, // no space for first CP
566 {3, 1, 1, 1}, // no space for second CP
567 {2, 2, 1, 1}, // incomplete second CP
568 {2, 1, 1, 1}, // incomplete second CP, and no space for it
570 {6, 2, 3, 2}, // no space for third CP
571 {4, 3, 3, 2}, // incomplete third CP
572 {5, 3, 3, 2}, // incomplete third CP
573 {4, 2, 3, 2}, // incomplete third CP, and no space for it
574 {5, 2, 3, 2}, // incomplete third CP, and no space for it
576 {10, 3, 6, 3}, // no space for fourth CP
577 {10, 4, 6, 3}, // no space for fourth CP
578 {7, 5, 6, 3}, // incomplete fourth CP
579 {8, 5, 6, 3}, // incomplete fourth CP
580 {9, 5, 6, 3}, // incomplete fourth CP
581 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
582 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
583 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
584 {7, 4, 6, 3}, // incomplete fourth CP, and no space for it
585 {8, 4, 6, 3}, // incomplete fourth CP, and no space for it
586 {9, 4, 6, 3}, // incomplete fourth CP, and no space for it
590 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
591 test_offsets_partial t = *it;
592 InternT out[array_size(exp) - 1] = {};
593 assert(t.in_size <= array_size(in));
594 assert(t.out_size <= array_size(out));
595 assert(t.expected_in_next <= t.in_size);
596 assert(t.expected_out_next <= t.out_size);
597 mbstate_t state = {};
598 const ExternT* in_next = nullptr;
599 InternT* out_next = nullptr;
600 codecvt_base::result res = codecvt_base::ok;
602 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
603 assert(res == cvt.partial);
604 assert(in_next == in + t.expected_in_next);
605 assert(out_next == out + t.expected_out_next);
606 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
607 if (t.expected_out_next < array_size(out))
608 assert(out[t.expected_out_next] == 0);
610 state = mbstate_t();
611 int len = cvt.length(state, in, in + t.in_size, t.out_size);
612 assert(len >= 0);
613 assert(static_cast<size_t>(len) == t.expected_in_next);
617 template <class InternT, class ExternT>
618 void utf8_to_utf16_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
619 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
620 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
621 const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
622 static_assert(array_size(input) == 11, "");
623 static_assert(array_size(expected) == 6, "");
625 ExternT in[array_size(input)];
626 InternT exp[array_size(expected)];
627 copy(begin(input), end(input), begin(in));
628 copy(begin(expected), end(expected), begin(exp));
629 assert(char_traits<ExternT>::length(in) == 10);
630 assert(char_traits<InternT>::length(exp) == 5);
632 // There are 5 classes of errors in UTF-8 decoding
633 // 1. Missing leading byte
634 // 2. Missing trailing byte
635 // 3. Surrogate CP
636 // 4. Overlong sequence
637 // 5. CP out of Unicode range
638 test_offsets_error<unsigned char> offsets[] = {
640 // 1. Missing leading byte. We will replace the leading byte with
641 // non-leading byte, such as a byte that is always invalid or a trailing
642 // byte.
644 // replace leading byte with invalid byte
645 {1, 5, 0, 0, 0xFF, 0},
646 {3, 5, 1, 1, 0xFF, 1},
647 {6, 5, 3, 2, 0xFF, 3},
648 {10, 5, 6, 3, 0xFF, 6},
650 // replace leading byte with trailing byte
651 {1, 5, 0, 0, 0b10101010, 0},
652 {3, 5, 1, 1, 0b10101010, 1},
653 {6, 5, 3, 2, 0b10101010, 3},
654 {10, 5, 6, 3, 0b10101010, 6},
656 // 2. Missing trailing byte. We will replace the trailing byte with
657 // non-trailing byte, such as a byte that is always invalid or a leading
658 // byte (simple ASCII byte in our case).
660 // replace first trailing byte with ASCII byte
661 {3, 5, 1, 1, 'z', 2},
662 {6, 5, 3, 2, 'z', 4},
663 {10, 5, 6, 3, 'z', 7},
665 // replace first trailing byte with invalid byte
666 {3, 5, 1, 1, 0xFF, 2},
667 {6, 5, 3, 2, 0xFF, 4},
668 {10, 5, 6, 3, 0xFF, 7},
670 // replace second trailing byte with ASCII byte
671 {6, 5, 3, 2, 'z', 5},
672 {10, 5, 6, 3, 'z', 8},
674 // replace second trailing byte with invalid byte
675 {6, 5, 3, 2, 0xFF, 5},
676 {10, 5, 6, 3, 0xFF, 8},
678 // replace third trailing byte
679 {10, 5, 6, 3, 'z', 9},
680 {10, 5, 6, 3, 0xFF, 9},
682 // 2.1 The following test-cases raise doubt whether error or partial should
683 // be returned. For example, we have 4-byte sequence with valid leading
684 // byte. If we hide the last byte we need to return partial. But, if the
685 // second or third byte, which are visible to the call to codecvt, are
686 // malformed then error should be returned.
688 // replace first trailing byte with ASCII byte, also incomplete at end
689 {5, 5, 3, 2, 'z', 4},
690 {8, 5, 6, 3, 'z', 7},
691 {9, 5, 6, 3, 'z', 7},
693 // replace first trailing byte with invalid byte, also incomplete at end
694 {5, 5, 3, 2, 0xFF, 4},
695 {8, 5, 6, 3, 0xFF, 7},
696 {9, 5, 6, 3, 0xFF, 7},
698 // replace second trailing byte with ASCII byte, also incomplete at end
699 {9, 5, 6, 3, 'z', 8},
701 // replace second trailing byte with invalid byte, also incomplete at end
702 {9, 5, 6, 3, 0xFF, 8},
704 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
705 // CP U+D700
706 {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
707 {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
708 {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
709 {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
711 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
712 // just the leading byte is enough to make them overlong, i.e. for the
713 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
714 // zeroes.
715 {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
716 {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
717 {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
718 {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
720 // 5. CP above range
721 // turn U+10AAAA into U+14AAAA by changing its leading byte
722 {10, 5, 6, 3, 0b11110101, 6},
723 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
724 {10, 5, 6, 3, 0b10011010, 7},
726 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
727 test_offsets_error<unsigned char> t = *it;
728 InternT out[array_size(exp) - 1] = {};
729 assert(t.in_size <= array_size(in));
730 assert(t.out_size <= array_size(out));
731 assert(t.expected_in_next <= t.in_size);
732 assert(t.expected_out_next <= t.out_size);
733 ExternT old_char = in[t.replace_pos];
734 in[t.replace_pos] = t.replace_char;
736 mbstate_t state = {};
737 const ExternT* in_next = nullptr;
738 InternT* out_next = nullptr;
739 codecvt_base::result res = codecvt_base::ok;
741 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
742 assert(res == cvt.error);
743 assert(in_next == in + t.expected_in_next);
744 assert(out_next == out + t.expected_out_next);
745 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
746 if (t.expected_out_next < array_size(out))
747 assert(out[t.expected_out_next] == 0);
749 state = mbstate_t();
750 int len = cvt.length(state, in, in + t.in_size, t.out_size);
751 assert(len >= 0);
752 assert(static_cast<size_t>(len) == t.expected_in_next);
754 in[t.replace_pos] = old_char;
// Runs all UTF-8 -> UTF-16 decoding checks against `cvt`: the successful
// cases first, then the partial ones, then the error ones.
template <class InternT, class ExternT>
void utf8_to_utf16_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in_ok(cvt);
  utf8_to_utf16_in_partial(cvt);
  utf8_to_utf16_in_error(cvt);
}
765 template <class InternT, class ExternT>
766 void utf16_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
767 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
768 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
769 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
770 static_assert(array_size(input) == 6, "");
771 static_assert(array_size(expected) == 11, "");
773 InternT in[array_size(input)];
774 ExternT exp[array_size(expected)];
775 copy(begin(input), end(input), begin(in));
776 copy(begin(expected), end(expected), begin(exp));
777 assert(char_traits<InternT>::length(in) == 5);
778 assert(char_traits<ExternT>::length(exp) == 10);
780 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
781 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
782 test_offsets_ok t = *it;
783 ExternT out[array_size(exp) - 1] = {};
784 assert(t.in_size <= array_size(in));
785 assert(t.out_size <= array_size(out));
786 mbstate_t state = {};
787 const InternT* in_next = nullptr;
788 ExternT* out_next = nullptr;
789 codecvt_base::result res = codecvt_base::ok;
791 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
792 assert(res == cvt.ok);
793 assert(in_next == in + t.in_size);
794 assert(out_next == out + t.out_size);
795 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
796 if (t.out_size < array_size(out))
797 assert(out[t.out_size] == 0);
801 template <class InternT, class ExternT>
802 void utf16_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
803 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
804 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
805 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
806 static_assert(array_size(input) == 6, "");
807 static_assert(array_size(expected) == 11, "");
809 InternT in[array_size(input)];
810 ExternT exp[array_size(expected)];
811 copy(begin(input), end(input), begin(in));
812 copy(begin(expected), end(expected), begin(exp));
813 assert(char_traits<InternT>::length(in) == 5);
814 assert(char_traits<ExternT>::length(exp) == 10);
816 test_offsets_partial offsets[] = {
817 {1, 0, 0, 0}, // no space for first CP
819 {2, 1, 1, 1}, // no space for second CP
820 {2, 2, 1, 1}, // no space for second CP
822 {3, 3, 2, 3}, // no space for third CP
823 {3, 4, 2, 3}, // no space for third CP
824 {3, 5, 2, 3}, // no space for third CP
826 {5, 6, 3, 6}, // no space for fourth CP
827 {5, 7, 3, 6}, // no space for fourth CP
828 {5, 8, 3, 6}, // no space for fourth CP
829 {5, 9, 3, 6}, // no space for fourth CP
831 {4, 10, 3, 6}, // incomplete fourth CP
833 {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
834 {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
835 {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
836 {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
838 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
839 test_offsets_partial t = *it;
840 ExternT out[array_size(exp) - 1] = {};
841 assert(t.in_size <= array_size(in));
842 assert(t.out_size <= array_size(out));
843 assert(t.expected_in_next <= t.in_size);
844 assert(t.expected_out_next <= t.out_size);
845 mbstate_t state = {};
846 const InternT* in_next = nullptr;
847 ExternT* out_next = nullptr;
848 codecvt_base::result res = codecvt_base::ok;
850 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
851 assert(res == cvt.partial);
852 assert(in_next == in + t.expected_in_next);
853 assert(out_next == out + t.expected_out_next);
854 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
855 if (t.expected_out_next < array_size(out))
856 assert(out[t.expected_out_next] == 0);
860 template <class InternT, class ExternT>
861 void utf16_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
862 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
863 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
864 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
865 static_assert(array_size(input) == 6, "");
866 static_assert(array_size(expected) == 11, "");
868 InternT in[array_size(input)];
869 ExternT exp[array_size(expected)];
870 copy(begin(input), end(input), begin(in));
871 copy(begin(expected), end(expected), begin(exp));
872 assert(char_traits<InternT>::length(in) == 5);
873 assert(char_traits<ExternT>::length(exp) == 10);
875 // The only possible error in UTF-16 is unpaired surrogate code units.
876 // So we replace valid code points (scalar values) with lone surrogate CU.
877 test_offsets_error<InternT> offsets[] = {
878 {5, 10, 0, 0, 0xD800, 0},
879 {5, 10, 0, 0, 0xDBFF, 0},
880 {5, 10, 0, 0, 0xDC00, 0},
881 {5, 10, 0, 0, 0xDFFF, 0},
883 {5, 10, 1, 1, 0xD800, 1},
884 {5, 10, 1, 1, 0xDBFF, 1},
885 {5, 10, 1, 1, 0xDC00, 1},
886 {5, 10, 1, 1, 0xDFFF, 1},
888 {5, 10, 2, 3, 0xD800, 2},
889 {5, 10, 2, 3, 0xDBFF, 2},
890 {5, 10, 2, 3, 0xDC00, 2},
891 {5, 10, 2, 3, 0xDFFF, 2},
893 // make the leading surrogate a trailing one
894 {5, 10, 3, 6, 0xDC00, 3},
895 {5, 10, 3, 6, 0xDFFF, 3},
897 // make the trailing surrogate a leading one
898 {5, 10, 3, 6, 0xD800, 4},
899 {5, 10, 3, 6, 0xDBFF, 4},
901 // make the trailing surrogate a BMP char
902 {5, 10, 3, 6, 'z', 4},
905 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
906 test_offsets_error<InternT> t = *it;
907 ExternT out[array_size(exp) - 1] = {};
908 assert(t.in_size <= array_size(in));
909 assert(t.out_size <= array_size(out));
910 assert(t.expected_in_next <= t.in_size);
911 assert(t.expected_out_next <= t.out_size);
912 InternT old_char = in[t.replace_pos];
913 in[t.replace_pos] = t.replace_char;
915 mbstate_t state = {};
916 const InternT* in_next = nullptr;
917 ExternT* out_next = nullptr;
918 codecvt_base::result res = codecvt_base::ok;
920 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
921 assert(res == cvt.error);
922 assert(in_next == in + t.expected_in_next);
923 assert(out_next == out + t.expected_out_next);
924 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
925 if (t.expected_out_next < array_size(out))
926 assert(out[t.expected_out_next] == 0);
928 in[t.replace_pos] = old_char;
// Runs the three UTF-16 -> UTF-8 `out` scenarios (ok, partial, error), in that
// order, against the supplied facet.
template <class InternT, class ExternT>
void utf16_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf16_to_utf8_out_ok(cvt);
  utf16_to_utf8_out_partial(cvt);
  utf16_to_utf8_out_error(cvt);
}
// Entry point for a UTF-8 <-> UTF-16 facet: exercises the `in` direction
// first, then the `out` direction.
template <class InternT, class ExternT>
void test_utf8_utf16_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in(cvt);
  utf16_to_utf8_out(cvt);
}
945 template <class InternT, class ExternT>
946 void utf8_to_ucs2_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
947 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
948 const unsigned char input[] = "b\u0448\uAAAA";
949 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
950 static_assert(array_size(input) == 7, "");
951 static_assert(array_size(expected) == 4, "");
953 ExternT in[array_size(input)];
954 InternT exp[array_size(expected)];
955 copy(begin(input), end(input), begin(in));
956 copy(begin(expected), end(expected), begin(exp));
957 assert(char_traits<ExternT>::length(in) == 6);
958 assert(char_traits<InternT>::length(exp) == 3);
960 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
961 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
962 test_offsets_ok t = *it;
963 InternT out[array_size(exp) - 1] = {};
964 assert(t.in_size <= array_size(in));
965 assert(t.out_size <= array_size(out));
966 mbstate_t state = {};
967 const ExternT* in_next = nullptr;
968 InternT* out_next = nullptr;
969 codecvt_base::result res = codecvt_base::ok;
971 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
972 assert(res == cvt.ok);
973 assert(in_next == in + t.in_size);
974 assert(out_next == out + t.out_size);
975 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
976 if (t.out_size < array_size(out))
977 assert(out[t.out_size] == 0);
979 state = mbstate_t();
980 int len = cvt.length(state, in, in + t.in_size, t.out_size);
981 assert(len >= 0);
982 assert(static_cast<size_t>(len) == t.in_size);
985 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
986 test_offsets_ok t = *it;
987 InternT out[array_size(exp)] = {};
988 assert(t.in_size <= array_size(in));
989 assert(t.out_size <= array_size(out));
990 mbstate_t state = {};
991 const ExternT* in_next = nullptr;
992 InternT* out_next = nullptr;
993 codecvt_base::result res = codecvt_base::ok;
995 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
996 assert(res == cvt.ok);
997 assert(in_next == in + t.in_size);
998 assert(out_next == out + t.out_size);
999 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1000 if (t.out_size < array_size(out))
1001 assert(out[t.out_size] == 0);
1003 state = mbstate_t();
1004 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1005 assert(len >= 0);
1006 assert(static_cast<size_t>(len) == t.in_size);
1010 template <class InternT, class ExternT>
1011 void utf8_to_ucs2_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1012 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1013 const unsigned char input[] = "b\u0448\uAAAA";
1014 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1015 static_assert(array_size(input) == 7, "");
1016 static_assert(array_size(expected) == 4, "");
1018 ExternT in[array_size(input)];
1019 InternT exp[array_size(expected)];
1020 copy(begin(input), end(input), begin(in));
1021 copy(begin(expected), end(expected), begin(exp));
1022 assert(char_traits<ExternT>::length(in) == 6);
1023 assert(char_traits<InternT>::length(exp) == 3);
1025 test_offsets_partial offsets[] = {
1026 {1, 0, 0, 0}, // no space for first CP
1028 {3, 1, 1, 1}, // no space for second CP
1029 {2, 2, 1, 1}, // incomplete second CP
1030 {2, 1, 1, 1}, // incomplete second CP, and no space for it
1032 {6, 2, 3, 2}, // no space for third CP
1033 {4, 3, 3, 2}, // incomplete third CP
1034 {5, 3, 3, 2}, // incomplete third CP
1035 {4, 2, 3, 2}, // incomplete third CP, and no space for it
1036 {5, 2, 3, 2}, // incomplete third CP, and no space for it
1039 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1040 test_offsets_partial t = *it;
1041 InternT out[array_size(exp) - 1] = {};
1042 assert(t.in_size <= array_size(in));
1043 assert(t.out_size <= array_size(out));
1044 assert(t.expected_in_next <= t.in_size);
1045 assert(t.expected_out_next <= t.out_size);
1046 mbstate_t state = {};
1047 const ExternT* in_next = nullptr;
1048 InternT* out_next = nullptr;
1049 codecvt_base::result res = codecvt_base::ok;
1051 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1052 assert(res == cvt.partial);
1053 assert(in_next == in + t.expected_in_next);
1054 assert(out_next == out + t.expected_out_next);
1055 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1056 if (t.expected_out_next < array_size(out))
1057 assert(out[t.expected_out_next] == 0);
1059 state = mbstate_t();
1060 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1061 assert(len >= 0);
1062 assert(static_cast<size_t>(len) == t.expected_in_next);
1066 template <class InternT, class ExternT>
1067 void utf8_to_ucs2_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1068 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
1069 const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
1070 static_assert(array_size(input) == 11, "");
1071 static_assert(array_size(expected) == 6, "");
1073 ExternT in[array_size(input)];
1074 InternT exp[array_size(expected)];
1075 copy(begin(input), end(input), begin(in));
1076 copy(begin(expected), end(expected), begin(exp));
1077 assert(char_traits<ExternT>::length(in) == 10);
1078 assert(char_traits<InternT>::length(exp) == 5);
1080 // There are 5 classes of errors in UTF-8 decoding
1081 // 1. Missing leading byte
1082 // 2. Missing trailing byte
1083 // 3. Surrogate CP
1084 // 4. Overlong sequence
1085 // 5. CP out of Unicode range
1086 test_offsets_error<unsigned char> offsets[] = {
1088 // 1. Missing leading byte. We will replace the leading byte with
1089 // non-leading byte, such as a byte that is always invalid or a trailing
1090 // byte.
1092 // replace leading byte with invalid byte
1093 {1, 5, 0, 0, 0xFF, 0},
1094 {3, 5, 1, 1, 0xFF, 1},
1095 {6, 5, 3, 2, 0xFF, 3},
1096 {10, 5, 6, 3, 0xFF, 6},
1098 // replace leading byte with trailing byte
1099 {1, 5, 0, 0, 0b10101010, 0},
1100 {3, 5, 1, 1, 0b10101010, 1},
1101 {6, 5, 3, 2, 0b10101010, 3},
1102 {10, 5, 6, 3, 0b10101010, 6},
1104 // 2. Missing trailing byte. We will replace the trailing byte with
1105 // non-trailing byte, such as a byte that is always invalid or a leading
1106 // byte (simple ASCII byte in our case).
1108 // replace first trailing byte with ASCII byte
1109 {3, 5, 1, 1, 'z', 2},
1110 {6, 5, 3, 2, 'z', 4},
1111 {10, 5, 6, 3, 'z', 7},
1113 // replace first trailing byte with invalid byte
1114 {3, 5, 1, 1, 0xFF, 2},
1115 {6, 5, 3, 2, 0xFF, 4},
1116 {10, 5, 6, 3, 0xFF, 7},
1118 // replace second trailing byte with ASCII byte
1119 {6, 5, 3, 2, 'z', 5},
1120 {10, 5, 6, 3, 'z', 8},
1122 // replace second trailing byte with invalid byte
1123 {6, 5, 3, 2, 0xFF, 5},
1124 {10, 5, 6, 3, 0xFF, 8},
1126 // replace third trailing byte
1127 {10, 5, 6, 3, 'z', 9},
1128 {10, 5, 6, 3, 0xFF, 9},
1130 // 2.1 The following test-cases raise doubt whether error or partial should
1131 // be returned. For example, we have 4-byte sequence with valid leading
1132 // byte. If we hide the last byte we need to return partial. But, if the
1133 // second or third byte, which are visible to the call to codecvt, are
1134 // malformed then error should be returned.
1136 // replace first trailing byte with ASCII byte, also incomplete at end
1137 {5, 5, 3, 2, 'z', 4},
1138 {8, 5, 6, 3, 'z', 7},
1139 {9, 5, 6, 3, 'z', 7},
1141 // replace first trailing byte with invalid byte, also incomplete at end
1142 {5, 5, 3, 2, 0xFF, 4},
1143 {8, 5, 6, 3, 0xFF, 7},
1144 {9, 5, 6, 3, 0xFF, 7},
1146 // replace second trailing byte with ASCII byte, also incomplete at end
1147 {9, 5, 6, 3, 'z', 8},
1149 // replace second trailing byte with invalid byte, also incomplete at end
1150 {9, 5, 6, 3, 0xFF, 8},
1152 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
1153 // CP U+D700
1154 {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
1155 {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
1156 {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
1157 {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
1159 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
1160 // just the leading byte is enough to make them overlong, i.e. for the
1161 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
1162 // zeroes.
1163 {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
1164 {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
1165 {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
1166 {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
1168 // 5. CP above range
1169 // turn U+10AAAA into U+14AAAA by changing its leading byte
1170 {10, 5, 6, 3, 0b11110101, 6},
1171 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
1172 {10, 5, 6, 3, 0b10011010, 7},
1173 // Don't replace anything, show full 4-byte CP U+10AAAA
1174 {10, 4, 6, 3, 'b', 0},
1175 {10, 5, 6, 3, 'b', 0},
1176 // Don't replace anything, show incomplete 4-byte CP at the end. It's still
1177 // out of UCS2 range just by seeing the first byte.
1178 {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1179 {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1180 {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1181 {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1182 {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1183 {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1185 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
1186 test_offsets_error<unsigned char> t = *it;
1187 InternT out[array_size(exp) - 1] = {};
1188 assert(t.in_size <= array_size(in));
1189 assert(t.out_size <= array_size(out));
1190 assert(t.expected_in_next <= t.in_size);
1191 assert(t.expected_out_next <= t.out_size);
1192 ExternT old_char = in[t.replace_pos];
1193 in[t.replace_pos] = t.replace_char;
1195 mbstate_t state = {};
1196 const ExternT* in_next = nullptr;
1197 InternT* out_next = nullptr;
1198 codecvt_base::result res = codecvt_base::ok;
1200 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1201 assert(res == cvt.error);
1202 assert(in_next == in + t.expected_in_next);
1203 assert(out_next == out + t.expected_out_next);
1204 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1205 if (t.expected_out_next < array_size(out))
1206 assert(out[t.expected_out_next] == 0);
1208 state = mbstate_t();
1209 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1210 assert(len >= 0);
1211 assert(static_cast<size_t>(len) == t.expected_in_next);
1213 in[t.replace_pos] = old_char;
// Runs the three UTF-8 -> UCS-2 `in` scenarios (ok, partial, error), in that
// order, against the supplied facet.
template <class InternT, class ExternT>
void utf8_to_ucs2_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in_ok(cvt);
  utf8_to_ucs2_in_partial(cvt);
  utf8_to_ucs2_in_error(cvt);
}
1224 template <class InternT, class ExternT>
1225 void ucs2_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1226 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1227 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1228 const unsigned char expected[] = "b\u0448\uAAAA";
1229 static_assert(array_size(input) == 4, "");
1230 static_assert(array_size(expected) == 7, "");
1232 InternT in[array_size(input)];
1233 ExternT exp[array_size(expected)];
1234 copy(begin(input), end(input), begin(in));
1235 copy(begin(expected), end(expected), begin(exp));
1236 assert(char_traits<InternT>::length(in) == 3);
1237 assert(char_traits<ExternT>::length(exp) == 6);
1239 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
1240 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1241 test_offsets_ok t = *it;
1242 ExternT out[array_size(exp) - 1] = {};
1243 assert(t.in_size <= array_size(in));
1244 assert(t.out_size <= array_size(out));
1245 mbstate_t state = {};
1246 const InternT* in_next = nullptr;
1247 ExternT* out_next = nullptr;
1248 codecvt_base::result res = codecvt_base::ok;
1250 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1251 assert(res == cvt.ok);
1252 assert(in_next == in + t.in_size);
1253 assert(out_next == out + t.out_size);
1254 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
1255 if (t.out_size < array_size(out))
1256 assert(out[t.out_size] == 0);
1260 template <class InternT, class ExternT>
1261 void ucs2_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1262 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1263 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1264 const unsigned char expected[] = "b\u0448\uAAAA";
1265 static_assert(array_size(input) == 4, "");
1266 static_assert(array_size(expected) == 7, "");
1268 InternT in[array_size(input)];
1269 ExternT exp[array_size(expected)];
1270 copy(begin(input), end(input), begin(in));
1271 copy(begin(expected), end(expected), begin(exp));
1272 assert(char_traits<InternT>::length(in) == 3);
1273 assert(char_traits<ExternT>::length(exp) == 6);
1275 test_offsets_partial offsets[] = {
1276 {1, 0, 0, 0}, // no space for first CP
1278 {2, 1, 1, 1}, // no space for second CP
1279 {2, 2, 1, 1}, // no space for second CP
1281 {3, 3, 2, 3}, // no space for third CP
1282 {3, 4, 2, 3}, // no space for third CP
1283 {3, 5, 2, 3}, // no space for third CP
1285 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1286 test_offsets_partial t = *it;
1287 ExternT out[array_size(exp) - 1] = {};
1288 assert(t.in_size <= array_size(in));
1289 assert(t.out_size <= array_size(out));
1290 assert(t.expected_in_next <= t.in_size);
1291 assert(t.expected_out_next <= t.out_size);
1292 mbstate_t state = {};
1293 const InternT* in_next = nullptr;
1294 ExternT* out_next = nullptr;
1295 codecvt_base::result res = codecvt_base::ok;
1297 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1298 assert(res == cvt.partial);
1299 assert(in_next == in + t.expected_in_next);
1300 assert(out_next == out + t.expected_out_next);
1301 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1302 if (t.expected_out_next < array_size(out))
1303 assert(out[t.expected_out_next] == 0);
1307 template <class InternT, class ExternT>
1308 void ucs2_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1309 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1310 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
1311 static_assert(array_size(input) == 6, "");
1312 static_assert(array_size(expected) == 11, "");
1314 InternT in[array_size(input)];
1315 ExternT exp[array_size(expected)];
1316 copy(begin(input), end(input), begin(in));
1317 copy(begin(expected), end(expected), begin(exp));
1318 assert(char_traits<InternT>::length(in) == 5);
1319 assert(char_traits<ExternT>::length(exp) == 10);
1321 test_offsets_error<InternT> offsets[] = {
1322 {3, 6, 0, 0, 0xD800, 0},
1323 {3, 6, 0, 0, 0xDBFF, 0},
1324 {3, 6, 0, 0, 0xDC00, 0},
1325 {3, 6, 0, 0, 0xDFFF, 0},
1327 {3, 6, 1, 1, 0xD800, 1},
1328 {3, 6, 1, 1, 0xDBFF, 1},
1329 {3, 6, 1, 1, 0xDC00, 1},
1330 {3, 6, 1, 1, 0xDFFF, 1},
1332 {3, 6, 2, 3, 0xD800, 2},
1333 {3, 6, 2, 3, 0xDBFF, 2},
1334 {3, 6, 2, 3, 0xDC00, 2},
1335 {3, 6, 2, 3, 0xDFFF, 2},
1337 // make the leading surrogate a trailing one
1338 {5, 10, 3, 6, 0xDC00, 3},
1339 {5, 10, 3, 6, 0xDFFF, 3},
1341 // make the trailing surrogate a leading one
1342 {5, 10, 3, 6, 0xD800, 4},
1343 {5, 10, 3, 6, 0xDBFF, 4},
1345 // make the trailing surrogate a BMP char
1346 {5, 10, 3, 6, 'z', 4},
1348 // don't replace anything in the test cases bellow, just show the surrogate
1349 // pair (fourth CP) fully or partially
1350 {5, 10, 3, 6, 'b', 0},
1351 {5, 7, 3, 6, 'b', 0}, // no space for fourth CP
1352 {5, 8, 3, 6, 'b', 0}, // no space for fourth CP
1353 {5, 9, 3, 6, 'b', 0}, // no space for fourth CP
1355 {4, 10, 3, 6, 'b', 0}, // incomplete fourth CP
1356 {4, 7, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1357 {4, 8, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1358 {4, 9, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1361 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1362 test_offsets_error<InternT> t = *it;
1363 ExternT out[array_size(exp) - 1] = {};
1364 assert(t.in_size <= array_size(in));
1365 assert(t.out_size <= array_size(out));
1366 assert(t.expected_in_next <= t.in_size);
1367 assert(t.expected_out_next <= t.out_size);
1368 InternT old_char = in[t.replace_pos];
1369 in[t.replace_pos] = t.replace_char;
1371 mbstate_t state = {};
1372 const InternT* in_next = nullptr;
1373 ExternT* out_next = nullptr;
1374 codecvt_base::result res = codecvt_base::ok;
1376 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1377 assert(res == cvt.error);
1378 assert(in_next == in + t.expected_in_next);
1379 assert(out_next == out + t.expected_out_next);
1380 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1381 if (t.expected_out_next < array_size(out))
1382 assert(out[t.expected_out_next] == 0);
1384 in[t.replace_pos] = old_char;
// Runs the three UCS-2 -> UTF-8 `out` scenarios (ok, partial, error), in that
// order, against the supplied facet.
template <class InternT, class ExternT>
void ucs2_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  ucs2_to_utf8_out_ok(cvt);
  ucs2_to_utf8_out_partial(cvt);
  ucs2_to_utf8_out_error(cvt);
}
// Entry point for a UTF-8 <-> UCS-2 facet: exercises the `in` direction first,
// then the `out` direction.
template <class InternT, class ExternT>
void test_utf8_ucs2_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in(cvt);
  ucs2_to_utf8_out(cvt);
}
// Byte order used when a UTF-16 code unit is split into two bytes.
enum utf16_endianess {
  utf16_big_endian,   // high byte is written first
  utf16_little_endian // low byte is written first
};

// Writes every element of [f, l) to `o` as two bytes, in the order selected by
// `e`, and returns the output iterator advanced past the last byte written.
// Any value of `e` other than utf16_big_endian is treated as little-endian.
template <class Iter1, class Iter2>
Iter2 utf16_to_bytes(Iter1 f, Iter1 l, Iter2 o, utf16_endianess e) {
  // Shift applied to obtain the first and the second byte of each unit:
  // big-endian emits bits 8..15 first, little-endian emits bits 0..7 first.
  const int first_shift = (e == utf16_big_endian) ? 8 : 0;
  const int second_shift = 8 - first_shift;

  while (f != l) {
    *o++ = (*f >> first_shift) & 0xFF;
    *o++ = (*f >> second_shift) & 0xFF;
    ++f;
  }
  return o;
}
1418 template <class InternT>
1419 void utf16_to_utf32_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1420 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1421 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1422 static_assert(array_size(input) == 6, "");
1423 static_assert(array_size(expected) == 5, "");
1425 char in[array_size(input) * 2];
1426 InternT exp[array_size(expected)];
1427 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1428 copy(begin(expected), end(expected), begin(exp));
1430 test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}, {10, 4}};
1431 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1432 test_offsets_ok t = *it;
1433 InternT out[array_size(exp) - 1] = {};
1434 assert(t.in_size <= array_size(in));
1435 assert(t.out_size <= array_size(out));
1436 mbstate_t state = {};
1437 const char* in_next = nullptr;
1438 InternT* out_next = nullptr;
1439 codecvt_base::result res = codecvt_base::ok;
1441 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1442 assert(res == cvt.ok);
1443 assert(in_next == in + t.in_size);
1444 assert(out_next == out + t.out_size);
1445 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1446 if (t.out_size < array_size(out))
1447 assert(out[t.out_size] == 0);
1449 state = mbstate_t();
1450 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1451 assert(len >= 0);
1452 assert(static_cast<size_t>(len) == t.in_size);
1455 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1456 test_offsets_ok t = *it;
1457 InternT out[array_size(exp)] = {};
1458 assert(t.in_size <= array_size(in));
1459 assert(t.out_size <= array_size(out));
1460 mbstate_t state = {};
1461 const char* in_next = nullptr;
1462 InternT* out_next = nullptr;
1463 codecvt_base::result res = codecvt_base::ok;
1465 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1466 assert(res == cvt.ok);
1467 assert(in_next == in + t.in_size);
1468 assert(out_next == out + t.out_size);
1469 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1470 if (t.out_size < array_size(out))
1471 assert(out[t.out_size] == 0);
1473 state = mbstate_t();
1474 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1475 assert(len >= 0);
1476 assert(static_cast<size_t>(len) == t.in_size);
1480 template <class InternT>
1481 void utf16_to_utf32_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1482 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1483 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1484 static_assert(array_size(input) == 6, "");
1485 static_assert(array_size(expected) == 5, "");
1487 char in[array_size(input) * 2];
1488 InternT exp[array_size(expected)];
1489 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1490 copy(begin(expected), end(expected), begin(exp));
1492 test_offsets_partial offsets[] = {
1493 {2, 0, 0, 0}, // no space for first CP
1494 {1, 1, 0, 0}, // incomplete first CP
1495 {1, 0, 0, 0}, // incomplete first CP, and no space for it
1497 {4, 1, 2, 1}, // no space for second CP
1498 {3, 2, 2, 1}, // incomplete second CP
1499 {3, 1, 2, 1}, // incomplete second CP, and no space for it
1501 {6, 2, 4, 2}, // no space for third CP
1502 {5, 3, 4, 2}, // incomplete third CP
1503 {5, 2, 4, 2}, // incomplete third CP, and no space for it
1505 {10, 3, 6, 3}, // no space for fourth CP
1506 {7, 4, 6, 3}, // incomplete fourth CP
1507 {8, 4, 6, 3}, // incomplete fourth CP
1508 {9, 4, 6, 3}, // incomplete fourth CP
1509 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
1510 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
1511 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
1514 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1515 test_offsets_partial t = *it;
1516 InternT out[array_size(exp) - 1] = {};
1517 assert(t.in_size <= array_size(in));
1518 assert(t.out_size <= array_size(out));
1519 assert(t.expected_in_next <= t.in_size);
1520 assert(t.expected_out_next <= t.out_size);
1521 mbstate_t state = {};
1522 const char* in_next = nullptr;
1523 InternT* out_next = nullptr;
1524 codecvt_base::result res = codecvt_base::ok;
1526 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1527 assert(res == cvt.partial);
1528 assert(in_next == in + t.expected_in_next);
1529 assert(out_next == out + t.expected_out_next);
1530 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1531 if (t.expected_out_next < array_size(out))
1532 assert(out[t.expected_out_next] == 0);
1534 state = mbstate_t();
1535 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1536 assert(len >= 0);
1537 assert(static_cast<size_t>(len) == t.expected_in_next);
1541 template <class InternT>
1542 void utf16_to_utf32_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1543 char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1544 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1545 static_assert(array_size(input) == 6, "");
1546 static_assert(array_size(expected) == 5, "");
1548 InternT exp[array_size(expected)];
1549 copy(begin(expected), end(expected), begin(exp));
1551 // The only possible error in UTF-16 is unpaired surrogate code units.
1552 // So we replace valid code points (scalar values) with lone surrogate CU.
1553 test_offsets_error<char16_t> offsets[] = {
1554 {10, 4, 0, 0, 0xD800, 0},
1555 {10, 4, 0, 0, 0xDBFF, 0},
1556 {10, 4, 0, 0, 0xDC00, 0},
1557 {10, 4, 0, 0, 0xDFFF, 0},
1559 {10, 4, 2, 1, 0xD800, 1},
1560 {10, 4, 2, 1, 0xDBFF, 1},
1561 {10, 4, 2, 1, 0xDC00, 1},
1562 {10, 4, 2, 1, 0xDFFF, 1},
1564 {10, 4, 4, 2, 0xD800, 2},
1565 {10, 4, 4, 2, 0xDBFF, 2},
1566 {10, 4, 4, 2, 0xDC00, 2},
1567 {10, 4, 4, 2, 0xDFFF, 2},
1569 // make the leading surrogate a trailing one
1570 {10, 4, 6, 3, 0xDC00, 3},
1571 {10, 4, 6, 3, 0xDFFF, 3},
1573 // make the trailing surrogate a leading one
1574 {10, 4, 6, 3, 0xD800, 4},
1575 {10, 4, 6, 3, 0xDBFF, 4},
1577 // make the trailing surrogate a BMP char
1578 {10, 4, 6, 3, 'z', 4},
1581 for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1582 test_offsets_error<char16_t> t = *it;
1583 char in[array_size(input) * 2];
1584 InternT out[array_size(exp) - 1] = {};
1585 assert(t.in_size <= array_size(in));
1586 assert(t.out_size <= array_size(out));
1587 assert(t.expected_in_next <= t.in_size);
1588 assert(t.expected_out_next <= t.out_size);
1589 char16_t old_char = input[t.replace_pos];
1590 input[t.replace_pos] = t.replace_char; // replace in input, not in in
1591 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1593 mbstate_t state = {};
1594 const char* in_next = nullptr;
1595 InternT* out_next = nullptr;
1596 codecvt_base::result res = codecvt_base::ok;
1598 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1599 assert(res == cvt.error);
1600 assert(in_next == in + t.expected_in_next);
1601 assert(out_next == out + t.expected_out_next);
1602 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1603 if (t.expected_out_next < array_size(out))
1604 assert(out[t.expected_out_next] == 0);
1606 state = mbstate_t();
1607 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1608 assert(len >= 0);
1609 assert(static_cast<size_t>(len) == t.expected_in_next);
1611 input[t.replace_pos] = old_char;
1615 template <class InternT>
1616 void utf32_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1617 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1618 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1619 static_assert(array_size(input) == 5, "");
1620 static_assert(array_size(expected) == 6, "");
1622 InternT in[array_size(input)];
1623 char exp[array_size(expected) * 2];
1624 copy(begin(input), end(input), begin(in));
1625 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1627 test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 10}};
1628 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1629 test_offsets_ok t = *it;
1630 char out[array_size(exp) - 2] = {};
1631 assert(t.in_size <= array_size(in));
1632 assert(t.out_size <= array_size(out));
1633 mbstate_t state = {};
1634 const InternT* in_next = nullptr;
1635 char* out_next = nullptr;
1636 codecvt_base::result res = codecvt_base::ok;
1638 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1639 assert(res == cvt.ok);
1640 assert(in_next == in + t.in_size);
1641 assert(out_next == out + t.out_size);
1642 assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1643 if (t.out_size < array_size(out))
1644 assert(out[t.out_size] == 0);
1648 template <class InternT>
1649 void utf32_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1650 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1651 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1652 static_assert(array_size(input) == 5, "");
1653 static_assert(array_size(expected) == 6, "");
1655 InternT in[array_size(input)];
1656 char exp[array_size(expected) * 2];
1657 copy(begin(input), end(input), begin(in));
1658 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1660 test_offsets_partial offsets[] = {
1661 {1, 0, 0, 0}, // no space for first CP
1662 {1, 1, 0, 0}, // no space for first CP
1664 {2, 2, 1, 2}, // no space for second CP
1665 {2, 3, 1, 2}, // no space for second CP
1667 {3, 4, 2, 4}, // no space for third CP
1668 {3, 5, 2, 4}, // no space for third CP
1670 {4, 6, 3, 6}, // no space for fourth CP
1671 {4, 7, 3, 6}, // no space for fourth CP
1672 {4, 8, 3, 6}, // no space for fourth CP
1673 {4, 9, 3, 6}, // no space for fourth CP
1675 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1676 test_offsets_partial t = *it;
1677 char out[array_size(exp) - 2] = {};
1678 assert(t.in_size <= array_size(in));
1679 assert(t.out_size <= array_size(out));
1680 assert(t.expected_in_next <= t.in_size);
1681 assert(t.expected_out_next <= t.out_size);
1682 mbstate_t state = {};
1683 const InternT* in_next = nullptr;
1684 char* out_next = nullptr;
1685 codecvt_base::result res = codecvt_base::ok;
1687 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1688 assert(res == cvt.partial);
1689 assert(in_next == in + t.expected_in_next);
1690 assert(out_next == out + t.expected_out_next);
1691 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1692 if (t.expected_out_next < array_size(out))
1693 assert(out[t.expected_out_next] == 0);
1697 template <class InternT>
1698 void utf32_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1699 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1700 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1701 static_assert(array_size(input) == 5, "");
1702 static_assert(array_size(expected) == 6, "");
1704 InternT in[array_size(input)];
1705 char exp[array_size(expected) * 2];
1706 copy(begin(input), end(input), begin(in));
1707 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1709 test_offsets_error<InternT> offsets[] = {
1711 // Surrogate CP
1712 {4, 10, 0, 0, 0xD800, 0},
1713 {4, 10, 1, 2, 0xDBFF, 1},
1714 {4, 10, 2, 4, 0xDC00, 2},
1715 {4, 10, 3, 6, 0xDFFF, 3},
1717 // CP out of range
1718 {4, 10, 0, 0, 0x00110000, 0},
1719 {4, 10, 1, 2, 0x00110000, 1},
1720 {4, 10, 2, 4, 0x00110000, 2},
1721 {4, 10, 3, 6, 0x00110000, 3}};
1723 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1724 test_offsets_error<InternT> t = *it;
1725 char out[array_size(exp) - 2] = {};
1726 assert(t.in_size <= array_size(in));
1727 assert(t.out_size <= array_size(out));
1728 assert(t.expected_in_next <= t.in_size);
1729 assert(t.expected_out_next <= t.out_size);
1730 InternT old_char = in[t.replace_pos];
1731 in[t.replace_pos] = t.replace_char;
1733 mbstate_t state = {};
1734 const InternT* in_next = nullptr;
1735 char* out_next = nullptr;
1736 codecvt_base::result res = codecvt_base::ok;
1738 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1739 assert(res == cvt.error);
1740 assert(in_next == in + t.expected_in_next);
1741 assert(out_next == out + t.expected_out_next);
1742 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1743 if (t.expected_out_next < array_size(out))
1744 assert(out[t.expected_out_next] == 0);
1746 in[t.replace_pos] = old_char;
1750 template <class InternT>
1751 void test_utf16_utf32_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1752 utf16_to_utf32_in_ok(cvt, endianess);
1753 utf16_to_utf32_in_partial(cvt, endianess);
1754 utf16_to_utf32_in_error(cvt, endianess);
1755 utf32_to_utf16_out_ok(cvt, endianess);
1756 utf32_to_utf16_out_partial(cvt, endianess);
1757 utf32_to_utf16_out_error(cvt, endianess);
1760 template <class InternT>
1761 void utf16_to_ucs2_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1762 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1763 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1764 static_assert(array_size(input) == 4, "");
1765 static_assert(array_size(expected) == 4, "");
1767 char in[array_size(input) * 2];
1768 InternT exp[array_size(expected)];
1769 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1770 copy(begin(expected), end(expected), begin(exp));
1772 test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}};
1773 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1774 test_offsets_ok t = *it;
1775 InternT out[array_size(exp) - 1] = {};
1776 assert(t.in_size <= array_size(in));
1777 assert(t.out_size <= array_size(out));
1778 mbstate_t state = {};
1779 const char* in_next = nullptr;
1780 InternT* out_next = nullptr;
1781 codecvt_base::result res = codecvt_base::ok;
1783 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1784 assert(res == cvt.ok);
1785 assert(in_next == in + t.in_size);
1786 assert(out_next == out + t.out_size);
1787 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1788 if (t.out_size < array_size(out))
1789 assert(out[t.out_size] == 0);
1791 state = mbstate_t();
1792 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1793 assert(len >= 0);
1794 assert(static_cast<size_t>(len) == t.in_size);
1797 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1798 test_offsets_ok t = *it;
1799 InternT out[array_size(exp)] = {};
1800 assert(t.in_size <= array_size(in));
1801 assert(t.out_size <= array_size(out));
1802 mbstate_t state = {};
1803 const char* in_next = nullptr;
1804 InternT* out_next = nullptr;
1805 codecvt_base::result res = codecvt_base::ok;
1807 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1808 assert(res == cvt.ok);
1809 assert(in_next == in + t.in_size);
1810 assert(out_next == out + t.out_size);
1811 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1812 if (t.out_size < array_size(out))
1813 assert(out[t.out_size] == 0);
1815 state = mbstate_t();
1816 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1817 assert(len >= 0);
1818 assert(static_cast<size_t>(len) == t.in_size);
1822 template <class InternT>
1823 void utf16_to_ucs2_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1824 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1825 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1826 static_assert(array_size(input) == 4, "");
1827 static_assert(array_size(expected) == 4, "");
1829 char in[array_size(input) * 2];
1830 InternT exp[array_size(expected)];
1831 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1832 copy(begin(expected), end(expected), begin(exp));
1834 test_offsets_partial offsets[] = {
1835 {2, 0, 0, 0}, // no space for first CP
1836 {1, 1, 0, 0}, // incomplete first CP
1837 {1, 0, 0, 0}, // incomplete first CP, and no space for it
1839 {4, 1, 2, 1}, // no space for second CP
1840 {3, 2, 2, 1}, // incomplete second CP
1841 {3, 1, 2, 1}, // incomplete second CP, and no space for it
1843 {6, 2, 4, 2}, // no space for third CP
1844 {5, 3, 4, 2}, // incomplete third CP
1845 {5, 2, 4, 2}, // incomplete third CP, and no space for it
1848 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1849 test_offsets_partial t = *it;
1850 InternT out[array_size(exp) - 1] = {};
1851 assert(t.in_size <= array_size(in));
1852 assert(t.out_size <= array_size(out));
1853 assert(t.expected_in_next <= t.in_size);
1854 assert(t.expected_out_next <= t.out_size);
1855 mbstate_t state = {};
1856 const char* in_next = nullptr;
1857 InternT* out_next = nullptr;
1858 codecvt_base::result res = codecvt_base::ok;
1860 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1861 assert(res == cvt.partial);
1862 assert(in_next == in + t.expected_in_next);
1863 assert(out_next == out + t.expected_out_next);
1864 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1865 if (t.expected_out_next < array_size(out))
1866 assert(out[t.expected_out_next] == 0);
1868 state = mbstate_t();
1869 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1870 assert(len >= 0);
1871 assert(static_cast<size_t>(len) == t.expected_in_next);
1875 template <class InternT>
1876 void utf16_to_ucs2_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1877 char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1878 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1879 static_assert(array_size(input) == 6, "");
1880 static_assert(array_size(expected) == 6, "");
1882 InternT exp[array_size(expected)];
1883 copy(begin(expected), end(expected), begin(exp));
1885 // The only possible error in UTF-16 is unpaired surrogate code units.
1886 // Additionally, because the target encoding is UCS-2, a proper pair of
1887 // surrogates is also error. Simply, any surrogate CU is error.
1888 test_offsets_error<char16_t> offsets[] = {
1889 {6, 3, 0, 0, 0xD800, 0},
1890 {6, 3, 0, 0, 0xDBFF, 0},
1891 {6, 3, 0, 0, 0xDC00, 0},
1892 {6, 3, 0, 0, 0xDFFF, 0},
1894 {6, 3, 2, 1, 0xD800, 1},
1895 {6, 3, 2, 1, 0xDBFF, 1},
1896 {6, 3, 2, 1, 0xDC00, 1},
1897 {6, 3, 2, 1, 0xDFFF, 1},
1899 {6, 3, 4, 2, 0xD800, 2},
1900 {6, 3, 4, 2, 0xDBFF, 2},
1901 {6, 3, 4, 2, 0xDC00, 2},
1902 {6, 3, 4, 2, 0xDFFF, 2},
1904 // make the leading surrogate a trailing one
1905 {10, 5, 6, 3, 0xDC00, 3},
1906 {10, 5, 6, 3, 0xDFFF, 3},
1908 // make the trailing surrogate a leading one
1909 {10, 5, 6, 3, 0xD800, 4},
1910 {10, 5, 6, 3, 0xDBFF, 4},
1912 // make the trailing surrogate a BMP char
1913 {10, 5, 6, 3, 'z', 4},
1915 // don't replace anything in the test cases bellow, just show the surrogate
1916 // pair (fourth CP) fully or partially (just the first surrogate)
1917 {10, 5, 6, 3, 'b', 0},
1918 {8, 5, 6, 3, 'b', 0},
1919 {9, 5, 6, 3, 'b', 0},
1921 {10, 4, 6, 3, 'b', 0},
1922 {8, 4, 6, 3, 'b', 0},
1923 {9, 4, 6, 3, 'b', 0},
1926 for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1927 test_offsets_error<char16_t> t = *it;
1928 char in[array_size(input) * 2];
1929 InternT out[array_size(exp) - 1] = {};
1930 assert(t.in_size <= array_size(in));
1931 assert(t.out_size <= array_size(out));
1932 assert(t.expected_in_next <= t.in_size);
1933 assert(t.expected_out_next <= t.out_size);
1934 char16_t old_char = input[t.replace_pos];
1935 input[t.replace_pos] = t.replace_char; // replace in input, not in in
1936 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1938 mbstate_t state = {};
1939 const char* in_next = nullptr;
1940 InternT* out_next = nullptr;
1941 codecvt_base::result res = codecvt_base::ok;
1943 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1944 assert(res == cvt.error);
1945 assert(in_next == in + t.expected_in_next);
1946 assert(out_next == out + t.expected_out_next);
1947 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1948 if (t.expected_out_next < array_size(out))
1949 assert(out[t.expected_out_next] == 0);
1951 state = mbstate_t();
1952 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1953 assert(len >= 0);
1954 assert(static_cast<size_t>(len) == t.expected_in_next);
1956 input[t.replace_pos] = old_char;
1960 template <class InternT>
1961 void ucs2_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1962 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1963 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1964 static_assert(array_size(input) == 4, "");
1965 static_assert(array_size(expected) == 4, "");
1967 InternT in[array_size(input)];
1968 char exp[array_size(expected) * 2];
1969 copy(begin(input), end(input), begin(in));
1970 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1972 test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}};
1973 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1974 test_offsets_ok t = *it;
1975 char out[array_size(exp) - 2] = {};
1976 assert(t.in_size <= array_size(in));
1977 assert(t.out_size <= array_size(out));
1978 mbstate_t state = {};
1979 const InternT* in_next = nullptr;
1980 char* out_next = nullptr;
1981 codecvt_base::result res = codecvt_base::ok;
1983 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1984 assert(res == cvt.ok);
1985 assert(in_next == in + t.in_size);
1986 assert(out_next == out + t.out_size);
1987 assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1988 if (t.out_size < array_size(out))
1989 assert(out[t.out_size] == 0);
1993 template <class InternT>
1994 void ucs2_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1995 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1996 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1997 static_assert(array_size(input) == 4, "");
1998 static_assert(array_size(expected) == 4, "");
2000 InternT in[array_size(input)];
2001 char exp[array_size(expected) * 2];
2002 copy(begin(input), end(input), begin(in));
2003 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2005 test_offsets_partial offsets[] = {
2006 {1, 0, 0, 0}, // no space for first CP
2007 {1, 1, 0, 0}, // no space for first CP
2009 {2, 2, 1, 2}, // no space for second CP
2010 {2, 3, 1, 2}, // no space for second CP
2012 {3, 4, 2, 4}, // no space for third CP
2013 {3, 5, 2, 4}, // no space for third CP
2015 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
2016 test_offsets_partial t = *it;
2017 char out[array_size(exp) - 2] = {};
2018 assert(t.in_size <= array_size(in));
2019 assert(t.out_size <= array_size(out));
2020 assert(t.expected_in_next <= t.in_size);
2021 assert(t.expected_out_next <= t.out_size);
2022 mbstate_t state = {};
2023 const InternT* in_next = nullptr;
2024 char* out_next = nullptr;
2025 codecvt_base::result res = codecvt_base::ok;
2027 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2028 assert(res == cvt.partial);
2029 assert(in_next == in + t.expected_in_next);
2030 assert(out_next == out + t.expected_out_next);
2031 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2032 if (t.expected_out_next < array_size(out))
2033 assert(out[t.expected_out_next] == 0);
2037 template <class InternT>
2038 void ucs2_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2039 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2040 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2041 static_assert(array_size(input) == 6, "");
2042 static_assert(array_size(expected) == 6, "");
2044 InternT in[array_size(input)];
2045 char exp[array_size(expected) * 2];
2046 copy(begin(input), end(input), begin(in));
2047 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2049 test_offsets_error<InternT> offsets[] = {
2050 {3, 6, 0, 0, 0xD800, 0},
2051 {3, 6, 0, 0, 0xDBFF, 0},
2052 {3, 6, 0, 0, 0xDC00, 0},
2053 {3, 6, 0, 0, 0xDFFF, 0},
2055 {3, 6, 1, 2, 0xD800, 1},
2056 {3, 6, 1, 2, 0xDBFF, 1},
2057 {3, 6, 1, 2, 0xDC00, 1},
2058 {3, 6, 1, 2, 0xDFFF, 1},
2060 {3, 6, 2, 4, 0xD800, 2},
2061 {3, 6, 2, 4, 0xDBFF, 2},
2062 {3, 6, 2, 4, 0xDC00, 2},
2063 {3, 6, 2, 4, 0xDFFF, 2},
2065 // make the leading surrogate a trailing one
2066 {5, 10, 3, 6, 0xDC00, 3},
2067 {5, 10, 3, 6, 0xDFFF, 3},
2069 // make the trailing surrogate a leading one
2070 {5, 10, 3, 6, 0xD800, 4},
2071 {5, 10, 3, 6, 0xDBFF, 4},
2073 // make the trailing surrogate a BMP char
2074 {5, 10, 3, 6, 'z', 4},
2076 // don't replace anything in the test cases bellow, just show the surrogate
2077 // pair (fourth CP) fully or partially (just the first surrogate)
2078 {5, 10, 3, 6, 'b', 0},
2079 {5, 8, 3, 6, 'b', 0},
2080 {5, 9, 3, 6, 'b', 0},
2082 {4, 10, 3, 6, 'b', 0},
2083 {4, 8, 3, 6, 'b', 0},
2084 {4, 9, 3, 6, 'b', 0},
2087 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
2088 test_offsets_error<InternT> t = *it;
2089 char out[array_size(exp) - 2] = {};
2090 assert(t.in_size <= array_size(in));
2091 assert(t.out_size <= array_size(out));
2092 assert(t.expected_in_next <= t.in_size);
2093 assert(t.expected_out_next <= t.out_size);
2094 InternT old_char = in[t.replace_pos];
2095 in[t.replace_pos] = t.replace_char;
2097 mbstate_t state = {};
2098 const InternT* in_next = nullptr;
2099 char* out_next = nullptr;
2100 codecvt_base::result res = codecvt_base::ok;
2102 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2103 assert(res == cvt.error);
2104 assert(in_next == in + t.expected_in_next);
2105 assert(out_next == out + t.expected_out_next);
2106 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2107 if (t.expected_out_next < array_size(out))
2108 assert(out[t.expected_out_next] == 0);
2110 in[t.replace_pos] = old_char;
2114 template <class InternT>
2115 void test_utf16_ucs2_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2116 utf16_to_ucs2_in_ok(cvt, endianess);
2117 utf16_to_ucs2_in_partial(cvt, endianess);
2118 utf16_to_ucs2_in_error(cvt, endianess);
2119 ucs2_to_utf16_out_ok(cvt, endianess);
2120 ucs2_to_utf16_out_partial(cvt, endianess);
2121 ucs2_to_utf16_out_error(cvt, endianess);
2124 using std::codecvt;
2125 using std::codecvt_utf16;
2126 using std::codecvt_utf8;
2127 using std::codecvt_utf8_utf16;
2128 using std::has_facet;
2129 using std::locale;
2130 using std::use_facet;
2132 void test_utf8_utf32_codecvts() {
2133 typedef codecvt<char32_t, char, mbstate_t> codecvt_c32;
2134 const locale& loc_c = locale::classic();
2135 assert(has_facet<codecvt_c32>(loc_c));
2137 const codecvt_c32& cvt = use_facet<codecvt_c32>(loc_c);
2138 test_utf8_utf32_cvt(cvt);
2140 codecvt_utf8<char32_t> cvt2;
2141 test_utf8_utf32_cvt(cvt2);
2143 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2144 codecvt_utf8<wchar_t> cvt3;
2145 test_utf8_utf32_cvt(cvt3);
2146 #endif
2148 #ifndef TEST_HAS_NO_CHAR8_T
2149 typedef codecvt<char32_t, char8_t, mbstate_t> codecvt_c32_c8;
2150 assert(has_facet<codecvt_c32_c8>(loc_c));
2151 const codecvt_c32_c8& cvt4 = use_facet<codecvt_c32_c8>(loc_c);
2152 test_utf8_utf32_cvt(cvt4);
2153 #endif
2156 void test_utf8_utf16_codecvts() {
2157 typedef codecvt<char16_t, char, mbstate_t> codecvt_c16;
2158 const locale& loc_c = locale::classic();
2159 assert(has_facet<codecvt_c16>(loc_c));
2161 const codecvt_c16& cvt = use_facet<codecvt_c16>(loc_c);
2162 test_utf8_utf16_cvt(cvt);
2164 codecvt_utf8_utf16<char16_t> cvt2;
2165 test_utf8_utf16_cvt(cvt2);
2167 codecvt_utf8_utf16<char32_t> cvt3;
2168 test_utf8_utf16_cvt(cvt3);
2170 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
2171 codecvt_utf8_utf16<wchar_t> cvt4;
2172 test_utf8_utf16_cvt(cvt4);
2173 #endif
2175 #ifndef TEST_HAS_NO_CHAR8_T
2176 typedef codecvt<char16_t, char8_t, mbstate_t> codecvt_c16_c8;
2177 assert(has_facet<codecvt_c16_c8>(loc_c));
2178 const codecvt_c16_c8& cvt5 = use_facet<codecvt_c16_c8>(loc_c);
2179 test_utf8_utf16_cvt(cvt5);
2180 #endif
2183 void test_utf8_ucs2_codecvts() {
2184 codecvt_utf8<char16_t> cvt;
2185 test_utf8_ucs2_cvt(cvt);
2187 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2188 codecvt_utf8<wchar_t> cvt2;
2189 test_utf8_ucs2_cvt(cvt2);
2190 #endif
2193 void test_utf16_utf32_codecvts() {
2194 codecvt_utf16<char32_t> cvt;
2195 test_utf16_utf32_cvt(cvt, utf16_big_endian);
2197 codecvt_utf16<char32_t, 0x10FFFF, std::little_endian> cvt2;
2198 test_utf16_utf32_cvt(cvt2, utf16_little_endian);
2200 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2201 codecvt_utf16<wchar_t> cvt3;
2202 test_utf16_utf32_cvt(cvt3, utf16_big_endian);
2204 codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2205 test_utf16_utf32_cvt(cvt4, utf16_little_endian);
2206 #endif
2209 void test_utf16_ucs2_codecvts() {
2210 codecvt_utf16<char16_t> cvt;
2211 test_utf16_ucs2_cvt(cvt, utf16_big_endian);
2213 codecvt_utf16<char16_t, 0x10FFFF, std::little_endian> cvt2;
2214 test_utf16_ucs2_cvt(cvt2, utf16_little_endian);
2216 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2217 codecvt_utf16<wchar_t> cvt3;
2218 test_utf16_ucs2_cvt(cvt3, utf16_big_endian);
2220 codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2221 test_utf16_ucs2_cvt(cvt4, utf16_little_endian);
2222 #endif
2225 int main() {
2226 test_utf8_utf32_codecvts();
2227 test_utf8_utf16_codecvts();
2228 test_utf8_ucs2_codecvts();
2229 test_utf16_utf32_codecvts();
2230 test_utf16_ucs2_codecvts();