[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (...
[llvm-project.git] / libcxx / test / std / localization / codecvt_unicode.pass.cpp
blob08ecc16ef4311eb3b39b63c00778f16d818d3fca
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT
10 // XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0|12.0|13.0}}
12 #include <algorithm>
13 #include <cassert>
14 #include <codecvt>
15 #include <locale>
17 #include "test_macros.h"
// Buffer sizes for a conversion that is expected to succeed completely:
// converting `in_size` source elements must yield exactly `out_size`
// destination elements.
struct test_offsets_ok {
  size_t in_size;
  size_t out_size;
};
// Buffer sizes for a conversion that is expected to stop early with `partial`,
// together with the positions at which the source and destination cursors
// must be left.
struct test_offsets_partial {
  size_t in_size;           // number of source elements offered
  size_t out_size;          // number of destination elements available
  size_t expected_in_next;  // expected offset of the source cursor on return
  size_t expected_out_next; // expected offset of the destination cursor on return
};
// Description of one malformed-input test case: the element at `replace_pos`
// in the source buffer is overwritten with `replace_char` before converting,
// and the conversion is expected to stop with `error` at the given cursors.
template <class CharT>
struct test_offsets_error {
  size_t in_size;           // number of source elements offered
  size_t out_size;          // number of destination elements available
  size_t expected_in_next;  // expected offset of the source cursor on return
  size_t expected_out_next; // expected offset of the destination cursor on return
  CharT replace_char;       // value written over the source element
  size_t replace_pos;       // index of the source element that is overwritten
};
// Number of elements in the built-in array `x`. Only meaningful for real
// arrays; a pointer argument would silently give a wrong answer.
#define array_size(x) (sizeof(x) / sizeof((x)[0]))
42 using std::begin;
43 using std::char_traits;
44 using std::codecvt_base;
45 using std::copy;
46 using std::end;
48 template <class InternT, class ExternT>
49 void utf8_to_utf32_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
50 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
51 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
52 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
53 static_assert(array_size(input) == 11, "");
54 static_assert(array_size(expected) == 5, "");
56 ExternT in[array_size(input)];
57 InternT exp[array_size(expected)];
58 copy(begin(input), end(input), begin(in));
59 copy(begin(expected), end(expected), begin(exp));
60 assert(char_traits<ExternT>::length(in) == 10);
61 assert(char_traits<InternT>::length(exp) == 4);
62 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
63 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
64 test_offsets_ok t = *it;
65 InternT out[array_size(exp) - 1] = {};
66 assert(t.in_size <= array_size(in));
67 assert(t.out_size <= array_size(out));
68 mbstate_t state = {};
69 const ExternT* in_next = nullptr;
70 InternT* out_next = nullptr;
71 codecvt_base::result res = codecvt_base::ok;
73 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
74 assert(res == cvt.ok);
75 assert(in_next == in + t.in_size);
76 assert(out_next == out + t.out_size);
77 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
78 if (t.out_size < array_size(out))
79 assert(out[t.out_size] == 0);
81 state = mbstate_t();
82 int len = cvt.length(state, in, in + t.in_size, t.out_size);
83 assert(len >= 0);
84 assert(static_cast<size_t>(len) == t.in_size);
87 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
88 test_offsets_ok t = *it;
89 InternT out[array_size(exp)] = {};
90 assert(t.in_size <= array_size(in));
91 assert(t.out_size <= array_size(out));
92 mbstate_t state = {};
93 const ExternT* in_next = nullptr;
94 InternT* out_next = nullptr;
95 codecvt_base::result res = codecvt_base::ok;
97 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
98 assert(res == cvt.ok);
99 assert(in_next == in + t.in_size);
100 assert(out_next == out + t.out_size);
101 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
102 if (t.out_size < array_size(out))
103 assert(out[t.out_size] == 0);
105 state = mbstate_t();
106 int len = cvt.length(state, in, in + t.in_size, array_size(out));
107 assert(len >= 0);
108 assert(static_cast<size_t>(len) == t.in_size);
112 template <class InternT, class ExternT>
113 void utf8_to_utf32_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
114 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
115 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
116 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
117 static_assert(array_size(input) == 11, "");
118 static_assert(array_size(expected) == 5, "");
120 ExternT in[array_size(input)];
121 InternT exp[array_size(expected)];
122 copy(begin(input), end(input), begin(in));
123 copy(begin(expected), end(expected), begin(exp));
124 assert(char_traits<ExternT>::length(in) == 10);
125 assert(char_traits<InternT>::length(exp) == 4);
127 test_offsets_partial offsets[] = {
128 {1, 0, 0, 0}, // no space for first CP
130 {3, 1, 1, 1}, // no space for second CP
131 {2, 2, 1, 1}, // incomplete second CP
132 {2, 1, 1, 1}, // incomplete second CP, and no space for it
134 {6, 2, 3, 2}, // no space for third CP
135 {4, 3, 3, 2}, // incomplete third CP
136 {5, 3, 3, 2}, // incomplete third CP
137 {4, 2, 3, 2}, // incomplete third CP, and no space for it
138 {5, 2, 3, 2}, // incomplete third CP, and no space for it
140 {10, 3, 6, 3}, // no space for fourth CP
141 {7, 4, 6, 3}, // incomplete fourth CP
142 {8, 4, 6, 3}, // incomplete fourth CP
143 {9, 4, 6, 3}, // incomplete fourth CP
144 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
145 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
146 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
149 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
150 test_offsets_partial t = *it;
151 InternT out[array_size(exp) - 1] = {};
152 assert(t.in_size <= array_size(in));
153 assert(t.out_size <= array_size(out));
154 assert(t.expected_in_next <= t.in_size);
155 assert(t.expected_out_next <= t.out_size);
156 mbstate_t state = {};
157 const ExternT* in_next = nullptr;
158 InternT* out_next = nullptr;
159 codecvt_base::result res = codecvt_base::ok;
161 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
162 assert(res == cvt.partial);
163 assert(in_next == in + t.expected_in_next);
164 assert(out_next == out + t.expected_out_next);
165 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
166 if (t.expected_out_next < array_size(out))
167 assert(out[t.expected_out_next] == 0);
169 state = mbstate_t();
170 int len = cvt.length(state, in, in + t.in_size, t.out_size);
171 assert(len >= 0);
172 assert(static_cast<size_t>(len) == t.expected_in_next);
176 template <class InternT, class ExternT>
177 void utf8_to_utf32_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
178 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
179 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
180 const char32_t expected[] = {'b', 0x0448, 0xD700, 0x10AAAA, 0};
181 static_assert(array_size(input) == 11, "");
182 static_assert(array_size(expected) == 5, "");
184 ExternT in[array_size(input)];
185 InternT exp[array_size(expected)];
186 copy(begin(input), end(input), begin(in));
187 copy(begin(expected), end(expected), begin(exp));
188 assert(char_traits<ExternT>::length(in) == 10);
189 assert(char_traits<InternT>::length(exp) == 4);
191 // There are 5 classes of errors in UTF-8 decoding
192 // 1. Missing leading byte
193 // 2. Missing trailing byte
194 // 3. Surrogate CP
195 // 4. Overlong sequence
196 // 5. CP out of Unicode range
197 test_offsets_error<unsigned char> offsets[] = {
199 // 1. Missing leading byte. We will replace the leading byte with
200 // non-leading byte, such as a byte that is always invalid or a trailing
201 // byte.
203 // replace leading byte with invalid byte
204 {1, 4, 0, 0, 0xFF, 0},
205 {3, 4, 1, 1, 0xFF, 1},
206 {6, 4, 3, 2, 0xFF, 3},
207 {10, 4, 6, 3, 0xFF, 6},
209 // replace leading byte with trailing byte
210 {1, 4, 0, 0, 0b10101010, 0},
211 {3, 4, 1, 1, 0b10101010, 1},
212 {6, 4, 3, 2, 0b10101010, 3},
213 {10, 4, 6, 3, 0b10101010, 6},
215 // 2. Missing trailing byte. We will replace the trailing byte with
216 // non-trailing byte, such as a byte that is always invalid or a leading
217 // byte (simple ASCII byte in our case).
219 // replace first trailing byte with ASCII byte
220 {3, 4, 1, 1, 'z', 2},
221 {6, 4, 3, 2, 'z', 4},
222 {10, 4, 6, 3, 'z', 7},
224 // replace first trailing byte with invalid byte
225 {3, 4, 1, 1, 0xFF, 2},
226 {6, 4, 3, 2, 0xFF, 4},
227 {10, 4, 6, 3, 0xFF, 7},
229 // replace second trailing byte with ASCII byte
230 {6, 4, 3, 2, 'z', 5},
231 {10, 4, 6, 3, 'z', 8},
233 // replace second trailing byte with invalid byte
234 {6, 4, 3, 2, 0xFF, 5},
235 {10, 4, 6, 3, 0xFF, 8},
237 // replace third trailing byte
238 {10, 4, 6, 3, 'z', 9},
239 {10, 4, 6, 3, 0xFF, 9},
241 // 2.1 The following test-cases raise doubt whether error or partial should
242 // be returned. For example, we have 4-byte sequence with valid leading
243 // byte. If we hide the last byte we need to return partial. But, if the
244 // second or third byte, which are visible to the call to codecvt, are
245 // malformed then error should be returned.
247 // replace first trailing byte with ASCII byte, also incomplete at end
248 {5, 4, 3, 2, 'z', 4},
249 {8, 4, 6, 3, 'z', 7},
250 {9, 4, 6, 3, 'z', 7},
252 // replace first trailing byte with invalid byte, also incomplete at end
253 {5, 4, 3, 2, 0xFF, 4},
254 {8, 4, 6, 3, 0xFF, 7},
255 {9, 4, 6, 3, 0xFF, 7},
257 // replace second trailing byte with ASCII byte, also incomplete at end
258 {9, 4, 6, 3, 'z', 8},
260 // replace second trailing byte with invalid byte, also incomplete at end
261 {9, 4, 6, 3, 0xFF, 8},
263 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
264 // CP U+D700
265 {6, 4, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
266 {6, 4, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
267 {6, 4, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
268 {6, 4, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
270 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
271 // just the leading byte is enough to make them overlong, i.e. for the
272 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
273 // zeroes.
274 {3, 4, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
275 {3, 4, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
276 {6, 4, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
277 {10, 4, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
279 // 5. CP above range
280 // turn U+10AAAA into U+14AAAA by changing its leading byte
281 {10, 4, 6, 3, 0b11110101, 6},
282 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
283 {10, 4, 6, 3, 0b10011010, 7},
285 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
286 test_offsets_error<unsigned char> t = *it;
287 InternT out[array_size(exp) - 1] = {};
288 assert(t.in_size <= array_size(in));
289 assert(t.out_size <= array_size(out));
290 assert(t.expected_in_next <= t.in_size);
291 assert(t.expected_out_next <= t.out_size);
292 ExternT old_char = in[t.replace_pos];
293 in[t.replace_pos] = t.replace_char;
295 mbstate_t state = {};
296 const ExternT* in_next = nullptr;
297 InternT* out_next = nullptr;
298 codecvt_base::result res = codecvt_base::ok;
300 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
301 assert(res == cvt.error);
302 assert(in_next == in + t.expected_in_next);
303 assert(out_next == out + t.expected_out_next);
304 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
305 if (t.expected_out_next < array_size(out))
306 assert(out[t.expected_out_next] == 0);
308 state = mbstate_t();
309 int len = cvt.length(state, in, in + t.in_size, t.out_size);
310 assert(len >= 0);
311 assert(static_cast<size_t>(len) == t.expected_in_next);
313 in[t.replace_pos] = old_char;
// Runs every UTF-8 -> UTF-32 decoding check (ok, partial and error cases)
// against the given facet.
template <class InternT, class ExternT>
void utf8_to_utf32_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in_ok(cvt);
  utf8_to_utf32_in_partial(cvt);
  utf8_to_utf32_in_error(cvt);
}
324 template <class InternT, class ExternT>
325 void utf32_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
326 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
327 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
328 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
329 static_assert(array_size(input) == 5, "");
330 static_assert(array_size(expected) == 11, "");
332 InternT in[array_size(input)];
333 ExternT exp[array_size(expected)];
334 copy(begin(input), end(input), begin(in));
335 copy(begin(expected), end(expected), begin(exp));
336 assert(char_traits<InternT>::length(in) == 4);
337 assert(char_traits<ExternT>::length(exp) == 10);
339 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
340 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
341 test_offsets_ok t = *it;
342 ExternT out[array_size(exp) - 1] = {};
343 assert(t.in_size <= array_size(in));
344 assert(t.out_size <= array_size(out));
345 mbstate_t state = {};
346 const InternT* in_next = nullptr;
347 ExternT* out_next = nullptr;
348 codecvt_base::result res = codecvt_base::ok;
350 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
351 assert(res == cvt.ok);
352 assert(in_next == in + t.in_size);
353 assert(out_next == out + t.out_size);
354 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
355 if (t.out_size < array_size(out))
356 assert(out[t.out_size] == 0);
360 template <class InternT, class ExternT>
361 void utf32_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
362 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
363 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
364 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
365 static_assert(array_size(input) == 5, "");
366 static_assert(array_size(expected) == 11, "");
368 InternT in[array_size(input)];
369 ExternT exp[array_size(expected)];
370 copy(begin(input), end(input), begin(in));
371 copy(begin(expected), end(expected), begin(exp));
372 assert(char_traits<InternT>::length(in) == 4);
373 assert(char_traits<ExternT>::length(exp) == 10);
375 test_offsets_partial offsets[] = {
376 {1, 0, 0, 0}, // no space for first CP
378 {2, 1, 1, 1}, // no space for second CP
379 {2, 2, 1, 1}, // no space for second CP
381 {3, 3, 2, 3}, // no space for third CP
382 {3, 4, 2, 3}, // no space for third CP
383 {3, 5, 2, 3}, // no space for third CP
385 {4, 6, 3, 6}, // no space for fourth CP
386 {4, 7, 3, 6}, // no space for fourth CP
387 {4, 8, 3, 6}, // no space for fourth CP
388 {4, 9, 3, 6}, // no space for fourth CP
390 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
391 test_offsets_partial t = *it;
392 ExternT out[array_size(exp) - 1] = {};
393 assert(t.in_size <= array_size(in));
394 assert(t.out_size <= array_size(out));
395 assert(t.expected_in_next <= t.in_size);
396 assert(t.expected_out_next <= t.out_size);
397 mbstate_t state = {};
398 const InternT* in_next = nullptr;
399 ExternT* out_next = nullptr;
400 codecvt_base::result res = codecvt_base::ok;
402 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
403 assert(res == cvt.partial);
404 assert(in_next == in + t.expected_in_next);
405 assert(out_next == out + t.expected_out_next);
406 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
407 if (t.expected_out_next < array_size(out))
408 assert(out[t.expected_out_next] == 0);
412 template <class InternT, class ExternT>
413 void utf32_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
414 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
415 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
416 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
417 static_assert(array_size(input) == 5, "");
418 static_assert(array_size(expected) == 11, "");
420 InternT in[array_size(input)];
421 ExternT exp[array_size(expected)];
422 copy(begin(input), end(input), begin(in));
423 copy(begin(expected), end(expected), begin(exp));
424 assert(char_traits<InternT>::length(in) == 4);
425 assert(char_traits<ExternT>::length(exp) == 10);
427 test_offsets_error<InternT> offsets[] = {
429 // Surrogate CP
430 {4, 10, 0, 0, 0xD800, 0},
431 {4, 10, 1, 1, 0xDBFF, 1},
432 {4, 10, 2, 3, 0xDC00, 2},
433 {4, 10, 3, 6, 0xDFFF, 3},
435 // CP out of range
436 {4, 10, 0, 0, 0x00110000, 0},
437 {4, 10, 1, 1, 0x00110000, 1},
438 {4, 10, 2, 3, 0x00110000, 2},
439 {4, 10, 3, 6, 0x00110000, 3}};
441 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
442 test_offsets_error<InternT> t = *it;
443 ExternT out[array_size(exp) - 1] = {};
444 assert(t.in_size <= array_size(in));
445 assert(t.out_size <= array_size(out));
446 assert(t.expected_in_next <= t.in_size);
447 assert(t.expected_out_next <= t.out_size);
448 InternT old_char = in[t.replace_pos];
449 in[t.replace_pos] = t.replace_char;
451 mbstate_t state = {};
452 const InternT* in_next = nullptr;
453 ExternT* out_next = nullptr;
454 codecvt_base::result res = codecvt_base::ok;
456 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
457 assert(res == cvt.error);
458 assert(in_next == in + t.expected_in_next);
459 assert(out_next == out + t.expected_out_next);
460 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
461 if (t.expected_out_next < array_size(out))
462 assert(out[t.expected_out_next] == 0);
464 in[t.replace_pos] = old_char;
// Runs every UTF-32 -> UTF-8 encoding check (ok, partial and error cases)
// against the given facet.
template <class InternT, class ExternT>
void utf32_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf32_to_utf8_out_ok(cvt);
  utf32_to_utf8_out_partial(cvt);
  utf32_to_utf8_out_error(cvt);
}
// Exercises a UTF-8 <-> UTF-32 facet in both directions: decoding with in()
// and encoding with out().
template <class InternT, class ExternT>
void test_utf8_utf32_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in(cvt);
  utf32_to_utf8_out(cvt);
}
481 template <class InternT, class ExternT>
482 void utf8_to_utf16_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
483 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
484 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
485 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
486 static_assert(array_size(input) == 11, "");
487 static_assert(array_size(expected) == 6, "");
489 ExternT in[array_size(input)];
490 InternT exp[array_size(expected)];
491 copy(begin(input), end(input), begin(in));
492 copy(begin(expected), end(expected), begin(exp));
493 assert(char_traits<ExternT>::length(in) == 10);
494 assert(char_traits<InternT>::length(exp) == 5);
496 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
497 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
498 test_offsets_ok t = *it;
499 InternT out[array_size(exp) - 1] = {};
500 assert(t.in_size <= array_size(in));
501 assert(t.out_size <= array_size(out));
502 mbstate_t state = {};
503 const ExternT* in_next = nullptr;
504 InternT* out_next = nullptr;
505 codecvt_base::result res = codecvt_base::ok;
507 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
508 assert(res == cvt.ok);
509 assert(in_next == in + t.in_size);
510 assert(out_next == out + t.out_size);
511 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
512 if (t.out_size < array_size(out))
513 assert(out[t.out_size] == 0);
515 state = mbstate_t();
516 int len = cvt.length(state, in, in + t.in_size, t.out_size);
517 assert(len >= 0);
518 assert(static_cast<size_t>(len) == t.in_size);
521 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
522 test_offsets_ok t = *it;
523 InternT out[array_size(exp)] = {};
524 assert(t.in_size <= array_size(in));
525 assert(t.out_size <= array_size(out));
526 mbstate_t state = {};
527 const ExternT* in_next = nullptr;
528 InternT* out_next = nullptr;
529 codecvt_base::result res = codecvt_base::ok;
531 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
532 assert(res == cvt.ok);
533 assert(in_next == in + t.in_size);
534 assert(out_next == out + t.out_size);
535 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
536 if (t.out_size < array_size(out))
537 assert(out[t.out_size] == 0);
539 state = mbstate_t();
540 int len = cvt.length(state, in, in + t.in_size, array_size(out));
541 assert(len >= 0);
542 assert(static_cast<size_t>(len) == t.in_size);
546 template <class InternT, class ExternT>
547 void utf8_to_utf16_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
548 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
549 const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
550 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
551 static_assert(array_size(input) == 11, "");
552 static_assert(array_size(expected) == 6, "");
554 ExternT in[array_size(input)];
555 InternT exp[array_size(expected)];
556 copy(begin(input), end(input), begin(in));
557 copy(begin(expected), end(expected), begin(exp));
558 assert(char_traits<ExternT>::length(in) == 10);
559 assert(char_traits<InternT>::length(exp) == 5);
561 test_offsets_partial offsets[] = {
562 {1, 0, 0, 0}, // no space for first CP
564 {3, 1, 1, 1}, // no space for second CP
565 {2, 2, 1, 1}, // incomplete second CP
566 {2, 1, 1, 1}, // incomplete second CP, and no space for it
568 {6, 2, 3, 2}, // no space for third CP
569 {4, 3, 3, 2}, // incomplete third CP
570 {5, 3, 3, 2}, // incomplete third CP
571 {4, 2, 3, 2}, // incomplete third CP, and no space for it
572 {5, 2, 3, 2}, // incomplete third CP, and no space for it
574 {10, 3, 6, 3}, // no space for fourth CP
575 {10, 4, 6, 3}, // no space for fourth CP
576 {7, 5, 6, 3}, // incomplete fourth CP
577 {8, 5, 6, 3}, // incomplete fourth CP
578 {9, 5, 6, 3}, // incomplete fourth CP
579 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
580 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
581 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
582 {7, 4, 6, 3}, // incomplete fourth CP, and no space for it
583 {8, 4, 6, 3}, // incomplete fourth CP, and no space for it
584 {9, 4, 6, 3}, // incomplete fourth CP, and no space for it
588 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
589 test_offsets_partial t = *it;
590 InternT out[array_size(exp) - 1] = {};
591 assert(t.in_size <= array_size(in));
592 assert(t.out_size <= array_size(out));
593 assert(t.expected_in_next <= t.in_size);
594 assert(t.expected_out_next <= t.out_size);
595 mbstate_t state = {};
596 const ExternT* in_next = nullptr;
597 InternT* out_next = nullptr;
598 codecvt_base::result res = codecvt_base::ok;
600 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
601 assert(res == cvt.partial);
602 assert(in_next == in + t.expected_in_next);
603 assert(out_next == out + t.expected_out_next);
604 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
605 if (t.expected_out_next < array_size(out))
606 assert(out[t.expected_out_next] == 0);
608 state = mbstate_t();
609 int len = cvt.length(state, in, in + t.in_size, t.out_size);
610 assert(len >= 0);
611 assert(static_cast<size_t>(len) == t.expected_in_next);
615 template <class InternT, class ExternT>
616 void utf8_to_utf16_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
617 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
618 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
619 const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
620 static_assert(array_size(input) == 11, "");
621 static_assert(array_size(expected) == 6, "");
623 ExternT in[array_size(input)];
624 InternT exp[array_size(expected)];
625 copy(begin(input), end(input), begin(in));
626 copy(begin(expected), end(expected), begin(exp));
627 assert(char_traits<ExternT>::length(in) == 10);
628 assert(char_traits<InternT>::length(exp) == 5);
630 // There are 5 classes of errors in UTF-8 decoding
631 // 1. Missing leading byte
632 // 2. Missing trailing byte
633 // 3. Surrogate CP
634 // 4. Overlong sequence
635 // 5. CP out of Unicode range
636 test_offsets_error<unsigned char> offsets[] = {
638 // 1. Missing leading byte. We will replace the leading byte with
639 // non-leading byte, such as a byte that is always invalid or a trailing
640 // byte.
642 // replace leading byte with invalid byte
643 {1, 5, 0, 0, 0xFF, 0},
644 {3, 5, 1, 1, 0xFF, 1},
645 {6, 5, 3, 2, 0xFF, 3},
646 {10, 5, 6, 3, 0xFF, 6},
648 // replace leading byte with trailing byte
649 {1, 5, 0, 0, 0b10101010, 0},
650 {3, 5, 1, 1, 0b10101010, 1},
651 {6, 5, 3, 2, 0b10101010, 3},
652 {10, 5, 6, 3, 0b10101010, 6},
654 // 2. Missing trailing byte. We will replace the trailing byte with
655 // non-trailing byte, such as a byte that is always invalid or a leading
656 // byte (simple ASCII byte in our case).
658 // replace first trailing byte with ASCII byte
659 {3, 5, 1, 1, 'z', 2},
660 {6, 5, 3, 2, 'z', 4},
661 {10, 5, 6, 3, 'z', 7},
663 // replace first trailing byte with invalid byte
664 {3, 5, 1, 1, 0xFF, 2},
665 {6, 5, 3, 2, 0xFF, 4},
666 {10, 5, 6, 3, 0xFF, 7},
668 // replace second trailing byte with ASCII byte
669 {6, 5, 3, 2, 'z', 5},
670 {10, 5, 6, 3, 'z', 8},
672 // replace second trailing byte with invalid byte
673 {6, 5, 3, 2, 0xFF, 5},
674 {10, 5, 6, 3, 0xFF, 8},
676 // replace third trailing byte
677 {10, 5, 6, 3, 'z', 9},
678 {10, 5, 6, 3, 0xFF, 9},
680 // 2.1 The following test-cases raise doubt whether error or partial should
681 // be returned. For example, we have 4-byte sequence with valid leading
682 // byte. If we hide the last byte we need to return partial. But, if the
683 // second or third byte, which are visible to the call to codecvt, are
684 // malformed then error should be returned.
686 // replace first trailing byte with ASCII byte, also incomplete at end
687 {5, 5, 3, 2, 'z', 4},
688 {8, 5, 6, 3, 'z', 7},
689 {9, 5, 6, 3, 'z', 7},
691 // replace first trailing byte with invalid byte, also incomplete at end
692 {5, 5, 3, 2, 0xFF, 4},
693 {8, 5, 6, 3, 0xFF, 7},
694 {9, 5, 6, 3, 0xFF, 7},
696 // replace second trailing byte with ASCII byte, also incomplete at end
697 {9, 5, 6, 3, 'z', 8},
699 // replace second trailing byte with invalid byte, also incomplete at end
700 {9, 5, 6, 3, 0xFF, 8},
702 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
703 // CP U+D700
704 {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
705 {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
706 {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
707 {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
709 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
710 // just the leading byte is enough to make them overlong, i.e. for the
711 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
712 // zeroes.
713 {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
714 {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
715 {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
716 {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
718 // 5. CP above range
719 // turn U+10AAAA into U+14AAAA by changing its leading byte
720 {10, 5, 6, 3, 0b11110101, 6},
721 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
722 {10, 5, 6, 3, 0b10011010, 7},
724 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
725 test_offsets_error<unsigned char> t = *it;
726 InternT out[array_size(exp) - 1] = {};
727 assert(t.in_size <= array_size(in));
728 assert(t.out_size <= array_size(out));
729 assert(t.expected_in_next <= t.in_size);
730 assert(t.expected_out_next <= t.out_size);
731 ExternT old_char = in[t.replace_pos];
732 in[t.replace_pos] = t.replace_char;
734 mbstate_t state = {};
735 const ExternT* in_next = nullptr;
736 InternT* out_next = nullptr;
737 codecvt_base::result res = codecvt_base::ok;
739 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
740 assert(res == cvt.error);
741 assert(in_next == in + t.expected_in_next);
742 assert(out_next == out + t.expected_out_next);
743 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
744 if (t.expected_out_next < array_size(out))
745 assert(out[t.expected_out_next] == 0);
747 state = mbstate_t();
748 int len = cvt.length(state, in, in + t.in_size, t.out_size);
749 assert(len >= 0);
750 assert(static_cast<size_t>(len) == t.expected_in_next);
752 in[t.replace_pos] = old_char;
// Runs every UTF-8 -> UTF-16 decoding check (ok, partial and error cases)
// against the given facet.
template <class InternT, class ExternT>
void utf8_to_utf16_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in_ok(cvt);
  utf8_to_utf16_in_partial(cvt);
  utf8_to_utf16_in_error(cvt);
}
763 template <class InternT, class ExternT>
764 void utf16_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
765 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
766 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
767 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
768 static_assert(array_size(input) == 6, "");
769 static_assert(array_size(expected) == 11, "");
771 InternT in[array_size(input)];
772 ExternT exp[array_size(expected)];
773 copy(begin(input), end(input), begin(in));
774 copy(begin(expected), end(expected), begin(exp));
775 assert(char_traits<InternT>::length(in) == 5);
776 assert(char_traits<ExternT>::length(exp) == 10);
778 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
779 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
780 test_offsets_ok t = *it;
781 ExternT out[array_size(exp) - 1] = {};
782 assert(t.in_size <= array_size(in));
783 assert(t.out_size <= array_size(out));
784 mbstate_t state = {};
785 const InternT* in_next = nullptr;
786 ExternT* out_next = nullptr;
787 codecvt_base::result res = codecvt_base::ok;
789 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
790 assert(res == cvt.ok);
791 assert(in_next == in + t.in_size);
792 assert(out_next == out + t.out_size);
793 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
794 if (t.out_size < array_size(out))
795 assert(out[t.out_size] == 0);
799 template <class InternT, class ExternT>
800 void utf16_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
801 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
802 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
803 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
804 static_assert(array_size(input) == 6, "");
805 static_assert(array_size(expected) == 11, "");
807 InternT in[array_size(input)];
808 ExternT exp[array_size(expected)];
809 copy(begin(input), end(input), begin(in));
810 copy(begin(expected), end(expected), begin(exp));
811 assert(char_traits<InternT>::length(in) == 5);
812 assert(char_traits<ExternT>::length(exp) == 10);
814 test_offsets_partial offsets[] = {
815 {1, 0, 0, 0}, // no space for first CP
817 {2, 1, 1, 1}, // no space for second CP
818 {2, 2, 1, 1}, // no space for second CP
820 {3, 3, 2, 3}, // no space for third CP
821 {3, 4, 2, 3}, // no space for third CP
822 {3, 5, 2, 3}, // no space for third CP
824 {5, 6, 3, 6}, // no space for fourth CP
825 {5, 7, 3, 6}, // no space for fourth CP
826 {5, 8, 3, 6}, // no space for fourth CP
827 {5, 9, 3, 6}, // no space for fourth CP
829 {4, 10, 3, 6}, // incomplete fourth CP
831 {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
832 {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
833 {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
834 {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
836 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
837 test_offsets_partial t = *it;
838 ExternT out[array_size(exp) - 1] = {};
839 assert(t.in_size <= array_size(in));
840 assert(t.out_size <= array_size(out));
841 assert(t.expected_in_next <= t.in_size);
842 assert(t.expected_out_next <= t.out_size);
843 mbstate_t state = {};
844 const InternT* in_next = nullptr;
845 ExternT* out_next = nullptr;
846 codecvt_base::result res = codecvt_base::ok;
848 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
849 assert(res == cvt.partial);
850 assert(in_next == in + t.expected_in_next);
851 assert(out_next == out + t.expected_out_next);
852 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
853 if (t.expected_out_next < array_size(out))
854 assert(out[t.expected_out_next] == 0);
858 template <class InternT, class ExternT>
859 void utf16_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
860 // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
861 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
862 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
863 static_assert(array_size(input) == 6, "");
864 static_assert(array_size(expected) == 11, "");
866 InternT in[array_size(input)];
867 ExternT exp[array_size(expected)];
868 copy(begin(input), end(input), begin(in));
869 copy(begin(expected), end(expected), begin(exp));
870 assert(char_traits<InternT>::length(in) == 5);
871 assert(char_traits<ExternT>::length(exp) == 10);
873 // The only possible error in UTF-16 is unpaired surrogate code units.
874 // So we replace valid code points (scalar values) with lone surrogate CU.
875 test_offsets_error<InternT> offsets[] = {
876 {5, 10, 0, 0, 0xD800, 0},
877 {5, 10, 0, 0, 0xDBFF, 0},
878 {5, 10, 0, 0, 0xDC00, 0},
879 {5, 10, 0, 0, 0xDFFF, 0},
881 {5, 10, 1, 1, 0xD800, 1},
882 {5, 10, 1, 1, 0xDBFF, 1},
883 {5, 10, 1, 1, 0xDC00, 1},
884 {5, 10, 1, 1, 0xDFFF, 1},
886 {5, 10, 2, 3, 0xD800, 2},
887 {5, 10, 2, 3, 0xDBFF, 2},
888 {5, 10, 2, 3, 0xDC00, 2},
889 {5, 10, 2, 3, 0xDFFF, 2},
891 // make the leading surrogate a trailing one
892 {5, 10, 3, 6, 0xDC00, 3},
893 {5, 10, 3, 6, 0xDFFF, 3},
895 // make the trailing surrogate a leading one
896 {5, 10, 3, 6, 0xD800, 4},
897 {5, 10, 3, 6, 0xDBFF, 4},
899 // make the trailing surrogate a BMP char
900 {5, 10, 3, 6, 'z', 4},
903 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
904 test_offsets_error<InternT> t = *it;
905 ExternT out[array_size(exp) - 1] = {};
906 assert(t.in_size <= array_size(in));
907 assert(t.out_size <= array_size(out));
908 assert(t.expected_in_next <= t.in_size);
909 assert(t.expected_out_next <= t.out_size);
910 InternT old_char = in[t.replace_pos];
911 in[t.replace_pos] = t.replace_char;
913 mbstate_t state = {};
914 const InternT* in_next = nullptr;
915 ExternT* out_next = nullptr;
916 codecvt_base::result res = codecvt_base::ok;
918 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
919 assert(res == cvt.error);
920 assert(in_next == in + t.expected_in_next);
921 assert(out_next == out + t.expected_out_next);
922 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
923 if (t.expected_out_next < array_size(out))
924 assert(out[t.expected_out_next] == 0);
926 in[t.replace_pos] = old_char;
// Runs the ok, partial and error checks of cvt.out() for a facet that
// converts UTF-16 to UTF-8.
template <class InternT, class ExternT>
void utf16_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf16_to_utf8_out_ok(cvt);
  utf16_to_utf8_out_partial(cvt);
  utf16_to_utf8_out_error(cvt);
}
// Exercises both directions of a UTF-8 <-> UTF-16 facet: in() via
// utf8_to_utf16_in and out() via utf16_to_utf8_out.
template <class InternT, class ExternT>
void test_utf8_utf16_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in(cvt);
  utf16_to_utf8_out(cvt);
}
943 template <class InternT, class ExternT>
944 void utf8_to_ucs2_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
945 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
946 const unsigned char input[] = "b\u0448\uAAAA";
947 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
948 static_assert(array_size(input) == 7, "");
949 static_assert(array_size(expected) == 4, "");
951 ExternT in[array_size(input)];
952 InternT exp[array_size(expected)];
953 copy(begin(input), end(input), begin(in));
954 copy(begin(expected), end(expected), begin(exp));
955 assert(char_traits<ExternT>::length(in) == 6);
956 assert(char_traits<InternT>::length(exp) == 3);
958 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
959 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
960 test_offsets_ok t = *it;
961 InternT out[array_size(exp) - 1] = {};
962 assert(t.in_size <= array_size(in));
963 assert(t.out_size <= array_size(out));
964 mbstate_t state = {};
965 const ExternT* in_next = nullptr;
966 InternT* out_next = nullptr;
967 codecvt_base::result res = codecvt_base::ok;
969 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
970 assert(res == cvt.ok);
971 assert(in_next == in + t.in_size);
972 assert(out_next == out + t.out_size);
973 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
974 if (t.out_size < array_size(out))
975 assert(out[t.out_size] == 0);
977 state = mbstate_t();
978 int len = cvt.length(state, in, in + t.in_size, t.out_size);
979 assert(len >= 0);
980 assert(static_cast<size_t>(len) == t.in_size);
983 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
984 test_offsets_ok t = *it;
985 InternT out[array_size(exp)] = {};
986 assert(t.in_size <= array_size(in));
987 assert(t.out_size <= array_size(out));
988 mbstate_t state = {};
989 const ExternT* in_next = nullptr;
990 InternT* out_next = nullptr;
991 codecvt_base::result res = codecvt_base::ok;
993 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
994 assert(res == cvt.ok);
995 assert(in_next == in + t.in_size);
996 assert(out_next == out + t.out_size);
997 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
998 if (t.out_size < array_size(out))
999 assert(out[t.out_size] == 0);
1001 state = mbstate_t();
1002 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1003 assert(len >= 0);
1004 assert(static_cast<size_t>(len) == t.in_size);
1008 template <class InternT, class ExternT>
1009 void utf8_to_ucs2_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1010 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1011 const unsigned char input[] = "b\u0448\uAAAA";
1012 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1013 static_assert(array_size(input) == 7, "");
1014 static_assert(array_size(expected) == 4, "");
1016 ExternT in[array_size(input)];
1017 InternT exp[array_size(expected)];
1018 copy(begin(input), end(input), begin(in));
1019 copy(begin(expected), end(expected), begin(exp));
1020 assert(char_traits<ExternT>::length(in) == 6);
1021 assert(char_traits<InternT>::length(exp) == 3);
1023 test_offsets_partial offsets[] = {
1024 {1, 0, 0, 0}, // no space for first CP
1026 {3, 1, 1, 1}, // no space for second CP
1027 {2, 2, 1, 1}, // incomplete second CP
1028 {2, 1, 1, 1}, // incomplete second CP, and no space for it
1030 {6, 2, 3, 2}, // no space for third CP
1031 {4, 3, 3, 2}, // incomplete third CP
1032 {5, 3, 3, 2}, // incomplete third CP
1033 {4, 2, 3, 2}, // incomplete third CP, and no space for it
1034 {5, 2, 3, 2}, // incomplete third CP, and no space for it
1037 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1038 test_offsets_partial t = *it;
1039 InternT out[array_size(exp) - 1] = {};
1040 assert(t.in_size <= array_size(in));
1041 assert(t.out_size <= array_size(out));
1042 assert(t.expected_in_next <= t.in_size);
1043 assert(t.expected_out_next <= t.out_size);
1044 mbstate_t state = {};
1045 const ExternT* in_next = nullptr;
1046 InternT* out_next = nullptr;
1047 codecvt_base::result res = codecvt_base::ok;
1049 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1050 assert(res == cvt.partial);
1051 assert(in_next == in + t.expected_in_next);
1052 assert(out_next == out + t.expected_out_next);
1053 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1054 if (t.expected_out_next < array_size(out))
1055 assert(out[t.expected_out_next] == 0);
1057 state = mbstate_t();
1058 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1059 assert(len >= 0);
1060 assert(static_cast<size_t>(len) == t.expected_in_next);
1064 template <class InternT, class ExternT>
1065 void utf8_to_ucs2_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1066 const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
1067 const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0};
1068 static_assert(array_size(input) == 11, "");
1069 static_assert(array_size(expected) == 6, "");
1071 ExternT in[array_size(input)];
1072 InternT exp[array_size(expected)];
1073 copy(begin(input), end(input), begin(in));
1074 copy(begin(expected), end(expected), begin(exp));
1075 assert(char_traits<ExternT>::length(in) == 10);
1076 assert(char_traits<InternT>::length(exp) == 5);
1078 // There are 5 classes of errors in UTF-8 decoding
1079 // 1. Missing leading byte
1080 // 2. Missing trailing byte
1081 // 3. Surrogate CP
1082 // 4. Overlong sequence
1083 // 5. CP out of Unicode range
1084 test_offsets_error<unsigned char> offsets[] = {
1086 // 1. Missing leading byte. We will replace the leading byte with
1087 // non-leading byte, such as a byte that is always invalid or a trailing
1088 // byte.
1090 // replace leading byte with invalid byte
1091 {1, 5, 0, 0, 0xFF, 0},
1092 {3, 5, 1, 1, 0xFF, 1},
1093 {6, 5, 3, 2, 0xFF, 3},
1094 {10, 5, 6, 3, 0xFF, 6},
1096 // replace leading byte with trailing byte
1097 {1, 5, 0, 0, 0b10101010, 0},
1098 {3, 5, 1, 1, 0b10101010, 1},
1099 {6, 5, 3, 2, 0b10101010, 3},
1100 {10, 5, 6, 3, 0b10101010, 6},
1102 // 2. Missing trailing byte. We will replace the trailing byte with
1103 // non-trailing byte, such as a byte that is always invalid or a leading
1104 // byte (simple ASCII byte in our case).
1106 // replace first trailing byte with ASCII byte
1107 {3, 5, 1, 1, 'z', 2},
1108 {6, 5, 3, 2, 'z', 4},
1109 {10, 5, 6, 3, 'z', 7},
1111 // replace first trailing byte with invalid byte
1112 {3, 5, 1, 1, 0xFF, 2},
1113 {6, 5, 3, 2, 0xFF, 4},
1114 {10, 5, 6, 3, 0xFF, 7},
1116 // replace second trailing byte with ASCII byte
1117 {6, 5, 3, 2, 'z', 5},
1118 {10, 5, 6, 3, 'z', 8},
1120 // replace second trailing byte with invalid byte
1121 {6, 5, 3, 2, 0xFF, 5},
1122 {10, 5, 6, 3, 0xFF, 8},
1124 // replace third trailing byte
1125 {10, 5, 6, 3, 'z', 9},
1126 {10, 5, 6, 3, 0xFF, 9},
1128 // 2.1 The following test-cases raise doubt whether error or partial should
1129 // be returned. For example, we have 4-byte sequence with valid leading
1130 // byte. If we hide the last byte we need to return partial. But, if the
1131 // second or third byte, which are visible to the call to codecvt, are
1132 // malformed then error should be returned.
1134 // replace first trailing byte with ASCII byte, also incomplete at end
1135 {5, 5, 3, 2, 'z', 4},
1136 {8, 5, 6, 3, 'z', 7},
1137 {9, 5, 6, 3, 'z', 7},
1139 // replace first trailing byte with invalid byte, also incomplete at end
1140 {5, 5, 3, 2, 0xFF, 4},
1141 {8, 5, 6, 3, 0xFF, 7},
1142 {9, 5, 6, 3, 0xFF, 7},
1144 // replace second trailing byte with ASCII byte, also incomplete at end
1145 {9, 5, 6, 3, 'z', 8},
1147 // replace second trailing byte with invalid byte, also incomplete at end
1148 {9, 5, 6, 3, 0xFF, 8},
1150 // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
1151 // CP U+D700
1152 {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
1153 {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
1154 {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
1155 {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00
1157 // 4. Overlong sequence. The CPs in the input are chosen such as modifying
1158 // just the leading byte is enough to make them overlong, i.e. for the
1159 // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
1160 // zeroes.
1161 {3, 5, 1, 1, 0b11000000, 1}, // make the 2-byte CP overlong
1162 {3, 5, 1, 1, 0b11000001, 1}, // make the 2-byte CP overlong
1163 {6, 5, 3, 2, 0b11100000, 3}, // make the 3-byte CP overlong
1164 {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong
1166 // 5. CP above range
1167 // turn U+10AAAA into U+14AAAA by changing its leading byte
1168 {10, 5, 6, 3, 0b11110101, 6},
1169 // turn U+10AAAA into U+11AAAA by changing its 2nd byte
1170 {10, 5, 6, 3, 0b10011010, 7},
1171 // Don't replace anything, show full 4-byte CP U+10AAAA
1172 {10, 4, 6, 3, 'b', 0},
1173 {10, 5, 6, 3, 'b', 0},
1174 // Don't replace anything, show incomplete 4-byte CP at the end. It's still
1175 // out of UCS2 range just by seeing the first byte.
1176 {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1177 {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1178 {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
1179 {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1180 {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1181 {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
1183 for (test_offsets_error<unsigned char>* it = begin(offsets); it != end(offsets); ++it) {
1184 test_offsets_error<unsigned char> t = *it;
1185 InternT out[array_size(exp) - 1] = {};
1186 assert(t.in_size <= array_size(in));
1187 assert(t.out_size <= array_size(out));
1188 assert(t.expected_in_next <= t.in_size);
1189 assert(t.expected_out_next <= t.out_size);
1190 ExternT old_char = in[t.replace_pos];
1191 in[t.replace_pos] = t.replace_char;
1193 mbstate_t state = {};
1194 const ExternT* in_next = nullptr;
1195 InternT* out_next = nullptr;
1196 codecvt_base::result res = codecvt_base::ok;
1198 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1199 assert(res == cvt.error);
1200 assert(in_next == in + t.expected_in_next);
1201 assert(out_next == out + t.expected_out_next);
1202 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1203 if (t.expected_out_next < array_size(out))
1204 assert(out[t.expected_out_next] == 0);
1206 state = mbstate_t();
1207 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1208 assert(len >= 0);
1209 assert(static_cast<size_t>(len) == t.expected_in_next);
1211 in[t.replace_pos] = old_char;
// Runs the ok, partial and error checks of cvt.in() (and cvt.length()) for a
// facet that converts UTF-8 to UCS-2.
template <class InternT, class ExternT>
void utf8_to_ucs2_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in_ok(cvt);
  utf8_to_ucs2_in_partial(cvt);
  utf8_to_ucs2_in_error(cvt);
}
1222 template <class InternT, class ExternT>
1223 void ucs2_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1224 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1225 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1226 const unsigned char expected[] = "b\u0448\uAAAA";
1227 static_assert(array_size(input) == 4, "");
1228 static_assert(array_size(expected) == 7, "");
1230 InternT in[array_size(input)];
1231 ExternT exp[array_size(expected)];
1232 copy(begin(input), end(input), begin(in));
1233 copy(begin(expected), end(expected), begin(exp));
1234 assert(char_traits<InternT>::length(in) == 3);
1235 assert(char_traits<ExternT>::length(exp) == 6);
1237 test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
1238 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1239 test_offsets_ok t = *it;
1240 ExternT out[array_size(exp) - 1] = {};
1241 assert(t.in_size <= array_size(in));
1242 assert(t.out_size <= array_size(out));
1243 mbstate_t state = {};
1244 const InternT* in_next = nullptr;
1245 ExternT* out_next = nullptr;
1246 codecvt_base::result res = codecvt_base::ok;
1248 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1249 assert(res == cvt.ok);
1250 assert(in_next == in + t.in_size);
1251 assert(out_next == out + t.out_size);
1252 assert(char_traits<ExternT>::compare(out, exp, t.out_size) == 0);
1253 if (t.out_size < array_size(out))
1254 assert(out[t.out_size] == 0);
1258 template <class InternT, class ExternT>
1259 void ucs2_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1260 // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1261 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1262 const unsigned char expected[] = "b\u0448\uAAAA";
1263 static_assert(array_size(input) == 4, "");
1264 static_assert(array_size(expected) == 7, "");
1266 InternT in[array_size(input)];
1267 ExternT exp[array_size(expected)];
1268 copy(begin(input), end(input), begin(in));
1269 copy(begin(expected), end(expected), begin(exp));
1270 assert(char_traits<InternT>::length(in) == 3);
1271 assert(char_traits<ExternT>::length(exp) == 6);
1273 test_offsets_partial offsets[] = {
1274 {1, 0, 0, 0}, // no space for first CP
1276 {2, 1, 1, 1}, // no space for second CP
1277 {2, 2, 1, 1}, // no space for second CP
1279 {3, 3, 2, 3}, // no space for third CP
1280 {3, 4, 2, 3}, // no space for third CP
1281 {3, 5, 2, 3}, // no space for third CP
1283 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1284 test_offsets_partial t = *it;
1285 ExternT out[array_size(exp) - 1] = {};
1286 assert(t.in_size <= array_size(in));
1287 assert(t.out_size <= array_size(out));
1288 assert(t.expected_in_next <= t.in_size);
1289 assert(t.expected_out_next <= t.out_size);
1290 mbstate_t state = {};
1291 const InternT* in_next = nullptr;
1292 ExternT* out_next = nullptr;
1293 codecvt_base::result res = codecvt_base::ok;
1295 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1296 assert(res == cvt.partial);
1297 assert(in_next == in + t.expected_in_next);
1298 assert(out_next == out + t.expected_out_next);
1299 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1300 if (t.expected_out_next < array_size(out))
1301 assert(out[t.expected_out_next] == 0);
1305 template <class InternT, class ExternT>
1306 void ucs2_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1307 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1308 const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
1309 static_assert(array_size(input) == 6, "");
1310 static_assert(array_size(expected) == 11, "");
1312 InternT in[array_size(input)];
1313 ExternT exp[array_size(expected)];
1314 copy(begin(input), end(input), begin(in));
1315 copy(begin(expected), end(expected), begin(exp));
1316 assert(char_traits<InternT>::length(in) == 5);
1317 assert(char_traits<ExternT>::length(exp) == 10);
1319 test_offsets_error<InternT> offsets[] = {
1320 {3, 6, 0, 0, 0xD800, 0},
1321 {3, 6, 0, 0, 0xDBFF, 0},
1322 {3, 6, 0, 0, 0xDC00, 0},
1323 {3, 6, 0, 0, 0xDFFF, 0},
1325 {3, 6, 1, 1, 0xD800, 1},
1326 {3, 6, 1, 1, 0xDBFF, 1},
1327 {3, 6, 1, 1, 0xDC00, 1},
1328 {3, 6, 1, 1, 0xDFFF, 1},
1330 {3, 6, 2, 3, 0xD800, 2},
1331 {3, 6, 2, 3, 0xDBFF, 2},
1332 {3, 6, 2, 3, 0xDC00, 2},
1333 {3, 6, 2, 3, 0xDFFF, 2},
1335 // make the leading surrogate a trailing one
1336 {5, 10, 3, 6, 0xDC00, 3},
1337 {5, 10, 3, 6, 0xDFFF, 3},
1339 // make the trailing surrogate a leading one
1340 {5, 10, 3, 6, 0xD800, 4},
1341 {5, 10, 3, 6, 0xDBFF, 4},
1343 // make the trailing surrogate a BMP char
1344 {5, 10, 3, 6, 'z', 4},
1346 // don't replace anything in the test cases bellow, just show the surrogate
1347 // pair (fourth CP) fully or partially
1348 {5, 10, 3, 6, 'b', 0},
1349 {5, 7, 3, 6, 'b', 0}, // no space for fourth CP
1350 {5, 8, 3, 6, 'b', 0}, // no space for fourth CP
1351 {5, 9, 3, 6, 'b', 0}, // no space for fourth CP
1353 {4, 10, 3, 6, 'b', 0}, // incomplete fourth CP
1354 {4, 7, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1355 {4, 8, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1356 {4, 9, 3, 6, 'b', 0}, // incomplete fourth CP, and no space for it
1359 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1360 test_offsets_error<InternT> t = *it;
1361 ExternT out[array_size(exp) - 1] = {};
1362 assert(t.in_size <= array_size(in));
1363 assert(t.out_size <= array_size(out));
1364 assert(t.expected_in_next <= t.in_size);
1365 assert(t.expected_out_next <= t.out_size);
1366 InternT old_char = in[t.replace_pos];
1367 in[t.replace_pos] = t.replace_char;
1369 mbstate_t state = {};
1370 const InternT* in_next = nullptr;
1371 ExternT* out_next = nullptr;
1372 codecvt_base::result res = codecvt_base::ok;
1374 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1375 assert(res == cvt.error);
1376 assert(in_next == in + t.expected_in_next);
1377 assert(out_next == out + t.expected_out_next);
1378 assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == 0);
1379 if (t.expected_out_next < array_size(out))
1380 assert(out[t.expected_out_next] == 0);
1382 in[t.replace_pos] = old_char;
// Runs the ok, partial and error checks of cvt.out() for a facet that
// converts UCS-2 to UTF-8.
template <class InternT, class ExternT>
void ucs2_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  ucs2_to_utf8_out_ok(cvt);
  ucs2_to_utf8_out_partial(cvt);
  ucs2_to_utf8_out_error(cvt);
}
// Exercises both directions of a UTF-8 <-> UCS-2 facet: in() via
// utf8_to_ucs2_in and out() via ucs2_to_utf8_out.
template <class InternT, class ExternT>
void test_utf8_ucs2_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in(cvt);
  ucs2_to_utf8_out(cvt);
}
// Byte order used when serializing UTF-16 code units into a byte sequence.
enum utf16_endianess { utf16_big_endian, utf16_little_endian };

// Writes every 16-bit code unit in [f, l) to `o` as two bytes in the byte
// order selected by `e`: high byte first for utf16_big_endian, low byte first
// otherwise. Returns the output iterator advanced past the last byte written.
template <class Iter1, class Iter2>
Iter2 utf16_to_bytes(Iter1 f, Iter1 l, Iter2 o, utf16_endianess e) {
  if (e == utf16_big_endian)
    for (; f != l; ++f) {
      *o++ = (*f >> 8) & 0xFF;
      *o++ = *f & 0xFF;
    }
  else
    for (; f != l; ++f) {
      *o++ = *f & 0xFF;
      *o++ = (*f >> 8) & 0xFF;
    }
  return o;
}
1416 template <class InternT>
1417 void utf16_to_utf32_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1418 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1419 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1420 static_assert(array_size(input) == 6, "");
1421 static_assert(array_size(expected) == 5, "");
1423 char in[array_size(input) * 2];
1424 InternT exp[array_size(expected)];
1425 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1426 copy(begin(expected), end(expected), begin(exp));
1428 test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}, {10, 4}};
1429 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1430 test_offsets_ok t = *it;
1431 InternT out[array_size(exp) - 1] = {};
1432 assert(t.in_size <= array_size(in));
1433 assert(t.out_size <= array_size(out));
1434 mbstate_t state = {};
1435 const char* in_next = nullptr;
1436 InternT* out_next = nullptr;
1437 codecvt_base::result res = codecvt_base::ok;
1439 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1440 assert(res == cvt.ok);
1441 assert(in_next == in + t.in_size);
1442 assert(out_next == out + t.out_size);
1443 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1444 if (t.out_size < array_size(out))
1445 assert(out[t.out_size] == 0);
1447 state = mbstate_t();
1448 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1449 assert(len >= 0);
1450 assert(static_cast<size_t>(len) == t.in_size);
1453 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1454 test_offsets_ok t = *it;
1455 InternT out[array_size(exp)] = {};
1456 assert(t.in_size <= array_size(in));
1457 assert(t.out_size <= array_size(out));
1458 mbstate_t state = {};
1459 const char* in_next = nullptr;
1460 InternT* out_next = nullptr;
1461 codecvt_base::result res = codecvt_base::ok;
1463 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1464 assert(res == cvt.ok);
1465 assert(in_next == in + t.in_size);
1466 assert(out_next == out + t.out_size);
1467 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1468 if (t.out_size < array_size(out))
1469 assert(out[t.out_size] == 0);
1471 state = mbstate_t();
1472 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1473 assert(len >= 0);
1474 assert(static_cast<size_t>(len) == t.in_size);
1478 template <class InternT>
1479 void utf16_to_utf32_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1480 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1481 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1482 static_assert(array_size(input) == 6, "");
1483 static_assert(array_size(expected) == 5, "");
1485 char in[array_size(input) * 2];
1486 InternT exp[array_size(expected)];
1487 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1488 copy(begin(expected), end(expected), begin(exp));
1490 test_offsets_partial offsets[] = {
1491 {2, 0, 0, 0}, // no space for first CP
1492 {1, 1, 0, 0}, // incomplete first CP
1493 {1, 0, 0, 0}, // incomplete first CP, and no space for it
1495 {4, 1, 2, 1}, // no space for second CP
1496 {3, 2, 2, 1}, // incomplete second CP
1497 {3, 1, 2, 1}, // incomplete second CP, and no space for it
1499 {6, 2, 4, 2}, // no space for third CP
1500 {5, 3, 4, 2}, // incomplete third CP
1501 {5, 2, 4, 2}, // incomplete third CP, and no space for it
1503 {10, 3, 6, 3}, // no space for fourth CP
1504 {7, 4, 6, 3}, // incomplete fourth CP
1505 {8, 4, 6, 3}, // incomplete fourth CP
1506 {9, 4, 6, 3}, // incomplete fourth CP
1507 {7, 3, 6, 3}, // incomplete fourth CP, and no space for it
1508 {8, 3, 6, 3}, // incomplete fourth CP, and no space for it
1509 {9, 3, 6, 3}, // incomplete fourth CP, and no space for it
1512 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1513 test_offsets_partial t = *it;
1514 InternT out[array_size(exp) - 1] = {};
1515 assert(t.in_size <= array_size(in));
1516 assert(t.out_size <= array_size(out));
1517 assert(t.expected_in_next <= t.in_size);
1518 assert(t.expected_out_next <= t.out_size);
1519 mbstate_t state = {};
1520 const char* in_next = nullptr;
1521 InternT* out_next = nullptr;
1522 codecvt_base::result res = codecvt_base::ok;
1524 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1525 assert(res == cvt.partial);
1526 assert(in_next == in + t.expected_in_next);
1527 assert(out_next == out + t.expected_out_next);
1528 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1529 if (t.expected_out_next < array_size(out))
1530 assert(out[t.expected_out_next] == 0);
1532 state = mbstate_t();
1533 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1534 assert(len >= 0);
1535 assert(static_cast<size_t>(len) == t.expected_in_next);
1539 template <class InternT>
1540 void utf16_to_utf32_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1541 char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1542 const char32_t expected[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1543 static_assert(array_size(input) == 6, "");
1544 static_assert(array_size(expected) == 5, "");
1546 InternT exp[array_size(expected)];
1547 copy(begin(expected), end(expected), begin(exp));
1549 // The only possible error in UTF-16 is unpaired surrogate code units.
1550 // So we replace valid code points (scalar values) with lone surrogate CU.
1551 test_offsets_error<char16_t> offsets[] = {
1552 {10, 4, 0, 0, 0xD800, 0},
1553 {10, 4, 0, 0, 0xDBFF, 0},
1554 {10, 4, 0, 0, 0xDC00, 0},
1555 {10, 4, 0, 0, 0xDFFF, 0},
1557 {10, 4, 2, 1, 0xD800, 1},
1558 {10, 4, 2, 1, 0xDBFF, 1},
1559 {10, 4, 2, 1, 0xDC00, 1},
1560 {10, 4, 2, 1, 0xDFFF, 1},
1562 {10, 4, 4, 2, 0xD800, 2},
1563 {10, 4, 4, 2, 0xDBFF, 2},
1564 {10, 4, 4, 2, 0xDC00, 2},
1565 {10, 4, 4, 2, 0xDFFF, 2},
1567 // make the leading surrogate a trailing one
1568 {10, 4, 6, 3, 0xDC00, 3},
1569 {10, 4, 6, 3, 0xDFFF, 3},
1571 // make the trailing surrogate a leading one
1572 {10, 4, 6, 3, 0xD800, 4},
1573 {10, 4, 6, 3, 0xDBFF, 4},
1575 // make the trailing surrogate a BMP char
1576 {10, 4, 6, 3, 'z', 4},
1579 for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1580 test_offsets_error<char16_t> t = *it;
1581 char in[array_size(input) * 2];
1582 InternT out[array_size(exp) - 1] = {};
1583 assert(t.in_size <= array_size(in));
1584 assert(t.out_size <= array_size(out));
1585 assert(t.expected_in_next <= t.in_size);
1586 assert(t.expected_out_next <= t.out_size);
1587 char16_t old_char = input[t.replace_pos];
1588 input[t.replace_pos] = t.replace_char; // replace in input, not in in
1589 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1591 mbstate_t state = {};
1592 const char* in_next = nullptr;
1593 InternT* out_next = nullptr;
1594 codecvt_base::result res = codecvt_base::ok;
1596 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1597 assert(res == cvt.error);
1598 assert(in_next == in + t.expected_in_next);
1599 assert(out_next == out + t.expected_out_next);
1600 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1601 if (t.expected_out_next < array_size(out))
1602 assert(out[t.expected_out_next] == 0);
1604 state = mbstate_t();
1605 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1606 assert(len >= 0);
1607 assert(static_cast<size_t>(len) == t.expected_in_next);
1609 input[t.replace_pos] = old_char;
1613 template <class InternT>
1614 void utf32_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1615 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1616 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1617 static_assert(array_size(input) == 5, "");
1618 static_assert(array_size(expected) == 6, "");
1620 InternT in[array_size(input)];
1621 char exp[array_size(expected) * 2];
1622 copy(begin(input), end(input), begin(in));
1623 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1625 test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 10}};
1626 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1627 test_offsets_ok t = *it;
1628 char out[array_size(exp) - 2] = {};
1629 assert(t.in_size <= array_size(in));
1630 assert(t.out_size <= array_size(out));
1631 mbstate_t state = {};
1632 const InternT* in_next = nullptr;
1633 char* out_next = nullptr;
1634 codecvt_base::result res = codecvt_base::ok;
1636 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1637 assert(res == cvt.ok);
1638 assert(in_next == in + t.in_size);
1639 assert(out_next == out + t.out_size);
1640 assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1641 if (t.out_size < array_size(out))
1642 assert(out[t.out_size] == 0);
1646 template <class InternT>
1647 void utf32_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1648 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1649 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1650 static_assert(array_size(input) == 5, "");
1651 static_assert(array_size(expected) == 6, "");
1653 InternT in[array_size(input)];
1654 char exp[array_size(expected) * 2];
1655 copy(begin(input), end(input), begin(in));
1656 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1658 test_offsets_partial offsets[] = {
1659 {1, 0, 0, 0}, // no space for first CP
1660 {1, 1, 0, 0}, // no space for first CP
1662 {2, 2, 1, 2}, // no space for second CP
1663 {2, 3, 1, 2}, // no space for second CP
1665 {3, 4, 2, 4}, // no space for third CP
1666 {3, 5, 2, 4}, // no space for third CP
1668 {4, 6, 3, 6}, // no space for fourth CP
1669 {4, 7, 3, 6}, // no space for fourth CP
1670 {4, 8, 3, 6}, // no space for fourth CP
1671 {4, 9, 3, 6}, // no space for fourth CP
1673 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1674 test_offsets_partial t = *it;
1675 char out[array_size(exp) - 2] = {};
1676 assert(t.in_size <= array_size(in));
1677 assert(t.out_size <= array_size(out));
1678 assert(t.expected_in_next <= t.in_size);
1679 assert(t.expected_out_next <= t.out_size);
1680 mbstate_t state = {};
1681 const InternT* in_next = nullptr;
1682 char* out_next = nullptr;
1683 codecvt_base::result res = codecvt_base::ok;
1685 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1686 assert(res == cvt.partial);
1687 assert(in_next == in + t.expected_in_next);
1688 assert(out_next == out + t.expected_out_next);
1689 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1690 if (t.expected_out_next < array_size(out))
1691 assert(out[t.expected_out_next] == 0);
1695 template <class InternT>
1696 void utf32_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1697 const char32_t input[] = {'b', 0x0448, 0xAAAA, 0x10AAAA, 0};
1698 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1699 static_assert(array_size(input) == 5, "");
1700 static_assert(array_size(expected) == 6, "");
1702 InternT in[array_size(input)];
1703 char exp[array_size(expected) * 2];
1704 copy(begin(input), end(input), begin(in));
1705 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1707 test_offsets_error<InternT> offsets[] = {
1709 // Surrogate CP
1710 {4, 10, 0, 0, 0xD800, 0},
1711 {4, 10, 1, 2, 0xDBFF, 1},
1712 {4, 10, 2, 4, 0xDC00, 2},
1713 {4, 10, 3, 6, 0xDFFF, 3},
1715 // CP out of range
1716 {4, 10, 0, 0, 0x00110000, 0},
1717 {4, 10, 1, 2, 0x00110000, 1},
1718 {4, 10, 2, 4, 0x00110000, 2},
1719 {4, 10, 3, 6, 0x00110000, 3}};
1721 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1722 test_offsets_error<InternT> t = *it;
1723 char out[array_size(exp) - 2] = {};
1724 assert(t.in_size <= array_size(in));
1725 assert(t.out_size <= array_size(out));
1726 assert(t.expected_in_next <= t.in_size);
1727 assert(t.expected_out_next <= t.out_size);
1728 InternT old_char = in[t.replace_pos];
1729 in[t.replace_pos] = t.replace_char;
1731 mbstate_t state = {};
1732 const InternT* in_next = nullptr;
1733 char* out_next = nullptr;
1734 codecvt_base::result res = codecvt_base::ok;
1736 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1737 assert(res == cvt.error);
1738 assert(in_next == in + t.expected_in_next);
1739 assert(out_next == out + t.expected_out_next);
1740 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
1741 if (t.expected_out_next < array_size(out))
1742 assert(out[t.expected_out_next] == 0);
1744 in[t.replace_pos] = old_char;
1748 template <class InternT>
1749 void test_utf16_utf32_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1750 utf16_to_utf32_in_ok(cvt, endianess);
1751 utf16_to_utf32_in_partial(cvt, endianess);
1752 utf16_to_utf32_in_error(cvt, endianess);
1753 utf32_to_utf16_out_ok(cvt, endianess);
1754 utf32_to_utf16_out_partial(cvt, endianess);
1755 utf32_to_utf16_out_error(cvt, endianess);
1758 template <class InternT>
1759 void utf16_to_ucs2_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1760 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1761 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1762 static_assert(array_size(input) == 4, "");
1763 static_assert(array_size(expected) == 4, "");
1765 char in[array_size(input) * 2];
1766 InternT exp[array_size(expected)];
1767 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1768 copy(begin(expected), end(expected), begin(exp));
1770 test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}};
1771 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1772 test_offsets_ok t = *it;
1773 InternT out[array_size(exp) - 1] = {};
1774 assert(t.in_size <= array_size(in));
1775 assert(t.out_size <= array_size(out));
1776 mbstate_t state = {};
1777 const char* in_next = nullptr;
1778 InternT* out_next = nullptr;
1779 codecvt_base::result res = codecvt_base::ok;
1781 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1782 assert(res == cvt.ok);
1783 assert(in_next == in + t.in_size);
1784 assert(out_next == out + t.out_size);
1785 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1786 if (t.out_size < array_size(out))
1787 assert(out[t.out_size] == 0);
1789 state = mbstate_t();
1790 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1791 assert(len >= 0);
1792 assert(static_cast<size_t>(len) == t.in_size);
1795 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1796 test_offsets_ok t = *it;
1797 InternT out[array_size(exp)] = {};
1798 assert(t.in_size <= array_size(in));
1799 assert(t.out_size <= array_size(out));
1800 mbstate_t state = {};
1801 const char* in_next = nullptr;
1802 InternT* out_next = nullptr;
1803 codecvt_base::result res = codecvt_base::ok;
1805 res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1806 assert(res == cvt.ok);
1807 assert(in_next == in + t.in_size);
1808 assert(out_next == out + t.out_size);
1809 assert(char_traits<InternT>::compare(out, exp, t.out_size) == 0);
1810 if (t.out_size < array_size(out))
1811 assert(out[t.out_size] == 0);
1813 state = mbstate_t();
1814 int len = cvt.length(state, in, in + t.in_size, array_size(out));
1815 assert(len >= 0);
1816 assert(static_cast<size_t>(len) == t.in_size);
1820 template <class InternT>
1821 void utf16_to_ucs2_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1822 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1823 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1824 static_assert(array_size(input) == 4, "");
1825 static_assert(array_size(expected) == 4, "");
1827 char in[array_size(input) * 2];
1828 InternT exp[array_size(expected)];
1829 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1830 copy(begin(expected), end(expected), begin(exp));
1832 test_offsets_partial offsets[] = {
1833 {2, 0, 0, 0}, // no space for first CP
1834 {1, 1, 0, 0}, // incomplete first CP
1835 {1, 0, 0, 0}, // incomplete first CP, and no space for it
1837 {4, 1, 2, 1}, // no space for second CP
1838 {3, 2, 2, 1}, // incomplete second CP
1839 {3, 1, 2, 1}, // incomplete second CP, and no space for it
1841 {6, 2, 4, 2}, // no space for third CP
1842 {5, 3, 4, 2}, // incomplete third CP
1843 {5, 2, 4, 2}, // incomplete third CP, and no space for it
1846 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
1847 test_offsets_partial t = *it;
1848 InternT out[array_size(exp) - 1] = {};
1849 assert(t.in_size <= array_size(in));
1850 assert(t.out_size <= array_size(out));
1851 assert(t.expected_in_next <= t.in_size);
1852 assert(t.expected_out_next <= t.out_size);
1853 mbstate_t state = {};
1854 const char* in_next = nullptr;
1855 InternT* out_next = nullptr;
1856 codecvt_base::result res = codecvt_base::ok;
1858 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1859 assert(res == cvt.partial);
1860 assert(in_next == in + t.expected_in_next);
1861 assert(out_next == out + t.expected_out_next);
1862 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1863 if (t.expected_out_next < array_size(out))
1864 assert(out[t.expected_out_next] == 0);
1866 state = mbstate_t();
1867 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1868 assert(len >= 0);
1869 assert(static_cast<size_t>(len) == t.expected_in_next);
1873 template <class InternT>
1874 void utf16_to_ucs2_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1875 char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1876 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
1877 static_assert(array_size(input) == 6, "");
1878 static_assert(array_size(expected) == 6, "");
1880 InternT exp[array_size(expected)];
1881 copy(begin(expected), end(expected), begin(exp));
1883 // The only possible error in UTF-16 is unpaired surrogate code units.
1884 // Additionally, because the target encoding is UCS-2, a proper pair of
1885 // surrogates is also error. Simply, any surrogate CU is error.
1886 test_offsets_error<char16_t> offsets[] = {
1887 {6, 3, 0, 0, 0xD800, 0},
1888 {6, 3, 0, 0, 0xDBFF, 0},
1889 {6, 3, 0, 0, 0xDC00, 0},
1890 {6, 3, 0, 0, 0xDFFF, 0},
1892 {6, 3, 2, 1, 0xD800, 1},
1893 {6, 3, 2, 1, 0xDBFF, 1},
1894 {6, 3, 2, 1, 0xDC00, 1},
1895 {6, 3, 2, 1, 0xDFFF, 1},
1897 {6, 3, 4, 2, 0xD800, 2},
1898 {6, 3, 4, 2, 0xDBFF, 2},
1899 {6, 3, 4, 2, 0xDC00, 2},
1900 {6, 3, 4, 2, 0xDFFF, 2},
1902 // make the leading surrogate a trailing one
1903 {10, 5, 6, 3, 0xDC00, 3},
1904 {10, 5, 6, 3, 0xDFFF, 3},
1906 // make the trailing surrogate a leading one
1907 {10, 5, 6, 3, 0xD800, 4},
1908 {10, 5, 6, 3, 0xDBFF, 4},
1910 // make the trailing surrogate a BMP char
1911 {10, 5, 6, 3, 'z', 4},
1913 // don't replace anything in the test cases bellow, just show the surrogate
1914 // pair (fourth CP) fully or partially (just the first surrogate)
1915 {10, 5, 6, 3, 'b', 0},
1916 {8, 5, 6, 3, 'b', 0},
1917 {9, 5, 6, 3, 'b', 0},
1919 {10, 4, 6, 3, 'b', 0},
1920 {8, 4, 6, 3, 'b', 0},
1921 {9, 4, 6, 3, 'b', 0},
1924 for (test_offsets_error<char16_t>* it = begin(offsets); it != end(offsets); ++it) {
1925 test_offsets_error<char16_t> t = *it;
1926 char in[array_size(input) * 2];
1927 InternT out[array_size(exp) - 1] = {};
1928 assert(t.in_size <= array_size(in));
1929 assert(t.out_size <= array_size(out));
1930 assert(t.expected_in_next <= t.in_size);
1931 assert(t.expected_out_next <= t.out_size);
1932 char16_t old_char = input[t.replace_pos];
1933 input[t.replace_pos] = t.replace_char; // replace in input, not in in
1934 utf16_to_bytes(begin(input), end(input), begin(in), endianess);
1936 mbstate_t state = {};
1937 const char* in_next = nullptr;
1938 InternT* out_next = nullptr;
1939 codecvt_base::result res = codecvt_base::ok;
1941 res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1942 assert(res == cvt.error);
1943 assert(in_next == in + t.expected_in_next);
1944 assert(out_next == out + t.expected_out_next);
1945 assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == 0);
1946 if (t.expected_out_next < array_size(out))
1947 assert(out[t.expected_out_next] == 0);
1949 state = mbstate_t();
1950 int len = cvt.length(state, in, in + t.in_size, t.out_size);
1951 assert(len >= 0);
1952 assert(static_cast<size_t>(len) == t.expected_in_next);
1954 input[t.replace_pos] = old_char;
1958 template <class InternT>
1959 void ucs2_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1960 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1961 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1962 static_assert(array_size(input) == 4, "");
1963 static_assert(array_size(expected) == 4, "");
1965 InternT in[array_size(input)];
1966 char exp[array_size(expected) * 2];
1967 copy(begin(input), end(input), begin(in));
1968 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
1970 test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}};
1971 for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
1972 test_offsets_ok t = *it;
1973 char out[array_size(exp) - 2] = {};
1974 assert(t.in_size <= array_size(in));
1975 assert(t.out_size <= array_size(out));
1976 mbstate_t state = {};
1977 const InternT* in_next = nullptr;
1978 char* out_next = nullptr;
1979 codecvt_base::result res = codecvt_base::ok;
1981 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1982 assert(res == cvt.ok);
1983 assert(in_next == in + t.in_size);
1984 assert(out_next == out + t.out_size);
1985 assert(char_traits<char>::compare(out, exp, t.out_size) == 0);
1986 if (t.out_size < array_size(out))
1987 assert(out[t.out_size] == 0);
1991 template <class InternT>
1992 void ucs2_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1993 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0};
1994 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0};
1995 static_assert(array_size(input) == 4, "");
1996 static_assert(array_size(expected) == 4, "");
1998 InternT in[array_size(input)];
1999 char exp[array_size(expected) * 2];
2000 copy(begin(input), end(input), begin(in));
2001 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2003 test_offsets_partial offsets[] = {
2004 {1, 0, 0, 0}, // no space for first CP
2005 {1, 1, 0, 0}, // no space for first CP
2007 {2, 2, 1, 2}, // no space for second CP
2008 {2, 3, 1, 2}, // no space for second CP
2010 {3, 4, 2, 4}, // no space for third CP
2011 {3, 5, 2, 4}, // no space for third CP
2013 for (test_offsets_partial* it = begin(offsets); it != end(offsets); ++it) {
2014 test_offsets_partial t = *it;
2015 char out[array_size(exp) - 2] = {};
2016 assert(t.in_size <= array_size(in));
2017 assert(t.out_size <= array_size(out));
2018 assert(t.expected_in_next <= t.in_size);
2019 assert(t.expected_out_next <= t.out_size);
2020 mbstate_t state = {};
2021 const InternT* in_next = nullptr;
2022 char* out_next = nullptr;
2023 codecvt_base::result res = codecvt_base::ok;
2025 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2026 assert(res == cvt.partial);
2027 assert(in_next == in + t.expected_in_next);
2028 assert(out_next == out + t.expected_out_next);
2029 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2030 if (t.expected_out_next < array_size(out))
2031 assert(out[t.expected_out_next] == 0);
2035 template <class InternT>
2036 void ucs2_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2037 const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2038 const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0};
2039 static_assert(array_size(input) == 6, "");
2040 static_assert(array_size(expected) == 6, "");
2042 InternT in[array_size(input)];
2043 char exp[array_size(expected) * 2];
2044 copy(begin(input), end(input), begin(in));
2045 utf16_to_bytes(begin(expected), end(expected), begin(exp), endianess);
2047 test_offsets_error<InternT> offsets[] = {
2048 {3, 6, 0, 0, 0xD800, 0},
2049 {3, 6, 0, 0, 0xDBFF, 0},
2050 {3, 6, 0, 0, 0xDC00, 0},
2051 {3, 6, 0, 0, 0xDFFF, 0},
2053 {3, 6, 1, 2, 0xD800, 1},
2054 {3, 6, 1, 2, 0xDBFF, 1},
2055 {3, 6, 1, 2, 0xDC00, 1},
2056 {3, 6, 1, 2, 0xDFFF, 1},
2058 {3, 6, 2, 4, 0xD800, 2},
2059 {3, 6, 2, 4, 0xDBFF, 2},
2060 {3, 6, 2, 4, 0xDC00, 2},
2061 {3, 6, 2, 4, 0xDFFF, 2},
2063 // make the leading surrogate a trailing one
2064 {5, 10, 3, 6, 0xDC00, 3},
2065 {5, 10, 3, 6, 0xDFFF, 3},
2067 // make the trailing surrogate a leading one
2068 {5, 10, 3, 6, 0xD800, 4},
2069 {5, 10, 3, 6, 0xDBFF, 4},
2071 // make the trailing surrogate a BMP char
2072 {5, 10, 3, 6, 'z', 4},
2074 // don't replace anything in the test cases bellow, just show the surrogate
2075 // pair (fourth CP) fully or partially (just the first surrogate)
2076 {5, 10, 3, 6, 'b', 0},
2077 {5, 8, 3, 6, 'b', 0},
2078 {5, 9, 3, 6, 'b', 0},
2080 {4, 10, 3, 6, 'b', 0},
2081 {4, 8, 3, 6, 'b', 0},
2082 {4, 9, 3, 6, 'b', 0},
2085 for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
2086 test_offsets_error<InternT> t = *it;
2087 char out[array_size(exp) - 2] = {};
2088 assert(t.in_size <= array_size(in));
2089 assert(t.out_size <= array_size(out));
2090 assert(t.expected_in_next <= t.in_size);
2091 assert(t.expected_out_next <= t.out_size);
2092 InternT old_char = in[t.replace_pos];
2093 in[t.replace_pos] = t.replace_char;
2095 mbstate_t state = {};
2096 const InternT* in_next = nullptr;
2097 char* out_next = nullptr;
2098 codecvt_base::result res = codecvt_base::ok;
2100 res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2101 assert(res == cvt.error);
2102 assert(in_next == in + t.expected_in_next);
2103 assert(out_next == out + t.expected_out_next);
2104 assert(char_traits<char>::compare(out, exp, t.expected_out_next) == 0);
2105 if (t.expected_out_next < array_size(out))
2106 assert(out[t.expected_out_next] == 0);
2108 in[t.replace_pos] = old_char;
2112 template <class InternT>
2113 void test_utf16_ucs2_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2114 utf16_to_ucs2_in_ok(cvt, endianess);
2115 utf16_to_ucs2_in_partial(cvt, endianess);
2116 utf16_to_ucs2_in_error(cvt, endianess);
2117 ucs2_to_utf16_out_ok(cvt, endianess);
2118 ucs2_to_utf16_out_partial(cvt, endianess);
2119 ucs2_to_utf16_out_error(cvt, endianess);
2122 using std::codecvt;
2123 using std::codecvt_utf16;
2124 using std::codecvt_utf8;
2125 using std::codecvt_utf8_utf16;
2126 using std::has_facet;
2127 using std::locale;
2128 using std::use_facet;
2130 void test_utf8_utf32_codecvts() {
2131 typedef codecvt<char32_t, char, mbstate_t> codecvt_c32;
2132 const locale& loc_c = locale::classic();
2133 assert(has_facet<codecvt_c32>(loc_c));
2135 const codecvt_c32& cvt = use_facet<codecvt_c32>(loc_c);
2136 test_utf8_utf32_cvt(cvt);
2138 codecvt_utf8<char32_t> cvt2;
2139 test_utf8_utf32_cvt(cvt2);
2141 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2142 codecvt_utf8<wchar_t> cvt3;
2143 test_utf8_utf32_cvt(cvt3);
2144 #endif
2146 #ifndef TEST_HAS_NO_CHAR8_T
2147 typedef codecvt<char32_t, char8_t, mbstate_t> codecvt_c32_c8;
2148 assert(has_facet<codecvt_c32_c8>(loc_c));
2149 const codecvt_c32_c8& cvt4 = use_facet<codecvt_c32_c8>(loc_c);
2150 test_utf8_utf32_cvt(cvt4);
2151 #endif
2154 void test_utf8_utf16_codecvts() {
2155 typedef codecvt<char16_t, char, mbstate_t> codecvt_c16;
2156 const locale& loc_c = locale::classic();
2157 assert(has_facet<codecvt_c16>(loc_c));
2159 const codecvt_c16& cvt = use_facet<codecvt_c16>(loc_c);
2160 test_utf8_utf16_cvt(cvt);
2162 codecvt_utf8_utf16<char16_t> cvt2;
2163 test_utf8_utf16_cvt(cvt2);
2165 codecvt_utf8_utf16<char32_t> cvt3;
2166 test_utf8_utf16_cvt(cvt3);
2168 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
2169 codecvt_utf8_utf16<wchar_t> cvt4;
2170 test_utf8_utf16_cvt(cvt4);
2171 #endif
2173 #ifndef TEST_HAS_NO_CHAR8_T
2174 typedef codecvt<char16_t, char8_t, mbstate_t> codecvt_c16_c8;
2175 assert(has_facet<codecvt_c16_c8>(loc_c));
2176 const codecvt_c16_c8& cvt5 = use_facet<codecvt_c16_c8>(loc_c);
2177 test_utf8_utf16_cvt(cvt5);
2178 #endif
2181 void test_utf8_ucs2_codecvts() {
2182 codecvt_utf8<char16_t> cvt;
2183 test_utf8_ucs2_cvt(cvt);
2185 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2186 codecvt_utf8<wchar_t> cvt2;
2187 test_utf8_ucs2_cvt(cvt2);
2188 #endif
2191 void test_utf16_utf32_codecvts() {
2192 codecvt_utf16<char32_t> cvt;
2193 test_utf16_utf32_cvt(cvt, utf16_big_endian);
2195 codecvt_utf16<char32_t, 0x10FFFF, std::little_endian> cvt2;
2196 test_utf16_utf32_cvt(cvt2, utf16_little_endian);
2198 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2199 codecvt_utf16<wchar_t> cvt3;
2200 test_utf16_utf32_cvt(cvt3, utf16_big_endian);
2202 codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2203 test_utf16_utf32_cvt(cvt4, utf16_little_endian);
2204 #endif
2207 void test_utf16_ucs2_codecvts() {
2208 codecvt_utf16<char16_t> cvt;
2209 test_utf16_ucs2_cvt(cvt, utf16_big_endian);
2211 codecvt_utf16<char16_t, 0x10FFFF, std::little_endian> cvt2;
2212 test_utf16_ucs2_cvt(cvt2, utf16_little_endian);
2214 #if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2215 codecvt_utf16<wchar_t> cvt3;
2216 test_utf16_ucs2_cvt(cvt3, utf16_big_endian);
2218 codecvt_utf16<wchar_t, 0x10FFFF, std::little_endian> cvt4;
2219 test_utf16_ucs2_cvt(cvt4, utf16_little_endian);
2220 #endif
2223 int main() {
2224 test_utf8_utf32_codecvts();
2225 test_utf8_utf16_codecvts();
2226 test_utf8_ucs2_codecvts();
2227 test_utf16_utf32_codecvts();
2228 test_utf16_ucs2_codecvts();