1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Support/ConvertUTF.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "gtest/gtest.h"
// Test: feeds convertUTF16ToUTF8String a byte buffer that starts with FF FE
// followed by the little-endian code units 0x0CA0, 0x005F, 0x0CA0, and
// compares the output with the UTF-8 bytes E0 B2 A0 '_' E0 B2 A0.
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
18 TEST(ConvertUTFTest
, ConvertUTF16LittleEndianToUTF8String
) {
19 // Src is the look of disapproval.
20 static const char Src
[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
// sizeof(Src) - 1 leaves the string literal's terminating NUL out of the view.
21 ArrayRef
<char> Ref(Src
, sizeof(Src
) - 1);
23 bool Success
= convertUTF16ToUTF8String(Ref
, Result
);
25 std::string
Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
26 EXPECT_EQ(Expected
, Result
);
// Test: same conversion as the little-endian case, but the buffer starts with
// FE FF and the code units 0x0CA0, 0x005F, 0x0CA0 are stored high byte first.
// The expected UTF-8 output is E0 B2 A0 '_' E0 B2 A0.
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
29 TEST(ConvertUTFTest
, ConvertUTF16BigEndianToUTF8String
) {
30 // Src is the look of disapproval.
31 static const char Src
[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
// sizeof(Src) - 1 leaves the string literal's terminating NUL out of the view.
32 ArrayRef
<char> Ref(Src
, sizeof(Src
) - 1);
34 bool Success
= convertUTF16ToUTF8String(Ref
, Result
);
36 std::string
Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
37 EXPECT_EQ(Expected
, Result
);
// Test: converts the UTF-8 bytes E0 B2 A0 '_' E0 B2 A0 with
// convertUTF8ToUTF16String and checks that exactly three UTF-16 code units
// (0x0CA0, 0x005F, 0x0CA0) are produced.
// NOTE(review): the closing brace of this test is not visible in this listing.
40 TEST(ConvertUTFTest
, ConvertUTF8ToUTF16String
) {
41 // Src is the look of disapproval.
42 static const char Src
[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
// sizeof(Src) - 1 leaves the string literal's terminating NUL out of the view.
43 StringRef
Ref(Src
, sizeof(Src
) - 1);
44 SmallVector
<UTF16
, 5> Result
;
45 bool Success
= convertUTF8ToUTF16String(Ref
, Result
);
// Expected carries a fourth element (0) that the loop below never compares.
47 static const UTF16 Expected
[] = {0x0CA0, 0x005f, 0x0CA0, 0};
// ASSERT (not EXPECT) so the indexing below is skipped when the size is wrong.
48 ASSERT_EQ(3u, Result
.size());
49 for (int I
= 0, E
= 3; I
!= E
; ++I
)
50 EXPECT_EQ(Expected
[I
], Result
[I
]);
// Test: a 5-byte input ("xxxxx") passed to convertUTF16ToUTF8String must
// report failure.
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
53 TEST(ConvertUTFTest
, OddLengthInput
) {
55 bool Success
= convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result
);
56 EXPECT_FALSE(Success
);
// Test: converting an ArrayRef<char> constructed from None with
// convertUTF16ToUTF8String leaves Result empty.
// NOTE(review): the declaration of Result, any check of Success, and the
// closing brace are not visible in this listing.
59 TEST(ConvertUTFTest
, Empty
) {
61 bool Success
= convertUTF16ToUTF8String(llvm::ArrayRef
<char>(None
), Result
);
63 EXPECT_TRUE(Result
.empty());
// Test: calls hasUTF16ByteOrderMark on a series of short byte buffers.
// NOTE(review): only one expectation (for the 3-byte input) is visible in this
// listing; the checks that follow the other calls and the closing brace are
// missing -- confirm against the complete file.
66 TEST(ConvertUTFTest
, HasUTF16BOM
) {
67 bool HasBOM
= hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
69 HasBOM
= hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
71 HasBOM
= hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
72 EXPECT_TRUE(HasBOM
); // Don't care about odd lengths.
// A hex escape consumes every following hex digit, so "\x00a" is the single
// byte 0x0A; the six bytes passed here are FE FF 0A 's' 'd' 'f'.
73 HasBOM
= hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
76 HasBOM
= hasUTF16ByteOrderMark(None
);
78 HasBOM
= hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
// Test: exercises the convertUTF16ToUTF8String overload that accepts an
// ArrayRef<UTF16>, and compares the output with the UTF-8 bytes
// E0 B2 A0 '_' E0 B2 A0.
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
82 TEST(ConvertUTFTest
, UTF16WrappersForConvertUTF16ToUTF8String
) {
83 // Src is the look of disapproval.
84 static const char Src
[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
// The 8 bytes of Src are viewed as 4 UTF16 units; the first unit is the
// FF FE pair at the start of the buffer.
85 ArrayRef
<UTF16
> SrcRef
= makeArrayRef((const UTF16
*)Src
, 4);
87 bool Success
= convertUTF16ToUTF8String(SrcRef
, Result
);
89 std::string
Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
90 EXPECT_EQ(Expected
, Result
);
// Test: converts the same UTF-8 text to a wide string twice with
// ConvertUTF8toWide -- once from a const char* and once from a StringRef --
// and compares each result with L"\x0ca0_\x0ca0".
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
93 TEST(ConvertUTFTest
, ConvertUTF8toWide
) {
94 // Src is the look of disapproval.
95 static const char Src
[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
97 bool Success
= ConvertUTF8toWide((const char*)Src
, Result
);
99 std::wstring
Expected(L
"\x0ca0_\x0ca0");
100 EXPECT_EQ(Expected
, Result
);
// Second pass: the StringRef spans the 7 bytes of Src without its NUL.
102 Success
= ConvertUTF8toWide(StringRef(Src
, 7), Result
);
103 EXPECT_TRUE(Success
);
104 EXPECT_EQ(Expected
, Result
);
// Test: converts the wide string L"\x0ca0_\x0ca0" with convertWideToUTF8 and
// expects success plus the UTF-8 bytes E0 B2 A0 '_' E0 B2 A0.
// NOTE(review): the declaration of Result and the closing brace of this test
// are not visible in this listing -- confirm against the complete file.
107 TEST(ConvertUTFTest
, convertWideToUTF8
) {
108 // Src is the look of disapproval.
109 static const wchar_t Src
[] = L
"\x0ca0_\x0ca0";
111 bool Success
= convertWideToUTF8(Src
, Result
);
112 EXPECT_TRUE(Success
);
113 std::string
Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
114 EXPECT_EQ(Expected
, Result
);
// Expected outcome of a conversion: a ConversionResult code plus the list of
// scalar values the conversion should produce.
// NOTE(review): this listing skips original lines between the push_back calls
// and after the last one; any condition guarding each push_back, the return
// statement of withScalars, and the closing braces are not visible here.
117 struct ConvertUTFResultContainer
{
118 ConversionResult ErrorCode
;
119 std::vector
<unsigned> UnicodeScalars
;
// Builds a container with the given code and an empty scalar list.
121 ConvertUTFResultContainer(ConversionResult ErrorCode
)
122 : ErrorCode(ErrorCode
) {}
// Works on a copy of *this and appends up to eight scalars to the copy's
// UnicodeScalars. Every argument defaults to 0x110000.
// NOTE(review): 0x110000 is presumably a "not supplied" marker (it is one
// past U+10FFFF) -- confirm how it is filtered in the complete file.
124 ConvertUTFResultContainer
125 withScalars(unsigned US0
= 0x110000, unsigned US1
= 0x110000,
126 unsigned US2
= 0x110000, unsigned US3
= 0x110000,
127 unsigned US4
= 0x110000, unsigned US5
= 0x110000,
128 unsigned US6
= 0x110000, unsigned US7
= 0x110000) {
129 ConvertUTFResultContainer
Result(*this);
131 Result
.UnicodeScalars
.push_back(US0
);
133 Result
.UnicodeScalars
.push_back(US1
);
135 Result
.UnicodeScalars
.push_back(US2
);
137 Result
.UnicodeScalars
.push_back(US3
);
139 Result
.UnicodeScalars
.push_back(US4
);
141 Result
.UnicodeScalars
.push_back(US5
);
143 Result
.UnicodeScalars
.push_back(US6
);
145 Result
.UnicodeScalars
.push_back(US7
);
// Runs ConvertUTF8toUTF32 over S with the lenientConversion flag and returns
// the pair (result code, decoded values).
// NOTE(review): the declaration that captures the call's return value as
// ErrorCode and the closing brace are not visible in this listing.
150 std::pair
<ConversionResult
, std::vector
<unsigned>>
151 ConvertUTF8ToUnicodeScalarsLenient(StringRef S
) {
152 const UTF8
*SourceStart
= reinterpret_cast<const UTF8
*>(S
.data());
154 const UTF8
*SourceNext
= SourceStart
;
// The output buffer gets one zero-initialised slot per input byte.
155 std::vector
<UTF32
> Decoded(S
.size(), 0);
156 UTF32
*TargetStart
= Decoded
.data();
159 ConvertUTF8toUTF32(&SourceNext
, SourceStart
+ S
.size(), &TargetStart
,
160 Decoded
.data() + Decoded
.size(), lenientConversion
);
// Shrink to the distance between TargetStart and the buffer start;
// presumably the callee advances TargetStart past the last value written.
162 Decoded
.resize(TargetStart
- Decoded
.data());
164 return std::make_pair(ErrorCode
, Decoded
);
// Runs ConvertUTF8toUTF32Partial over S with the lenientConversion flag and
// returns the pair (result code, decoded values).
// NOTE(review): the closing brace of this function is not visible in this
// listing.
167 std::pair
<ConversionResult
, std::vector
<unsigned>>
168 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S
) {
169 const UTF8
*SourceStart
= reinterpret_cast<const UTF8
*>(S
.data());
171 const UTF8
*SourceNext
= SourceStart
;
// The output buffer gets one zero-initialised slot per input byte.
172 std::vector
<UTF32
> Decoded(S
.size(), 0);
173 UTF32
*TargetStart
= Decoded
.data();
175 auto ErrorCode
= ConvertUTF8toUTF32Partial(
176 &SourceNext
, SourceStart
+ S
.size(), &TargetStart
,
177 Decoded
.data() + Decoded
.size(), lenientConversion
);
// Shrink to the distance between TargetStart and the buffer start;
// presumably the callee advances TargetStart past the last value written.
179 Decoded
.resize(TargetStart
- Decoded
.data());
181 return std::make_pair(ErrorCode
, Decoded
);
// Decodes S and compares both the result code and the decoded values with
// Expected. Returns AssertionSuccess when both match; otherwise returns an
// AssertionFailure whose message includes the expected value(s).
// NOTE(review): the statements that choose between the two decoding helpers
// based on Partial, the tail of the error-code message, and the closing brace
// are not visible in this listing -- confirm against the complete file.
184 ::testing::AssertionResult
185 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected
,
186 StringRef S
, bool Partial
= false) {
187 ConversionResult ErrorCode
;
188 std::vector
<unsigned> Decoded
;
190 std::tie(ErrorCode
, Decoded
) = ConvertUTF8ToUnicodeScalarsLenient(S
);
192 std::tie(ErrorCode
, Decoded
) = ConvertUTF8ToUnicodeScalarsPartialLenient(S
);
// The result code is checked first, so a code mismatch is reported without
// comparing the decoded values.
194 if (Expected
.ErrorCode
!= ErrorCode
)
195 return ::testing::AssertionFailure() << "Expected error code "
196 << Expected
.ErrorCode
<< ", actual "
199 if (Expected
.UnicodeScalars
!= Decoded
)
200 return ::testing::AssertionFailure()
201 << "Expected lenient decoded result:\n"
202 << ::testing::PrintToString(Expected
.UnicodeScalars
) << "\n"
203 << "Actual result:\n" << ::testing::PrintToString(Decoded
);
205 return ::testing::AssertionSuccess();
208 TEST(ConvertUTFTest
, UTF8ToUTF32Lenient
) {
214 // U+0041 LATIN CAPITAL LETTER A
215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
216 ConvertUTFResultContainer(conversionOK
).withScalars(0x0041), "\x41"));
222 // U+0283 LATIN SMALL LETTER ESH
223 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
224 ConvertUTFResultContainer(conversionOK
).withScalars(0x0283),
227 // U+03BA GREEK SMALL LETTER KAPPA
228 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
229 // U+03C3 GREEK SMALL LETTER SIGMA
230 // U+03BC GREEK SMALL LETTER MU
231 // U+03B5 GREEK SMALL LETTER EPSILON
232 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
233 ConvertUTFResultContainer(conversionOK
)
234 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
235 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
241 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
242 // U+6587 CJK UNIFIED IDEOGRAPH-6587
243 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
244 ConvertUTFResultContainer(conversionOK
).withScalars(0x4f8b, 0x6587),
245 "\xe4\xbe\x8b\xe6\x96\x87"));
247 // U+D55C HANGUL SYLLABLE HAN
248 // U+AE00 HANGUL SYLLABLE GEUL
249 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
250 ConvertUTFResultContainer(conversionOK
).withScalars(0xd55c, 0xae00),
251 "\xed\x95\x9c\xea\xb8\x80"));
253 // U+1112 HANGUL CHOSEONG HIEUH
254 // U+1161 HANGUL JUNGSEONG A
255 // U+11AB HANGUL JONGSEONG NIEUN
256 // U+1100 HANGUL CHOSEONG KIYEOK
257 // U+1173 HANGUL JUNGSEONG EU
258 // U+11AF HANGUL JONGSEONG RIEUL
259 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
260 ConvertUTFResultContainer(conversionOK
)
261 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
262 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
269 // U+E0100 VARIATION SELECTOR-17
270 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
271 ConvertUTFResultContainer(conversionOK
).withScalars(0x000E0100),
272 "\xf3\xa0\x84\x80"));
275 // First possible sequence of a certain length
279 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
280 ConvertUTFResultContainer(conversionOK
).withScalars(0x0000),
281 StringRef("\x00", 1)));
283 // U+0080 PADDING CHARACTER
284 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
285 ConvertUTFResultContainer(conversionOK
).withScalars(0x0080),
288 // U+0800 SAMARITAN LETTER ALAF
289 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
290 ConvertUTFResultContainer(conversionOK
).withScalars(0x0800),
293 // U+10000 LINEAR B SYLLABLE B008 A
294 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
295 ConvertUTFResultContainer(conversionOK
).withScalars(0x10000),
296 "\xf0\x90\x80\x80"));
298 // U+200000 (invalid)
299 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
300 ConvertUTFResultContainer(sourceIllegal
)
301 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
302 "\xf8\x88\x80\x80\x80"));
304 // U+4000000 (invalid)
305 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
306 ConvertUTFResultContainer(sourceIllegal
)
307 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
308 "\xfc\x84\x80\x80\x80\x80"));
311 // Last possible sequence of a certain length
315 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
316 ConvertUTFResultContainer(conversionOK
).withScalars(0x007f), "\x7f"));
318 // U+07FF (unassigned)
319 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
320 ConvertUTFResultContainer(conversionOK
).withScalars(0x07ff),
323 // U+FFFF (noncharacter)
324 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
325 ConvertUTFResultContainer(conversionOK
).withScalars(0xffff),
328 // U+1FFFFF (invalid)
329 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
330 ConvertUTFResultContainer(sourceIllegal
)
331 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
332 "\xf7\xbf\xbf\xbf"));
334 // U+3FFFFFF (invalid)
335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
336 ConvertUTFResultContainer(sourceIllegal
)
337 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
338 "\xfb\xbf\xbf\xbf\xbf"));
340 // U+7FFFFFFF (invalid)
341 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
342 ConvertUTFResultContainer(sourceIllegal
)
343 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
344 "\xfd\xbf\xbf\xbf\xbf\xbf"));
347 // Other boundary conditions
350 // U+D7FF (unassigned)
351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
352 ConvertUTFResultContainer(conversionOK
).withScalars(0xd7ff),
355 // U+E000 (private use)
356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
357 ConvertUTFResultContainer(conversionOK
).withScalars(0xe000),
360 // U+FFFD REPLACEMENT CHARACTER
361 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
362 ConvertUTFResultContainer(conversionOK
).withScalars(0xfffd),
365 // U+10FFFF (noncharacter)
366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
367 ConvertUTFResultContainer(conversionOK
).withScalars(0x10ffff),
368 "\xf4\x8f\xbf\xbf"));
370 // U+110000 (invalid)
371 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
372 ConvertUTFResultContainer(sourceIllegal
)
373 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
374 "\xf4\x90\x80\x80"));
377 // Unexpected continuation bytes
380 // A sequence of unexpected continuation bytes that don't follow a first
381 // byte; each byte is a maximal subpart.
383 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
384 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\x80"));
385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
386 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xbf"));
387 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
388 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
390 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
391 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
393 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
394 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
396 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
397 ConvertUTFResultContainer(sourceIllegal
)
398 .withScalars(0xfffd, 0xfffd, 0xfffd),
400 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
401 ConvertUTFResultContainer(sourceIllegal
)
402 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
403 "\x80\xbf\x80\xbf"));
404 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
405 ConvertUTFResultContainer(sourceIllegal
)
406 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
407 "\x80\xbf\x82\xbf\xaa"));
408 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
409 ConvertUTFResultContainer(sourceIllegal
)
410 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
411 "\xaa\xb0\xbb\xbf\xaa\xa0"));
412 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
413 ConvertUTFResultContainer(sourceIllegal
)
414 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
415 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
417 // All continuation bytes (0x80--0xbf).
418 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
419 ConvertUTFResultContainer(sourceIllegal
)
420 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
421 0xfffd, 0xfffd, 0xfffd, 0xfffd)
422 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
423 0xfffd, 0xfffd, 0xfffd, 0xfffd)
424 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
425 0xfffd, 0xfffd, 0xfffd, 0xfffd)
426 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
427 0xfffd, 0xfffd, 0xfffd, 0xfffd)
428 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
429 0xfffd, 0xfffd, 0xfffd, 0xfffd)
430 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
431 0xfffd, 0xfffd, 0xfffd, 0xfffd)
432 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
433 0xfffd, 0xfffd, 0xfffd, 0xfffd)
434 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
435 0xfffd, 0xfffd, 0xfffd, 0xfffd),
436 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
437 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
438 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
439 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
442 // Lonely start bytes
445 // Start bytes of 2-byte sequences (0xc0--0xdf).
446 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
447 ConvertUTFResultContainer(sourceIllegal
)
448 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
449 0xfffd, 0xfffd, 0xfffd, 0xfffd)
450 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
451 0xfffd, 0xfffd, 0xfffd, 0xfffd)
452 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
453 0xfffd, 0xfffd, 0xfffd, 0xfffd)
454 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
455 0xfffd, 0xfffd, 0xfffd, 0xfffd),
456 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
457 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
459 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
460 ConvertUTFResultContainer(sourceIllegal
)
461 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
462 0xfffd, 0x0020, 0xfffd, 0x0020)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020)
467 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468 0xfffd, 0x0020, 0xfffd, 0x0020)
469 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
470 0xfffd, 0x0020, 0xfffd, 0x0020)
471 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
472 0xfffd, 0x0020, 0xfffd, 0x0020)
473 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
474 0xfffd, 0x0020, 0xfffd, 0x0020)
475 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
476 0xfffd, 0x0020, 0xfffd, 0x0020),
477 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
478 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
479 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
480 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
482 // Start bytes of 3-byte sequences (0xe0--0xef).
483 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
484 ConvertUTFResultContainer(sourceIllegal
)
485 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
486 0xfffd, 0xfffd, 0xfffd, 0xfffd)
487 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
488 0xfffd, 0xfffd, 0xfffd, 0xfffd),
489 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
492 ConvertUTFResultContainer(sourceIllegal
)
493 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
494 0xfffd, 0x0020, 0xfffd, 0x0020)
495 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
496 0xfffd, 0x0020, 0xfffd, 0x0020)
497 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
498 0xfffd, 0x0020, 0xfffd, 0x0020)
499 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
500 0xfffd, 0x0020, 0xfffd, 0x0020),
501 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
502 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
504 // Start bytes of 4-byte sequences (0xf0--0xf7).
505 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
506 ConvertUTFResultContainer(sourceIllegal
)
507 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
508 0xfffd, 0xfffd, 0xfffd, 0xfffd),
509 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
512 ConvertUTFResultContainer(sourceIllegal
)
513 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
514 0xfffd, 0x0020, 0xfffd, 0x0020)
515 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
516 0xfffd, 0x0020, 0xfffd, 0x0020),
517 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
519 // Start bytes of 5-byte sequences (0xf8--0xfb).
520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
521 ConvertUTFResultContainer(sourceIllegal
)
522 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
523 "\xf8\xf9\xfa\xfb"));
525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
526 ConvertUTFResultContainer(sourceIllegal
)
527 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
528 0xfffd, 0x0020, 0xfffd, 0x0020),
529 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
531 // Start bytes of 6-byte sequences (0xfc--0xfd).
532 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
533 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
536 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
537 ConvertUTFResultContainer(sourceIllegal
)
538 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
539 "\xfc\x20\xfd\x20"));
542 // Other bytes (0xc0--0xc1, 0xfe--0xff).
545 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
546 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xc0"));
547 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
548 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xc1"));
549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfe"));
551 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
552 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xff"));
554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
555 ConvertUTFResultContainer(sourceIllegal
)
556 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
557 "\xc0\xc1\xfe\xff"));
559 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
560 ConvertUTFResultContainer(sourceIllegal
)
561 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
562 "\xfe\xfe\xff\xff"));
564 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
565 ConvertUTFResultContainer(sourceIllegal
)
566 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
567 "\xfe\x80\x80\x80\x80\x80"));
569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
570 ConvertUTFResultContainer(sourceIllegal
)
571 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
572 "\xff\x80\x80\x80\x80\x80"));
574 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
575 ConvertUTFResultContainer(sourceIllegal
)
576 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
577 0xfffd, 0x0020, 0xfffd, 0x0020),
578 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
581 // Sequences with one continuation byte missing
584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
585 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xc2"));
586 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
587 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xdf"));
588 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
589 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
591 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
592 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
594 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
595 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
597 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
598 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
601 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
603 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
604 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
606 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
607 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
609 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
610 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
612 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
613 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
615 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
616 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
619 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
621 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
622 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
624 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
625 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
627 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
628 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
631 // Overlong sequences with one trailing byte missing.
632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
633 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
636 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
639 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
641 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
642 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
645 ConvertUTFResultContainer(sourceIllegal
)
646 .withScalars(0xfffd, 0xfffd, 0xfffd),
648 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
649 ConvertUTFResultContainer(sourceIllegal
)
650 .withScalars(0xfffd, 0xfffd, 0xfffd),
652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
653 ConvertUTFResultContainer(sourceIllegal
)
654 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
655 "\xf8\x80\x80\x80"));
656 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
657 ConvertUTFResultContainer(sourceIllegal
)
658 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
659 "\xfc\x80\x80\x80\x80"));
661 // Sequences that represent surrogates with one trailing byte missing.
663 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
664 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
666 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
667 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
669 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
670 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
673 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
674 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
676 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
677 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
679 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
680 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
683 // Ill-formed 4-byte sequences.
684 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
685 // U+1100xx (invalid)
686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
687 ConvertUTFResultContainer(sourceIllegal
)
688 .withScalars(0xfffd, 0xfffd, 0xfffd),
690 // U+13FBxx (invalid)
691 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
692 ConvertUTFResultContainer(sourceIllegal
)
693 .withScalars(0xfffd, 0xfffd, 0xfffd),
695 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
696 ConvertUTFResultContainer(sourceIllegal
)
697 .withScalars(0xfffd, 0xfffd, 0xfffd),
699 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
700 ConvertUTFResultContainer(sourceIllegal
)
701 .withScalars(0xfffd, 0xfffd, 0xfffd),
703 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
704 ConvertUTFResultContainer(sourceIllegal
)
705 .withScalars(0xfffd, 0xfffd, 0xfffd),
707 // U+1FFBxx (invalid)
708 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
709 ConvertUTFResultContainer(sourceIllegal
)
710 .withScalars(0xfffd, 0xfffd, 0xfffd),
713 // Ill-formed 5-byte sequences.
714 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
715 // U+2000xx (invalid)
716 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
717 ConvertUTFResultContainer(sourceIllegal
)
718 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
719 "\xf8\x88\x80\x80"));
720 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
721 ConvertUTFResultContainer(sourceIllegal
)
722 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
723 "\xf8\xbf\xbf\xbf"));
724 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
725 ConvertUTFResultContainer(sourceIllegal
)
726 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
727 "\xf9\x80\x80\x80"));
728 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
729 ConvertUTFResultContainer(sourceIllegal
)
730 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
731 "\xfa\x80\x80\x80"));
732 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
733 ConvertUTFResultContainer(sourceIllegal
)
734 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
735 "\xfb\x80\x80\x80"));
736 // U+3FFFFxx (invalid)
737 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
738 ConvertUTFResultContainer(sourceIllegal
)
739 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
740 "\xfb\xbf\xbf\xbf"));
742 // Ill-formed 6-byte sequences.
743 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
744 // U+40000xx (invalid)
745 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
746 ConvertUTFResultContainer(sourceIllegal
)
747 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
748 "\xfc\x84\x80\x80\x80"));
749 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
750 ConvertUTFResultContainer(sourceIllegal
)
751 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
752 "\xfc\xbf\xbf\xbf\xbf"));
753 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
754 ConvertUTFResultContainer(sourceIllegal
)
755 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
756 "\xfd\x80\x80\x80\x80"));
757 // U+7FFFFFxx (invalid)
758 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
759 ConvertUTFResultContainer(sourceIllegal
)
760 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
761 "\xfd\xbf\xbf\xbf\xbf"));
764 // Sequences with two continuation bytes missing
767 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
768 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
770 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
771 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
773 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
774 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
776 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
777 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
779 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
780 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
782 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
783 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd),
786 // Overlong sequences with two trailing bytes missing.
787 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
788 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xe0"));
789 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
790 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
792 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
793 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
795 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
796 ConvertUTFResultContainer(sourceIllegal
)
797 .withScalars(0xfffd, 0xfffd, 0xfffd),
799 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
800 ConvertUTFResultContainer(sourceIllegal
)
801 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
802 "\xfc\x80\x80\x80"));
804 // Sequences that represent surrogates with two trailing bytes missing.
805 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
806 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xed"));
808 // Ill-formed 4-byte sequences.
809 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
810 // U+110yxx (invalid)
811 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
812 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
814 // U+13Fyxx (invalid)
815 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
816 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
818 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
819 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
821 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
822 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
824 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
825 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
827 // U+1FFyxx (invalid)
828 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
829 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
832 // Ill-formed 5-byte sequences.
833 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
834 // U+200yxx (invalid)
835 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
836 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
838 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
839 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
841 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
842 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
844 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
845 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
847 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
848 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
850 // U+3FFFyxx (invalid)
851 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
852 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
855 // Ill-formed 6-byte sequences.
856 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
857 // U+4000yxx (invalid)
858 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
859 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
860 "\xfc\x84\x80\x80"));
861 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
862 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
863 "\xfc\xbf\xbf\xbf"));
864 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
865 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
866 "\xfd\x80\x80\x80"));
867 // U+7FFFFyxx (invalid)
868 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
869 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
870 "\xfd\xbf\xbf\xbf"));
873 // Sequences with three continuation bytes missing
876 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
877 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf0"));
878 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
879 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf1"));
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf2"));
882 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
883 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf3"));
884 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf4"));
887 // Broken overlong sequences.
888 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
889 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf0"));
890 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
891 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
893 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
894 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
897 // Ill-formed 4-byte sequences.
898 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
899 // U+14yyxx (invalid)
900 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
901 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf5"));
902 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
903 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf6"));
904 // U+1Cyyxx (invalid)
905 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
906 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf7"));
908 // Ill-formed 5-byte sequences.
909 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
910 // U+20yyxx (invalid)
911 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
912 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
914 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
915 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
917 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
918 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
920 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
921 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
923 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
924 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
926 // U+3FCyyxx (invalid)
927 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
928 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
931 // Ill-formed 6-byte sequences.
932 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
933 // U+400yyxx (invalid)
934 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
935 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
937 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
938 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
940 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
941 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
943 // U+7FFCyyxx (invalid)
944 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
945 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd, 0xfffd),
949 // Sequences with four continuation bytes missing
952 // Ill-formed 5-byte sequences.
953 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
954 // U+uzyyxx (invalid)
955 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
956 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf8"));
957 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf9"));
959 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
960 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfa"));
961 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
962 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfb"));
963 // U+3zyyxx (invalid)
964 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
965 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfb"));
967 // Broken overlong sequences.
968 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
969 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xf8"));
970 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
971 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
974 // Ill-formed 6-byte sequences.
975 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
976 // U+uzzyyxx (invalid)
977 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
978 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
980 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
981 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
983 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
984 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
986 // U+7Fzzyyxx (invalid)
987 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
988 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
992 // Sequences with five continuation bytes missing
995 // Ill-formed 6-byte sequences.
996 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
997 // U+uzzyyxx (invalid)
998 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
999 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfc"));
1000 // U+uuzzyyxx (invalid)
1001 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1002 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd), "\xfd"));
1005 // Consecutive sequences with trailing bytes missing
1008 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1009 ConvertUTFResultContainer(sourceIllegal
)
1010 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1011 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1012 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1016 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1018 "\xfc\x80\x80\x80\x80"
1019 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1021 "\xfd\xbf\xbf\xbf\xbf"));
1024 // Overlong UTF-8 sequences
1028 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1029 ConvertUTFResultContainer(conversionOK
).withScalars(0x002f), "\x2f"));
1031 // Overlong sequences of the above.
1032 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1033 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
1035 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1036 ConvertUTFResultContainer(sourceIllegal
)
1037 .withScalars(0xfffd, 0xfffd, 0xfffd),
1039 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1040 ConvertUTFResultContainer(sourceIllegal
)
1041 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1042 "\xf0\x80\x80\xaf"));
1043 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1044 ConvertUTFResultContainer(sourceIllegal
)
1045 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1046 "\xf8\x80\x80\x80\xaf"));
1047 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1048 ConvertUTFResultContainer(sourceIllegal
)
1049 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1050 "\xfc\x80\x80\x80\x80\xaf"));
1053 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1054 ConvertUTFResultContainer(conversionOK
).withScalars(0x0000),
1055 StringRef("\x00", 1)));
1057 // Overlong sequences of the above.
1058 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1059 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
1061 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1062 ConvertUTFResultContainer(sourceIllegal
)
1063 .withScalars(0xfffd, 0xfffd, 0xfffd),
1065 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1066 ConvertUTFResultContainer(sourceIllegal
)
1067 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1068 "\xf0\x80\x80\x80"));
1069 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1070 ConvertUTFResultContainer(sourceIllegal
)
1071 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1072 "\xf8\x80\x80\x80\x80"));
1073 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1074 ConvertUTFResultContainer(sourceIllegal
)
1075 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1076 "\xfc\x80\x80\x80\x80\x80"));
1078 // Other overlong sequences.
1079 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1080 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
1082 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1083 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
1085 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1086 ConvertUTFResultContainer(sourceIllegal
).withScalars(0xfffd, 0xfffd),
1088 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1089 ConvertUTFResultContainer(sourceIllegal
)
1090 .withScalars(0xfffd, 0xfffd, 0xfffd),
1092 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1093 ConvertUTFResultContainer(sourceIllegal
)
1094 .withScalars(0xfffd, 0xfffd, 0xfffd),
1096 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1097 ConvertUTFResultContainer(sourceIllegal
)
1098 .withScalars(0xfffd, 0xfffd, 0xfffd),
1100 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1101 ConvertUTFResultContainer(sourceIllegal
)
1102 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1103 "\xf0\x8f\x80\x80"));
1104 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1105 ConvertUTFResultContainer(sourceIllegal
)
1106 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1107 "\xf0\x8f\xbf\xbf"));
1108 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1109 ConvertUTFResultContainer(sourceIllegal
)
1110 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1111 "\xf8\x87\xbf\xbf\xbf"));
1112 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1113 ConvertUTFResultContainer(sourceIllegal
)
1114 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1115 "\xfc\x83\xbf\xbf\xbf\xbf"));
1118 // Isolated surrogates
1123 // D71. High-surrogate code point: A Unicode code point in the range
1124 // U+D800 to U+DBFF.
1126 // D73. Low-surrogate code point: A Unicode code point in the range
1127 // U+DC00 to U+DFFF.
1129 // Note: U+E0100 is <DB40 DD00> in UTF16.
1134 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1135 ConvertUTFResultContainer(sourceIllegal
)
1136 .withScalars(0xfffd, 0xfffd, 0xfffd),
1140 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1141 ConvertUTFResultContainer(sourceIllegal
)
1142 .withScalars(0xfffd, 0xfffd, 0xfffd),
1146 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1147 ConvertUTFResultContainer(sourceIllegal
)
1148 .withScalars(0xfffd, 0xfffd, 0xfffd),
1154 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1155 ConvertUTFResultContainer(sourceIllegal
)
1156 .withScalars(0xfffd, 0xfffd, 0xfffd),
1160 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1161 ConvertUTFResultContainer(sourceIllegal
)
1162 .withScalars(0xfffd, 0xfffd, 0xfffd),
1166 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1167 ConvertUTFResultContainer(sourceIllegal
)
1168 .withScalars(0xfffd, 0xfffd, 0xfffd),
1174 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1175 ConvertUTFResultContainer(sourceIllegal
)
1176 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1177 "\xed\xa0\x80\xed\xb0\x80"));
1180 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1181 ConvertUTFResultContainer(sourceIllegal
)
1182 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1183 "\xed\xa0\x80\xed\xb4\x80"));
1186 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1187 ConvertUTFResultContainer(sourceIllegal
)
1188 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1189 "\xed\xa0\x80\xed\xbf\xbf"));
1192 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1193 ConvertUTFResultContainer(sourceIllegal
)
1194 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1195 "\xed\xac\xa0\xed\xb0\x80"));
1198 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1199 ConvertUTFResultContainer(sourceIllegal
)
1200 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1201 "\xed\xac\xa0\xed\xb4\x80"));
1204 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1205 ConvertUTFResultContainer(sourceIllegal
)
1206 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1207 "\xed\xac\xa0\xed\xbf\xbf"));
1210 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1211 ConvertUTFResultContainer(sourceIllegal
)
1212 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1213 "\xed\xaf\xbf\xed\xb0\x80"));
1216 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1217 ConvertUTFResultContainer(sourceIllegal
)
1218 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1219 "\xed\xaf\xbf\xed\xb4\x80"));
1222 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1223 ConvertUTFResultContainer(sourceIllegal
)
1224 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1225 "\xed\xaf\xbf\xed\xbf\xbf"));
1233 // D14. Noncharacter: A code point that is permanently reserved for
1234 // internal use and that should never be interchanged. Noncharacters
1235 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1236 // and the values U+FDD0..U+FDEF.
1239 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1240 ConvertUTFResultContainer(conversionOK
).withScalars(0xfffe),
1244 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1245 ConvertUTFResultContainer(conversionOK
).withScalars(0xffff),
1249 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1250 ConvertUTFResultContainer(conversionOK
).withScalars(0x1fffe),
1251 "\xf0\x9f\xbf\xbe"));
1254 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1255 ConvertUTFResultContainer(conversionOK
).withScalars(0x1ffff),
1256 "\xf0\x9f\xbf\xbf"));
1259 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1260 ConvertUTFResultContainer(conversionOK
).withScalars(0x2fffe),
1261 "\xf0\xaf\xbf\xbe"));
1264 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1265 ConvertUTFResultContainer(conversionOK
).withScalars(0x2ffff),
1266 "\xf0\xaf\xbf\xbf"));
1269 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1270 ConvertUTFResultContainer(conversionOK
).withScalars(0x3fffe),
1271 "\xf0\xbf\xbf\xbe"));
1274 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1275 ConvertUTFResultContainer(conversionOK
).withScalars(0x3ffff),
1276 "\xf0\xbf\xbf\xbf"));
1279 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1280 ConvertUTFResultContainer(conversionOK
).withScalars(0x4fffe),
1281 "\xf1\x8f\xbf\xbe"));
1284 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1285 ConvertUTFResultContainer(conversionOK
).withScalars(0x4ffff),
1286 "\xf1\x8f\xbf\xbf"));
1289 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1290 ConvertUTFResultContainer(conversionOK
).withScalars(0x5fffe),
1291 "\xf1\x9f\xbf\xbe"));
1294 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1295 ConvertUTFResultContainer(conversionOK
).withScalars(0x5ffff),
1296 "\xf1\x9f\xbf\xbf"));
1299 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1300 ConvertUTFResultContainer(conversionOK
).withScalars(0x6fffe),
1301 "\xf1\xaf\xbf\xbe"));
1304 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1305 ConvertUTFResultContainer(conversionOK
).withScalars(0x6ffff),
1306 "\xf1\xaf\xbf\xbf"));
1309 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1310 ConvertUTFResultContainer(conversionOK
).withScalars(0x7fffe),
1311 "\xf1\xbf\xbf\xbe"));
1314 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1315 ConvertUTFResultContainer(conversionOK
).withScalars(0x7ffff),
1316 "\xf1\xbf\xbf\xbf"));
1319 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1320 ConvertUTFResultContainer(conversionOK
).withScalars(0x8fffe),
1321 "\xf2\x8f\xbf\xbe"));
1324 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1325 ConvertUTFResultContainer(conversionOK
).withScalars(0x8ffff),
1326 "\xf2\x8f\xbf\xbf"));
1329 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1330 ConvertUTFResultContainer(conversionOK
).withScalars(0x9fffe),
1331 "\xf2\x9f\xbf\xbe"));
1334 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1335 ConvertUTFResultContainer(conversionOK
).withScalars(0x9ffff),
1336 "\xf2\x9f\xbf\xbf"));
1339 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1340 ConvertUTFResultContainer(conversionOK
).withScalars(0xafffe),
1341 "\xf2\xaf\xbf\xbe"));
1344 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1345 ConvertUTFResultContainer(conversionOK
).withScalars(0xaffff),
1346 "\xf2\xaf\xbf\xbf"));
1349 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1350 ConvertUTFResultContainer(conversionOK
).withScalars(0xbfffe),
1351 "\xf2\xbf\xbf\xbe"));
1354 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1355 ConvertUTFResultContainer(conversionOK
).withScalars(0xbffff),
1356 "\xf2\xbf\xbf\xbf"));
1359 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1360 ConvertUTFResultContainer(conversionOK
).withScalars(0xcfffe),
1361 "\xf3\x8f\xbf\xbe"));
1364 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1365 ConvertUTFResultContainer(conversionOK
).withScalars(0xcfffF),
1366 "\xf3\x8f\xbf\xbf"));
1369 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1370 ConvertUTFResultContainer(conversionOK
).withScalars(0xdfffe),
1371 "\xf3\x9f\xbf\xbe"));
1374 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1375 ConvertUTFResultContainer(conversionOK
).withScalars(0xdffff),
1376 "\xf3\x9f\xbf\xbf"));
1379 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1380 ConvertUTFResultContainer(conversionOK
).withScalars(0xefffe),
1381 "\xf3\xaf\xbf\xbe"));
1384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1385 ConvertUTFResultContainer(conversionOK
).withScalars(0xeffff),
1386 "\xf3\xaf\xbf\xbf"));
1389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1390 ConvertUTFResultContainer(conversionOK
).withScalars(0xffffe),
1391 "\xf3\xbf\xbf\xbe"));
1394 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1395 ConvertUTFResultContainer(conversionOK
).withScalars(0xfffff),
1396 "\xf3\xbf\xbf\xbf"));
1399 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1400 ConvertUTFResultContainer(conversionOK
).withScalars(0x10fffe),
1401 "\xf4\x8f\xbf\xbe"));
1404 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1405 ConvertUTFResultContainer(conversionOK
).withScalars(0x10ffff),
1406 "\xf4\x8f\xbf\xbf"));
1409 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1410 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd0),
1414 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1415 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd1),
1419 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1420 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd2),
1424 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1425 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd3),
1429 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1430 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd4),
1434 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1435 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd5),
1439 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1440 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd6),
1444 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1445 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd7),
1449 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1450 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd8),
1454 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1455 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdd9),
1459 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1460 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdda),
1464 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1465 ConvertUTFResultContainer(conversionOK
).withScalars(0xfddb),
1469 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1470 ConvertUTFResultContainer(conversionOK
).withScalars(0xfddc),
1474 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1475 ConvertUTFResultContainer(conversionOK
).withScalars(0xfddd),
1479 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1480 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdde),
1484 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1485 ConvertUTFResultContainer(conversionOK
).withScalars(0xfddf),
1489 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1490 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde0),
1494 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1495 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde1),
1499 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1500 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde2),
1504 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1505 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde3),
1509 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1510 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde4),
1514 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1515 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde5),
1519 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1520 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde6),
1524 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1525 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde7),
1529 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1530 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde8),
1534 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1535 ConvertUTFResultContainer(conversionOK
).withScalars(0xfde9),
1539 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1540 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdea),
1544 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1545 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdeb),
1549 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1550 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdec),
1554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1555 ConvertUTFResultContainer(conversionOK
).withScalars(0xfded),
1559 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1560 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdee),
1564 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1565 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdef),
1569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1570 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf0),
1574 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1575 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf1),
1579 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1580 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf2),
1584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1585 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf3),
1589 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1590 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf4),
1594 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1595 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf5),
1599 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1600 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf6),
1604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1605 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf7),
1609 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1610 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf8),
1614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1615 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdf9),
1619 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1620 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdfa),
1624 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1625 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdfb),
1629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1630 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdfc),
1634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1635 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdfd),
1639 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1640 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdfe),
1644 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1645 ConvertUTFResultContainer(conversionOK
).withScalars(0xfdff),
1649 TEST(ConvertUTFTest
, UTF8ToUTF32PartialLenient
) {
1650 // U+0041 LATIN CAPITAL LETTER A
1651 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1652 ConvertUTFResultContainer(conversionOK
).withScalars(0x0041),
1656 // Sequences with one continuation byte missing
1659 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1660 ConvertUTFResultContainer(sourceExhausted
),
1662 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1663 ConvertUTFResultContainer(sourceExhausted
),
1665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1666 ConvertUTFResultContainer(sourceExhausted
),
1668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1669 ConvertUTFResultContainer(sourceExhausted
),
1671 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1672 ConvertUTFResultContainer(sourceExhausted
),
1674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1675 ConvertUTFResultContainer(sourceExhausted
),
1677 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1678 ConvertUTFResultContainer(sourceExhausted
),
1680 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1681 ConvertUTFResultContainer(sourceExhausted
),
1683 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1684 ConvertUTFResultContainer(sourceExhausted
),
1686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1687 ConvertUTFResultContainer(sourceExhausted
),
1689 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1690 ConvertUTFResultContainer(sourceExhausted
),
1691 "\xf0\x90\x80", true));
1692 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1693 ConvertUTFResultContainer(sourceExhausted
),
1694 "\xf0\xbf\xbf", true));
1695 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1696 ConvertUTFResultContainer(sourceExhausted
),
1697 "\xf1\x80\x80", true));
1698 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1699 ConvertUTFResultContainer(sourceExhausted
),
1700 "\xf3\xbf\xbf", true));
1701 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1702 ConvertUTFResultContainer(sourceExhausted
),
1703 "\xf4\x80\x80", true));
1704 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1705 ConvertUTFResultContainer(sourceExhausted
),
1706 "\xf4\x8f\xbf", true));
1708 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1709 ConvertUTFResultContainer(sourceExhausted
).withScalars(0x0041),