openjdk-23: use OpenJDK 23 as the boot JDK
[oi-userland.git] / components / library / icu-75 / patches / 202-maint-75-ICU-22718-Export-disallowed-ignored-UTS-46-data-for-.patch
blob23929ff8a2415ac4243a5b1a8e7313177cf4dd1f
1 https://github.com/unicode-org/icu/commit/b8389b1186fe4b56ca91eadfa31886a3b4a195c6
3 --- icu/source/tools/icuexportdata/icuexportdata.cpp.orig
4 +++ icu/source/tools/icuexportdata/icuexportdata.cpp
5 @@ -755,9 +755,12 @@
6 std::vector<uint32_t> nonRecursive32;
7 LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));
9 + UBool uts46 = false;
11 if (uprv_strcmp(basename, "nfkd") == 0) {
12 mainNormalizer = Normalizer2::getNFKDInstance(status);
13 } else if (uprv_strcmp(basename, "uts46d") == 0) {
14 + uts46 = true;
15 mainNormalizer = Normalizer2::getInstance(nullptr, "uts46", UNORM2_COMPOSE, status);
16 } else {
17 mainNormalizer = nfdNormalizer;
18 @@ -817,23 +820,38 @@
19 nfcNormalizer->normalize(dst, nfc, status);
20 nonNfdOrRoundTrips = (src == nfc);
22 + if (uts46) {
23 + // Work around https://unicode-org.atlassian.net/browse/ICU-22658
24 + // TODO: Remove the workaround after data corresponding to
25 + // https://www.unicode.org/L2/L2024/24061.htm#179-C36 lands
26 + // for Unicode 16.
27 + switch (c) {
28 + case 0x2F868:
29 + dst.truncate(0);
30 + dst.append(UChar32(0x36FC));
31 + break;
32 + case 0x2F874:
33 + dst.truncate(0);
34 + dst.append(UChar32(0x5F53));
35 + break;
36 + case 0x2F91F:
37 + dst.truncate(0);
38 + dst.append(UChar32(0x243AB));
39 + break;
40 + case 0x2F95F:
41 + dst.truncate(0);
42 + dst.append(UChar32(0x7AEE));
43 + break;
44 + case 0x2F9BF:
45 + dst.truncate(0);
46 + dst.append(UChar32(0x45D7));
47 + break;
48 + }
49 + }
50 int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
52 if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
53 - // Characters that normalize to nothing or to U+FFFD (without the
54 - // input being U+FFFD) in ICU4C's UTS 46 normalization normalize
55 - // as in NFD in ICU4X's UTF 46 normalization in the interest
56 - // of data size and ICU4X's normalizer being unable to handle
57 - // normalizing to nothing.
58 - // When UTS 46 is implemented on top of ICU4X, a preprocessing
59 - // step is supposed to remove these characters before the
60 - // normalization step.
61 - if (uprv_strcmp(basename, "uts46d") != 0) {
62 - status.set(U_INTERNAL_PROGRAM_ERROR);
63 - handleError(status, basename);
64 - }
65 - nfdNormalizer->normalize(src, dst, status);
66 - len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
67 - if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
68 + if (!uts46) {
69 status.set(U_INTERNAL_PROGRAM_ERROR);
70 handleError(status, basename);
72 @@ -951,7 +969,13 @@
73 if (!nonNfdOrRoundTrips) {
74 compositionPassthroughBound = c;
76 - if (len == 1 && utf32[0] <= 0xFFFF) {
77 + if (!len) {
78 + if (!uts46) {
79 + status.set(U_INTERNAL_PROGRAM_ERROR);
80 + handleError(status, basename);
81 + }
82 + pendingTrieInsertions.push_back({c, 0xFFFFFFFF, false});
83 + } else if (len == 1 && utf32[0] <= 0xFFFF) {
84 if (startsWithBackwardCombiningStarter) {
85 if (mainNormalizer == nfdNormalizer) {
86 // Not supposed to happen in NFD