1 https://github.com/unicode-org/icu/commit/b8389b1186fe4b56ca91eadfa31886a3b4a195c6
3 --- icu/source/tools/icuexportdata/icuexportdata.cpp.orig
4 +++ icu/source/tools/icuexportdata/icuexportdata.cpp
6 std::vector<uint32_t> nonRecursive32;
7 LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));
11 if (uprv_strcmp(basename, "nfkd") == 0) {
12 mainNormalizer = Normalizer2::getNFKDInstance(status);
13 } else if (uprv_strcmp(basename, "uts46d") == 0) {
15 mainNormalizer = Normalizer2::getInstance(nullptr, "uts46", UNORM2_COMPOSE, status);
17 mainNormalizer = nfdNormalizer;
19 nfcNormalizer->normalize(dst, nfc, status);
20 nonNfdOrRoundTrips = (src == nfc);
23 + // Work around https://unicode-org.atlassian.net/browse/ICU-22658
24 + // TODO: Remove the workaround after data corresponding to
25 + // https://www.unicode.org/L2/L2024/24061.htm#179-C36 lands
30 + dst.append(UChar32(0x36FC));
34 + dst.append(UChar32(0x5F53));
38 + dst.append(UChar32(0x243AB));
42 + dst.append(UChar32(0x7AEE));
46 + dst.append(UChar32(0x45D7));
50 int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
52 if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
53 - // Characters that normalize to nothing or to U+FFFD (without the
54 - // input being U+FFFD) in ICU4C's UTS 46 normalization normalize
55 - // as in NFD in ICU4X's UTF 46 normalization in the interest
56 - // of data size and ICU4X's normalizer being unable to handle
57 - // normalizing to nothing.
58 - // When UTS 46 is implemented on top of ICU4X, a preprocessing
59 - // step is supposed to remove these characters before the
60 - // normalization step.
61 - if (uprv_strcmp(basename, "uts46d") != 0) {
62 - status.set(U_INTERNAL_PROGRAM_ERROR);
63 - handleError(status, basename);
65 - nfdNormalizer->normalize(src, dst, status);
66 - len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
67 - if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
69 status.set(U_INTERNAL_PROGRAM_ERROR);
70 handleError(status, basename);
73 if (!nonNfdOrRoundTrips) {
74 compositionPassthroughBound = c;
76 - if (len == 1 && utf32[0] <= 0xFFFF) {
79 + status.set(U_INTERNAL_PROGRAM_ERROR);
80 + handleError(status, basename);
82 + pendingTrieInsertions.push_back({c, 0xFFFFFFFF, false});
83 + } else if (len == 1 && utf32[0] <= 0xFFFF) {
84 if (startsWithBackwardCombiningStarter) {
85 if (mainNormalizer == nfdNormalizer) {
86 // Not supposed to happen in NFD