components/library/icu-75/patches/202-maint-75-ICU-22718-Export-disallowed-ignored-UTS-46-data-for-.patch

   1 https://github.com/unicode-org/icu/commit/b8389b1186fe4b56ca91eadfa31886a3b4a195c6
   2
   3 --- icu/source/tools/icuexportdata/icuexportdata.cpp.orig
   4 +++ icu/source/tools/icuexportdata/icuexportdata.cpp
   5 @@ -755,9 +755,12 @@
   6      std::vector<uint32_t> nonRecursive32;
   7      LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));
   8
   9 +    UBool uts46 = false;
  10 +
  11      if (uprv_strcmp(basename, "nfkd") == 0) {
  12          mainNormalizer = Normalizer2::getNFKDInstance(status);
  13      } else if (uprv_strcmp(basename, "uts46d") == 0) {
  14 +        uts46 = true;
  15          mainNormalizer = Normalizer2::getInstance(nullptr, "uts46", UNORM2_COMPOSE, status);
  16      } else {
  17          mainNormalizer = nfdNormalizer;
  18 @@ -817,23 +820,38 @@
  19              nfcNormalizer->normalize(dst, nfc, status);
  20              nonNfdOrRoundTrips = (src == nfc);
  21          }
  22 +        if (uts46) {
  23 +            // Work around https://unicode-org.atlassian.net/browse/ICU-22658
  24 +            // TODO: Remove the workaround after data corresponding to
  25 +            // https://www.unicode.org/L2/L2024/24061.htm#179-C36 lands
  26 +            // for Unicode 16.
  27 +            switch (c) {
  28 +                case 0x2F868:
  29 +                    dst.truncate(0);
  30 +                    dst.append(UChar32(0x36FC));
  31 +                    break;
  32 +                case 0x2F874:
  33 +                    dst.truncate(0);
  34 +                    dst.append(UChar32(0x5F53));
  35 +                    break;
  36 +                case 0x2F91F:
  37 +                    dst.truncate(0);
  38 +                    dst.append(UChar32(0x243AB));
  39 +                    break;
  40 +                case 0x2F95F:
  41 +                    dst.truncate(0);
  42 +                    dst.append(UChar32(0x7AEE));
  43 +                    break;
  44 +                case 0x2F9BF:
  45 +                    dst.truncate(0);
  46 +                    dst.append(UChar32(0x45D7));
  47 +                    break;
  48 +            }
  49 +        }
  50          int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
  51 +
  52          if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
  53 -            // Characters that normalize to nothing or to U+FFFD (without the
  54 -            // input being U+FFFD) in ICU4C's UTS 46 normalization normalize
  55 -            // as in NFD in ICU4X's UTF 46 normalization in the interest
  56 -            // of data size and ICU4X's normalizer being unable to handle
  57 -            // normalizing to nothing.
  58 -            // When UTS 46 is implemented on top of ICU4X, a preprocessing
  59 -            // step is supposed to remove these characters before the
  60 -            // normalization step.
  61 -            if (uprv_strcmp(basename, "uts46d") != 0) {
  62 -                status.set(U_INTERNAL_PROGRAM_ERROR);
  63 -                handleError(status, basename);
  64 -            }
  65 -            nfdNormalizer->normalize(src, dst, status);
  66 -            len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
  67 -            if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
  68 +            if (!uts46) {
  69                  status.set(U_INTERNAL_PROGRAM_ERROR);
  70                  handleError(status, basename);
  71              }
  72 @@ -951,7 +969,13 @@
  73          if (!nonNfdOrRoundTrips) {
  74              compositionPassthroughBound = c;
  75          }
  76 -        if (len == 1 && utf32[0] <= 0xFFFF) {
  77 +        if (!len) {
  78 +            if (!uts46) {
  79 +                status.set(U_INTERNAL_PROGRAM_ERROR);
  80 +                handleError(status, basename);
  81 +            }
  82 +            pendingTrieInsertions.push_back({c, 0xFFFFFFFF, false});
  83 +        } else if (len == 1 && utf32[0] <= 0xFFFF) {
  84              if (startsWithBackwardCombiningStarter) {
  85                  if (mainNormalizer == nfdNormalizer) {
  86                      // Not supposed to happen in NFD