1 /* Grapheme cluster breaks test.
2 Copyright (C) 2010-2025 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify it
5 under the terms of the GNU Lesser General Public License as published
6 by the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
/* Test helper: run u32_grapheme_breaks on a short sequence of units and
   compare the result against EXPECTED.

   EXPECTED is a pattern string with one character per input unit; a '#'
   at index i means breaks[i] is expected to be nonzero (1), any other
   character means it is expected to be 0.  The variadic arguments are
   read as ints, one per character of EXPECTED, and must be followed by
   a terminating -1.

   NOTE(review): the declarations of s, breaks, args, i and j, and any
   statements between the fragments below, are not visible in this
   excerpt.  */
32 test_u32_grapheme_breaks (const char *expected
, ...)
/* The number of units to read is the length of the pattern string.  */
34 size_t n
= strlen (expected
);
/* Pre-fill the result buffer with 0xcc, which is neither 0 nor 1, so an
   entry left unwritten by u32_grapheme_breaks fails the comparison
   below.  */
42 memset (breaks
, 0xcc, n
);
/* Pull exactly n int arguments off the variadic list.  */
44 va_start (args
, expected
);
45 for (i
= 0; i
< n
; i
++)
47 int unit
= va_arg (args
, int);
/* The argument following the n units must be the -1 terminator.  */
51 ASSERT (va_arg (args
, int) == -1);
/* Function under test.  */
54 u32_grapheme_breaks (s
, n
, breaks
);
/* Compare each computed entry with the pattern; (expected[i] == '#')
   evaluates to 1 for '#' and 0 otherwise.  */
55 for (i
= 0; i
< n
; i
++)
56 if (breaks
[i
] != (expected
[i
] == '#'))
/* Mismatch: dump the input (as hex), the expected flags and the actual
   flags to stderr.  */
60 fprintf (stderr
, "wrong grapheme breaks:\n");
62 fprintf (stderr
, " input:");
63 for (j
= 0; j
< n
; j
++)
64 fprintf (stderr
, " %04X", s
[j
]);
67 fprintf (stderr
, "expected:");
68 for (j
= 0; j
< n
; j
++)
69 fprintf (stderr
, " %d", expected
[j
] == '#');
72 fprintf (stderr
, " actual:");
73 for (j
= 0; j
< n
; j
++)
74 fprintf (stderr
, " %d", breaks
[j
]);
/* Test cases.  Each call passes a pattern string followed by one
   argument per pattern character and a terminating -1.  In the pattern,
   '#' marks a position where test_u32_grapheme_breaks expects a break
   flag of 1 and '_' a position where it expects 0.
   NOTE(review): the enclosing function header is not visible in this
   excerpt, and the last three calls are cut off before their closing
   arguments.  */
85 /* Standalone 1-unit graphemes. */
86 test_u32_grapheme_breaks ("#", 'a', -1);
87 test_u32_grapheme_breaks ("##", 'a', 'b', -1);
88 test_u32_grapheme_breaks ("###", 'a', 'b', 'c', -1);
/* Same expectation (every position is '#') with a non-ASCII letter.  */
90 #define HIRAGANA_A 0x3042 /* あ: Hiragana letter 'a'. */
91 test_u32_grapheme_breaks ("#", HIRAGANA_A
, -1);
92 test_u32_grapheme_breaks ("##", HIRAGANA_A
, 'x', -1);
93 test_u32_grapheme_breaks ("##", HIRAGANA_A
, HIRAGANA_A
, -1);
95 /* Combining accents. */
/* In these patterns every accent position is '_', i.e. no break flag
   is expected at a combining accent that follows a letter.  */
96 #define GRAVE 0x0300 /* Combining grave accent. */
97 #define ACUTE 0x0301 /* Combining acute accent. */
98 test_u32_grapheme_breaks ("#_", 'e', ACUTE
, -1);
99 test_u32_grapheme_breaks ("#__", 'e', ACUTE
, GRAVE
, -1);
100 test_u32_grapheme_breaks ("#_#", 'e', ACUTE
, 'x', -1);
101 test_u32_grapheme_breaks ("#_#_", 'e', ACUTE
, 'e', GRAVE
, -1);
103 /* CR LF handling. */
/* The only '_' in the pattern is at index 6, the '\n' that directly
   follows a '\r'; the lone '\n' (index 1) and lone '\r' (index 3) are
   both '#'.  */
104 test_u32_grapheme_breaks ("######_#",
105 'a', '\n', 'b', '\r', 'c', '\r', '\n', 'd',
108 /* Emoji modifier / ZWJ sequence. */
/* Pattern "#____": only the first of the five units is expected to
   carry a break flag.  */
109 test_u32_grapheme_breaks ("#____",
110 0x2605, 0x0305, 0x0347, 0x200D, 0x2600,
113 /* Regional indicators. */
/* Pattern "##_#_#": the four code points between the two '.' units are
   expected to group in pairs (flag 1 on the first of each pair, 0 on
   the second).  */
114 test_u32_grapheme_breaks ("##_#_#",
115 '.', 0x1F1E9, 0x1F1EA, 0x1F1EB, 0x1F1F7, '.',
118 return test_exit_status
;