1 /* Grapheme cluster break function test.
2 Copyright (C) 2010-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify it
5 under the terms of the GNU Lesser General Public License as published
6 by the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
/* Takes a grapheme cluster break property value GBP.  The callers in main
   print the result with a %s directive, so it yields a printable name.
   NOTE(review): the CASE macro below suggests a switch over GBP with one
   CASE entry per GBP_* constant -- confirm against the full body.  */
31 graphemebreakproperty_to_string (int gbp
)
/* CASE(VALUE) expands to a case label for GBP_<VALUE> that returns the
   spelling of VALUE as a string literal.  */
35 #define CASE(VALUE) case GBP_##VALUE: return #VALUE;
/* Test driver.  Reads a test file line by line with fgets; each line is a
   sequence of break markers and hexadecimal code points.  For every pair of
   adjacent code points the result of uc_is_grapheme_break is compared with
   the marker found in the file, and mismatches are reported on stderr.
   Cases that depend on more than two characters (GB9c, GB11, GB12) are
   reported as skipped instead of being checked.  */
59 main (int argc
, char *argv
[])
/* Usage message; argv[0] supplies the program name.  */
69 fprintf (stderr
, "usage: %s FILENAME\n"
70 "where FILENAME is the location of the GraphemeBreakTest.txt\n"
71 "test file.\n", argv
[0]);
/* Open the test file for reading in text mode.  */
76 stream
= fopen (filename
, "r");
/* Diagnostic naming the file that fopen was called on.  */
79 fprintf (stderr
, "error during fopen of '%s'\n", filename
);
/* Main loop: one fgets call per iteration, bounded by the size of the
   line buffer.  */
85 while (fgets (line
, sizeof (line
), stream
))
89 /* Cut off the trailing comment, if any. */
90 char *comment
= strchr (line
, '#');
93 /* Is the remaining line blank? */
94 if (line
[strspn (line
, " \t\r\n")] == '\0')
/* State flags for the multi-character rules.  The incb_* flags feed the
   GB9c test and the emoji_* flags feed the GB11 test further down.  */
100 bool incb_consonant_extended
;
101 bool incb_consonant_extended_linker
;
102 bool incb_consonant_extended_linker_extended
;
103 bool emoji_modifier_sequence
;
104 bool emoji_modifier_sequence_before_last_char
;
/* Start with all of the state flags cleared.  */
108 incb_consonant_extended
= false;
109 incb_consonant_extended_linker
= false;
110 incb_consonant_extended_linker_extended
= false;
111 emoji_modifier_sequence
= false;
112 emoji_modifier_sequence_before_last_char
= false;
/* Skip whitespace, then expect one of the two markers.  Both are two-byte
   UTF-8 sequences, hence the length 2 passed to strncmp.  The second
   marker clears should_break.  */
121 p
+= strspn (p
, " \t\r\n");
122 if (!strncmp (p
, "\303\267" /* ÷ */, 2))
127 else if (!strncmp (p
, "\303\227" /* × */, 2))
129 should_break
= false;
/* Neither marker matched: report file, line number and 1-based column
   (offset of p within line, plus one).  */
134 fprintf (stderr
, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
135 filename
, lineno
, (int) (p
- line
+ 1));
/* Skip whitespace between the marker and the code point.  */
139 p
+= strspn (p
, " \t\r\n");
/* Parse one hexadecimal number; the %n directive stores in n how many
   characters sscanf consumed.  A return value other than 1 means that no
   number could be read.  */
144 unsigned int next_int
;
147 if (sscanf (p
, "%x%n", &next_int
, &n
) != 1)
149 fprintf (stderr
, "%s:%d.%d: syntax error at '%s' "
150 "expecting hexadecimal Unicode code point number\n",
151 filename
, lineno
, (int) (p
- line
+ 1), p
);
/* Result of uc_indic_conjunct_break for next; used by the GB9c test and
   by the state update below.  */
159 int incb
= uc_indic_conjunct_break (next
);
161 /* Skip unsupported rules involving 3 or more characters. */
162 if (incb_consonant_extended_linker_extended
163 && incb
== UC_INDIC_CONJUNCT_BREAK_CONSONANT
)
164 fprintf (stderr
, "%s:%d: skipping GB9c: should join U+%04X "
166 filename
, lineno
, prev
, next
);
/* GB11 case: the previously processed character had property GBP_ZWJ, an
   emoji modifier sequence preceded it, and next is Extended_Pictographic.  */
167 else if (last_char_prop
== GBP_ZWJ
168 && emoji_modifier_sequence_before_last_char
169 && uc_is_property_extended_pictographic (next
))
171 int prev_gbp
= uc_graphemeclusterbreak_property (prev
);
172 int next_gbp
= uc_graphemeclusterbreak_property (next
);
173 fprintf (stderr
, "%s:%d: skipping GB11: should join U+%04X (%s) "
176 prev
, graphemebreakproperty_to_string (prev_gbp
),
177 next
, graphemebreakproperty_to_string (next_gbp
));
/* GB12 case: next has property GBP_RI and ri_count is odd.  */
179 else if (uc_graphemeclusterbreak_property (next
) == GBP_RI
180 && ri_count
% 2 != 0)
182 int prev_gbp
= uc_graphemeclusterbreak_property (prev
);
183 int next_gbp
= uc_graphemeclusterbreak_property (next
);
184 fprintf (stderr
, "%s:%d: skipping GB12: should join U+%04X (%s) "
187 prev
, graphemebreakproperty_to_string (prev_gbp
),
188 next
, graphemebreakproperty_to_string (next_gbp
));
/* The actual check: the two-character predicate must agree with the
   marker read from the file.  The diagnostic names both code points and
   their break properties.  */
190 else if (uc_is_grapheme_break (prev
, next
) != should_break
)
192 int prev_gbp
= uc_graphemeclusterbreak_property (prev
);
193 int next_gbp
= uc_graphemeclusterbreak_property (next
);
194 fprintf (stderr
, "%s:%d: should %s U+%04X (%s) and "
197 should_break
? "break" : "join",
198 prev
, graphemebreakproperty_to_string (prev_gbp
),
199 next
, graphemebreakproperty_to_string (next_gbp
));
/* Skip whitespace after the code point.  */
203 p
+= strspn (p
, " \t\r\n");
/* Update the GB9c state.  The order of the three assignments matters:
   the first one needs the previous value of incb_consonant_extended,
   which is therefore recomputed last; the second one uses the freshly
   computed incb_consonant_extended_linker together with its own previous
   value.  */
206 incb_consonant_extended_linker
=
207 incb_consonant_extended
&& incb
== UC_INDIC_CONJUNCT_BREAK_LINKER
;
208 incb_consonant_extended_linker_extended
=
209 (incb_consonant_extended_linker
210 || (incb_consonant_extended_linker_extended
211 && incb
>= UC_INDIC_CONJUNCT_BREAK_LINKER
));
212 incb_consonant_extended
=
213 (incb
== UC_INDIC_CONJUNCT_BREAK_CONSONANT
214 || (incb_consonant_extended
215 && incb
>= UC_INDIC_CONJUNCT_BREAK_LINKER
));
/* Update the GB11 state: save the old flag first, then recompute it.  The
   sequence continues across GBP_EXTEND characters and (re)starts at any
   Extended_Pictographic character.  */
217 emoji_modifier_sequence_before_last_char
= emoji_modifier_sequence
;
218 emoji_modifier_sequence
=
219 (emoji_modifier_sequence
220 && uc_graphemeclusterbreak_property (next
) == GBP_EXTEND
)
221 || uc_is_property_extended_pictographic (next
);
/* Remember the break property of this character for the GB11 test.  */
223 last_char_prop
= uc_graphemeclusterbreak_property (next
);
/* Regional indicator bookkeeping for the GB12 test on ri_count.  */
225 if (uc_graphemeclusterbreak_property (next
) == GBP_RI
)