fpurge: Improve configure test.
[gnulib.git] / tests / unigbrk / test-uc-is-grapheme-break.c
blob939b09b3a99eea18707167f6b37b90ff3e783a1a
1 /* Grapheme cluster break function test.
2 Copyright (C) 2010-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify it
5 under the terms of the GNU Lesser General Public License as published
6 by the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
19 #include <config.h>
21 /* Specification. */
22 #include <unigbrk.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
28 #include "unictype.h"
30 const char *
31 graphemebreakproperty_to_string (int gbp)
33 switch (gbp)
35 #define CASE(VALUE) case GBP_##VALUE: return #VALUE;
36 CASE(OTHER)
37 CASE(CR)
38 CASE(LF)
39 CASE(CONTROL)
40 CASE(EXTEND)
41 CASE(PREPEND)
42 CASE(SPACINGMARK)
43 CASE(L)
44 CASE(V)
45 CASE(T)
46 CASE(LV)
47 CASE(LVT)
48 CASE(RI)
49 CASE(ZWJ)
50 CASE(EB)
51 CASE(EM)
52 CASE(GAZ)
53 CASE(EBG)
55 abort ();
58 int
59 main (int argc, char *argv[])
61 const char *filename;
62 FILE *stream;
63 int exit_code;
64 int lineno;
65 char line[1024];
67 if (argc != 2)
69 fprintf (stderr, "usage: %s FILENAME\n"
70 "where FILENAME is the location of the GraphemeBreakTest.txt\n"
71 "test file.\n", argv[0]);
72 exit (1);
75 filename = argv[1];
76 stream = fopen (filename, "r");
77 if (stream == NULL)
79 fprintf (stderr, "error during fopen of '%s'\n", filename);
80 exit (1);
83 exit_code = 0;
84 lineno = 0;
85 while (fgets (line, sizeof (line), stream))
87 lineno++;
89 /* Cut off the trailing comment, if any. */
90 char *comment = strchr (line, '#');
91 if (comment != NULL)
92 *comment = '\0';
93 /* Is the remaining line blank? */
94 if (line[strspn (line, " \t\r\n")] == '\0')
95 continue;
97 const char *p;
98 ucs4_t prev;
99 int last_char_prop;
100 bool incb_consonant_extended;
101 bool incb_consonant_extended_linker;
102 bool incb_consonant_extended_linker_extended;
103 bool emoji_modifier_sequence;
104 bool emoji_modifier_sequence_before_last_char;
105 size_t ri_count;
107 last_char_prop = -1;
108 incb_consonant_extended = false;
109 incb_consonant_extended_linker = false;
110 incb_consonant_extended_linker_extended = false;
111 emoji_modifier_sequence = false;
112 emoji_modifier_sequence_before_last_char = false;
113 ri_count = 0;
114 prev = 0;
115 p = line;
118 bool should_break;
119 ucs4_t next;
121 p += strspn (p, " \t\r\n");
122 if (!strncmp (p, "\303\267" /* ÷ */, 2))
124 should_break = true;
125 p += 2;
127 else if (!strncmp (p, "\303\227" /* × */, 2))
129 should_break = false;
130 p += 2;
132 else
134 fprintf (stderr, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
135 filename, lineno, (int) (p - line + 1));
136 exit (1);
139 p += strspn (p, " \t\r\n");
140 if (*p == '\0')
141 next = 0;
142 else
144 unsigned int next_int;
145 int n;
147 if (sscanf (p, "%x%n", &next_int, &n) != 1)
149 fprintf (stderr, "%s:%d.%d: syntax error at '%s' "
150 "expecting hexadecimal Unicode code point number\n",
151 filename, lineno, (int) (p - line + 1), p);
152 exit (1);
154 p += n;
156 next = next_int;
159 int incb = uc_indic_conjunct_break (next);
161 /* Skip unsupported rules involving 3 or more characters. */
162 if (incb_consonant_extended_linker_extended
163 && incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT)
164 fprintf (stderr, "%s:%d: skipping GB9c: should join U+%04X "
165 "and U+%04X\n",
166 filename, lineno, prev, next);
167 else if (last_char_prop == GBP_ZWJ
168 && emoji_modifier_sequence_before_last_char
169 && uc_is_property_extended_pictographic (next))
171 int prev_gbp = uc_graphemeclusterbreak_property (prev);
172 int next_gbp = uc_graphemeclusterbreak_property (next);
173 fprintf (stderr, "%s:%d: skipping GB11: should join U+%04X (%s) "
174 "and U+%04X (%s)\n",
175 filename, lineno,
176 prev, graphemebreakproperty_to_string (prev_gbp),
177 next, graphemebreakproperty_to_string (next_gbp));
179 else if (uc_graphemeclusterbreak_property (next) == GBP_RI
180 && ri_count % 2 != 0)
182 int prev_gbp = uc_graphemeclusterbreak_property (prev);
183 int next_gbp = uc_graphemeclusterbreak_property (next);
184 fprintf (stderr, "%s:%d: skipping GB12: should join U+%04X (%s) "
185 "and U+%04X (%s)\n",
186 filename, lineno,
187 prev, graphemebreakproperty_to_string (prev_gbp),
188 next, graphemebreakproperty_to_string (next_gbp));
190 else if (uc_is_grapheme_break (prev, next) != should_break)
192 int prev_gbp = uc_graphemeclusterbreak_property (prev);
193 int next_gbp = uc_graphemeclusterbreak_property (next);
194 fprintf (stderr, "%s:%d: should %s U+%04X (%s) and "
195 "U+%04X (%s)\n",
196 filename, lineno,
197 should_break ? "break" : "join",
198 prev, graphemebreakproperty_to_string (prev_gbp),
199 next, graphemebreakproperty_to_string (next_gbp));
200 exit_code = 1;
203 p += strspn (p, " \t\r\n");
204 prev = next;
206 incb_consonant_extended_linker =
207 incb_consonant_extended && incb == UC_INDIC_CONJUNCT_BREAK_LINKER;
208 incb_consonant_extended_linker_extended =
209 (incb_consonant_extended_linker
210 || (incb_consonant_extended_linker_extended
211 && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));
212 incb_consonant_extended =
213 (incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT
214 || (incb_consonant_extended
215 && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));
217 emoji_modifier_sequence_before_last_char = emoji_modifier_sequence;
218 emoji_modifier_sequence =
219 (emoji_modifier_sequence
220 && uc_graphemeclusterbreak_property (next) == GBP_EXTEND)
221 || uc_is_property_extended_pictographic (next);
223 last_char_prop = uc_graphemeclusterbreak_property (next);
225 if (uc_graphemeclusterbreak_property (next) == GBP_RI)
226 ri_count++;
227 else
228 ri_count = 0;
230 while (*p != '\0');
233 return exit_code;