1 /* Unicode character case mappings.
2 Copyright (C) 2002, 2009-2024 Free Software Foundation, Inc.
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
31 #if @HAVE_UNISTRING_WOE32DLL_H@
32 # include <unistring/woe32dll.h>
34 # define LIBUNISTRING_DLL_VARIABLE
41 /* ========================================================================= */
43 /* Character case mappings.
44 These mappings are locale and context independent.
45 WARNING! These functions are not sufficient for languages such as German.
46 Better use the functions below that treat an entire string at once and are language aware. */
49 /* Return the uppercase mapping of a Unicode character. */
51 uc_toupper (ucs4_t uc
)
54 /* Return the lowercase mapping of a Unicode character. */
56 uc_tolower (ucs4_t uc
)
59 /* Return the titlecase mapping of a Unicode character. */
61 uc_totitle (ucs4_t uc
)
64 /* ========================================================================= */
66 /* String case mappings. */
68 /* These functions are locale dependent. The iso639_language argument
69 identifies the language (e.g. "tr" for Turkish). NULL means to use
70 locale independent case mappings. */
72 /* Return the ISO 639 language code of the current locale.
73 Return "" if it is unknown, or in the "C" locale. */
75 uc_locale_language (void)
80 /* All functions prefixed with u8_ operate on UTF-8 encoded strings.
81 Their unit is an uint8_t (1 byte).
83 All functions prefixed with u16_ operate on UTF-16 encoded strings.
84 Their unit is an uint16_t (a 2-byte word).
86 All functions prefixed with u32_ operate on UCS-4 encoded strings.
87 Their unit is an uint32_t (a 4-byte word).
89 All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly n units.
92 Functions returning a string result take a (resultbuf, lengthp) argument
93 pair. If resultbuf is not NULL and the result fits into *lengthp units,
94 it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
95 allocated string is returned. In both cases, *lengthp is set to the
96 length (number of units) of the returned string. In case of error,
97 NULL is returned and errno is set. */
99 /* Return the uppercase mapping of a string.
100 The nf argument identifies the normalization form to apply after the
101 case-mapping. It can also be NULL, for no normalization. */
103 u8_toupper (const uint8_t *s
, size_t n
, const char *iso639_language
,
105 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
107 u16_toupper (const uint16_t *s
, size_t n
, const char *iso639_language
,
109 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
111 u32_toupper (const uint32_t *s
, size_t n
, const char *iso639_language
,
113 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
115 /* Return the lowercase mapping of a string.
116 The nf argument identifies the normalization form to apply after the
117 case-mapping. It can also be NULL, for no normalization. */
119 u8_tolower (const uint8_t *s
, size_t n
, const char *iso639_language
,
121 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
123 u16_tolower (const uint16_t *s
, size_t n
, const char *iso639_language
,
125 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
127 u32_tolower (const uint32_t *s
, size_t n
, const char *iso639_language
,
129 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
131 /* Return the titlecase mapping of a string.
132 The nf argument identifies the normalization form to apply after the
133 case-mapping. It can also be NULL, for no normalization. */
135 u8_totitle (const uint8_t *s
, size_t n
, const char *iso639_language
,
137 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
139 u16_totitle (const uint16_t *s
, size_t n
, const char *iso639_language
,
141 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
143 u32_totitle (const uint32_t *s
, size_t n
, const char *iso639_language
,
145 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
147 /* The case-mapping context given by a prefix string. */
148 typedef struct casing_prefix_context
150 /* These fields are private, undocumented. */
151 uint32_t last_char_except_ignorable
;
152 uint32_t last_char_normal_or_above
;
154 casing_prefix_context_t
;
155 /* The case-mapping context of the empty prefix string.
   This is a read-only object of type casing_prefix_context_t; it is only
   declared here and is defined elsewhere.
   NOTE(review): presumably this is the value to pass as prefix_context to
   the uN_ct_* functions when nothing precedes the string being case-mapped;
   confirm against the implementation.
   The @...@ token is a placeholder in this template; presumably it is
   replaced at build time by a DLL import attribute or by nothing. */
156 extern @GNULIB_UNICASE_EMPTY_PREFIX_CONTEXT_DLL_VARIABLE@
const casing_prefix_context_t unicase_empty_prefix_context
;
157 /* Return the case-mapping context of a given prefix string.
   The pair (S, N) denotes the prefix string s[0..n-1]; N counts units
   (uint8_t, uint16_t or uint32_t, depending on the variant).
   The context is returned by value and has the type of the prefix_context
   parameter of the uN_ct_* functions. */
/* UTF-8 variant. */
158 extern casing_prefix_context_t
159 u8_casing_prefix_context (const uint8_t *s
, size_t n
);
/* UTF-16 variant. */
160 extern casing_prefix_context_t
161 u16_casing_prefix_context (const uint16_t *s
, size_t n
);
/* UCS-4 variant. */
162 extern casing_prefix_context_t
163 u32_casing_prefix_context (const uint32_t *s
, size_t n
);
164 /* Return the case-mapping context of the prefix concat(A, S), given the
165 case-mapping context of the prefix A.
   The pair (S, N) denotes the string s[0..n-1] that follows A; N counts
   units (uint8_t, uint16_t or uint32_t, depending on the variant).
   A itself is not passed, only its context A_CONTEXT, by value.  This allows
   the context of a long prefix to be computed piece by piece.
   NOTE(review): given the documented meaning of unicase_empty_prefix_context,
   passing it as A_CONTEXT should be equivalent to calling
   uN_casing_prefix_context (S, N); confirm against the implementation. */
/* UTF-8 variant. */
166 extern casing_prefix_context_t
167 u8_casing_prefixes_context (const uint8_t *s
, size_t n
,
168 casing_prefix_context_t a_context
);
/* UTF-16 variant. */
169 extern casing_prefix_context_t
170 u16_casing_prefixes_context (const uint16_t *s
, size_t n
,
171 casing_prefix_context_t a_context
);
/* UCS-4 variant. */
172 extern casing_prefix_context_t
173 u32_casing_prefixes_context (const uint32_t *s
, size_t n
,
174 casing_prefix_context_t a_context
);
176 /* The case-mapping context given by a suffix string. */
177 typedef struct casing_suffix_context
179 /* These fields are private, undocumented. */
180 uint32_t first_char_except_ignorable
;
183 casing_suffix_context_t
;
184 /* The case-mapping context of the empty suffix string.
   This is a read-only object of type casing_suffix_context_t; it is only
   declared here and is defined elsewhere.
   NOTE(review): presumably this is the value to pass as suffix_context to
   the uN_ct_* functions when nothing follows the string being case-mapped;
   confirm against the implementation.
   The @...@ token is a placeholder in this template; presumably it is
   replaced at build time by a DLL import attribute or by nothing. */
185 extern @GNULIB_UNICASE_EMPTY_SUFFIX_CONTEXT_DLL_VARIABLE@
const casing_suffix_context_t unicase_empty_suffix_context
;
186 /* Return the case-mapping context of a given suffix string.
   The pair (S, N) denotes the suffix string s[0..n-1]; N counts units
   (uint8_t, uint16_t or uint32_t, depending on the variant).
   The context is returned by value and has the type of the suffix_context
   parameter of the uN_ct_* functions. */
/* UTF-8 variant. */
187 extern casing_suffix_context_t
188 u8_casing_suffix_context (const uint8_t *s
, size_t n
);
/* UTF-16 variant. */
189 extern casing_suffix_context_t
190 u16_casing_suffix_context (const uint16_t *s
, size_t n
);
/* UCS-4 variant. */
191 extern casing_suffix_context_t
192 u32_casing_suffix_context (const uint32_t *s
, size_t n
);
193 /* Return the case-mapping context of the suffix concat(S, A), given the
194 case-mapping context of the suffix A.
   The pair (S, N) denotes the string s[0..n-1] that precedes A; N counts
   units (uint8_t, uint16_t or uint32_t, depending on the variant).
   A itself is not passed, only its context A_CONTEXT, by value.  This allows
   the context of a long suffix to be computed piece by piece.
   NOTE(review): given the documented meaning of unicase_empty_suffix_context,
   passing it as A_CONTEXT should be equivalent to calling
   uN_casing_suffix_context (S, N); confirm against the implementation. */
/* UTF-8 variant. */
195 extern casing_suffix_context_t
196 u8_casing_suffixes_context (const uint8_t *s
, size_t n
,
197 casing_suffix_context_t a_context
);
/* UTF-16 variant. */
198 extern casing_suffix_context_t
199 u16_casing_suffixes_context (const uint16_t *s
, size_t n
,
200 casing_suffix_context_t a_context
);
/* UCS-4 variant. */
201 extern casing_suffix_context_t
202 u32_casing_suffixes_context (const uint32_t *s
, size_t n
,
203 casing_suffix_context_t a_context
);
205 /* Return the uppercase mapping of a string that is surrounded by a prefix and a suffix. */
208 u8_ct_toupper (const uint8_t *s
, size_t n
,
209 casing_prefix_context_t prefix_context
,
210 casing_suffix_context_t suffix_context
,
211 const char *iso639_language
,
213 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
215 u16_ct_toupper (const uint16_t *s
, size_t n
,
216 casing_prefix_context_t prefix_context
,
217 casing_suffix_context_t suffix_context
,
218 const char *iso639_language
,
220 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
222 u32_ct_toupper (const uint32_t *s
, size_t n
,
223 casing_prefix_context_t prefix_context
,
224 casing_suffix_context_t suffix_context
,
225 const char *iso639_language
,
227 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
229 /* Return the lowercase mapping of a string that is surrounded by a prefix and a suffix. */
232 u8_ct_tolower (const uint8_t *s
, size_t n
,
233 casing_prefix_context_t prefix_context
,
234 casing_suffix_context_t suffix_context
,
235 const char *iso639_language
,
237 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
239 u16_ct_tolower (const uint16_t *s
, size_t n
,
240 casing_prefix_context_t prefix_context
,
241 casing_suffix_context_t suffix_context
,
242 const char *iso639_language
,
244 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
246 u32_ct_tolower (const uint32_t *s
, size_t n
,
247 casing_prefix_context_t prefix_context
,
248 casing_suffix_context_t suffix_context
,
249 const char *iso639_language
,
251 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
253 /* Return the titlecase mapping of a string that is surrounded by a prefix and a suffix. */
256 u8_ct_totitle (const uint8_t *s
, size_t n
,
257 casing_prefix_context_t prefix_context
,
258 casing_suffix_context_t suffix_context
,
259 const char *iso639_language
,
261 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
263 u16_ct_totitle (const uint16_t *s
, size_t n
,
264 casing_prefix_context_t prefix_context
,
265 casing_suffix_context_t suffix_context
,
266 const char *iso639_language
,
268 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
270 u32_ct_totitle (const uint32_t *s
, size_t n
,
271 casing_prefix_context_t prefix_context
,
272 casing_suffix_context_t suffix_context
,
273 const char *iso639_language
,
275 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
277 /* Return the case folded string.
278 Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
279 to comparing S1 and S2 with uN_casecmp().
280 The nf argument identifies the normalization form to apply after the
281 case-mapping. It can also be NULL, for no normalization. */
283 u8_casefold (const uint8_t *s
, size_t n
, const char *iso639_language
,
285 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
287 u16_casefold (const uint16_t *s
, size_t n
, const char *iso639_language
,
289 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
291 u32_casefold (const uint32_t *s
, size_t n
, const char *iso639_language
,
293 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
294 /* Likewise, for a string that is surrounded by a prefix and a suffix. */
296 u8_ct_casefold (const uint8_t *s
, size_t n
,
297 casing_prefix_context_t prefix_context
,
298 casing_suffix_context_t suffix_context
,
299 const char *iso639_language
,
301 uint8_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
303 u16_ct_casefold (const uint16_t *s
, size_t n
,
304 casing_prefix_context_t prefix_context
,
305 casing_suffix_context_t suffix_context
,
306 const char *iso639_language
,
308 uint16_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
310 u32_ct_casefold (const uint32_t *s
, size_t n
,
311 casing_prefix_context_t prefix_context
,
312 casing_suffix_context_t suffix_context
,
313 const char *iso639_language
,
315 uint32_t *_UC_RESTRICT resultbuf
, size_t *lengthp
);
317 /* Compare S1 and S2, ignoring differences in case and normalization.
318 The nf argument identifies the normalization form to apply after the
319 case-mapping. It can also be NULL, for no normalization.
320 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
321 return 0. Upon failure, return -1 with errno set. */
323 u8_casecmp (const uint8_t *s1
, size_t n1
,
324 const uint8_t *s2
, size_t n2
,
325 const char *iso639_language
, uninorm_t nf
, int *resultp
);
327 u16_casecmp (const uint16_t *s1
, size_t n1
,
328 const uint16_t *s2
, size_t n2
,
329 const char *iso639_language
, uninorm_t nf
, int *resultp
);
331 u32_casecmp (const uint32_t *s1
, size_t n1
,
332 const uint32_t *s2
, size_t n2
,
333 const char *iso639_language
, uninorm_t nf
, int *resultp
);
335 ulc_casecmp (const char *s1
, size_t n1
,
336 const char *s2
, size_t n2
,
337 const char *iso639_language
, uninorm_t nf
, int *resultp
);
339 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
340 a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
341 function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
342 NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization. */
344 u8_casexfrm (const uint8_t *s
, size_t n
, const char *iso639_language
,
346 char *_UC_RESTRICT resultbuf
, size_t *lengthp
);
348 u16_casexfrm (const uint16_t *s
, size_t n
, const char *iso639_language
,
350 char *_UC_RESTRICT resultbuf
, size_t *lengthp
);
352 u32_casexfrm (const uint32_t *s
, size_t n
, const char *iso639_language
,
354 char *_UC_RESTRICT resultbuf
, size_t *lengthp
);
356 ulc_casexfrm (const char *s
, size_t n
, const char *iso639_language
,
358 char *_UC_RESTRICT resultbuf
, size_t *lengthp
);
360 /* Compare S1 and S2, ignoring differences in case and normalization, using the
361 collation rules of the current locale.
362 The nf argument identifies the normalization form to apply after the
363 case-mapping. It must be either UNINORM_NFC, UNINORM_NFKC, or NULL
364 for no normalization.
365 If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
366 return 0. Upon failure, return -1 with errno set. */
368 u8_casecoll (const uint8_t *s1
, size_t n1
,
369 const uint8_t *s2
, size_t n2
,
370 const char *iso639_language
, uninorm_t nf
, int *resultp
);
372 u16_casecoll (const uint16_t *s1
, size_t n1
,
373 const uint16_t *s2
, size_t n2
,
374 const char *iso639_language
, uninorm_t nf
, int *resultp
);
376 u32_casecoll (const uint32_t *s1
, size_t n1
,
377 const uint32_t *s2
, size_t n2
,
378 const char *iso639_language
, uninorm_t nf
, int *resultp
);
380 ulc_casecoll (const char *s1
, size_t n1
,
381 const char *s2
, size_t n2
,
382 const char *iso639_language
, uninorm_t nf
, int *resultp
);
385 /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
386 otherwise, and return 0. Upon failure, return -1 with errno set. */
388 u8_is_uppercase (const uint8_t *s
, size_t n
,
389 const char *iso639_language
,
392 u16_is_uppercase (const uint16_t *s
, size_t n
,
393 const char *iso639_language
,
396 u32_is_uppercase (const uint32_t *s
, size_t n
,
397 const char *iso639_language
,
400 /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
401 otherwise, and return 0. Upon failure, return -1 with errno set. */
403 u8_is_lowercase (const uint8_t *s
, size_t n
,
404 const char *iso639_language
,
407 u16_is_lowercase (const uint16_t *s
, size_t n
,
408 const char *iso639_language
,
411 u32_is_lowercase (const uint32_t *s
, size_t n
,
412 const char *iso639_language
,
415 /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
416 otherwise, and return 0. Upon failure, return -1 with errno set. */
418 u8_is_titlecase (const uint8_t *s
, size_t n
,
419 const char *iso639_language
,
422 u16_is_titlecase (const uint16_t *s
, size_t n
,
423 const char *iso639_language
,
426 u32_is_titlecase (const uint32_t *s
, size_t n
,
427 const char *iso639_language
,
430 /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
431 false otherwise, and return 0. Upon failure, return -1 with errno set. */
433 u8_is_casefolded (const uint8_t *s
, size_t n
,
434 const char *iso639_language
,
437 u16_is_casefolded (const uint16_t *s
, size_t n
,
438 const char *iso639_language
,
441 u32_is_casefolded (const uint32_t *s
, size_t n
,
442 const char *iso639_language
,
445 /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
446 either upper case or lower case or title case is not a no-op.
447 Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
448 under the lower case mapping, and under the title case mapping; in other
449 words, when NFD(S) consists entirely of caseless characters.
450 Upon success, return 0. Upon failure, return -1 with errno set. */
452 u8_is_cased (const uint8_t *s
, size_t n
,
453 const char *iso639_language
,
456 u16_is_cased (const uint16_t *s
, size_t n
,
457 const char *iso639_language
,
460 u32_is_cased (const uint32_t *s
, size_t n
,
461 const char *iso639_language
,
465 /* ========================================================================= */
471 #endif /* _UNICASE_H */