2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2010,2011 Nexenta Systems, Inc. All rights reserved.
14 * Copyright 2012 Garrett D'Amore <garrett@damore.org>
15 * Copyright 2013 DEY Storage Systems, Inc.
19 * LC_CTYPE database generation routines for localedef.
25 #include <sys/types.h>
32 #include "localedef.h"
/*
 * AVL tree holding the per-character nodes. It is created with
 * ctype_compare() as its ordering function.
 */
36 static avl_tree_t ctypes
;
/*
 * Set to the wc of the node just looked up for a single value.
 * add_ctype_range() checks its end against this and walks upward
 * from last_ctype + 1.
 */
38 static wchar_t last_ctype
;
/*
 * Per-character record kept in the ctypes AVL tree (presumably the type
 * behind ctype_node_t). Code in this file uses members named wc, ctype,
 * tolower and toupper, and passes the offset of a member named avl to
 * avl_create().
 * NOTE(review): the member list itself is not visible in this view.
 */
40 typedef struct ctype_node
{
/*
 * NOTE(review): the members of this type are not visible in this view and
 * nothing in the visible code refers to it -- confirm it is still needed.
 */
48 typedef struct width_node
{
56 ctype_compare(const void *n1
, const void *n2
)
58 const ctype_node_t
*c1
= n1
;
59 const ctype_node_t
*c2
= n2
;
61 return (c1
->wc
< c2
->wc
? -1 : c1
->wc
> c2
->wc
? 1 : 0);
/*
 * Create the ctypes AVL tree: records are ctype_node_t sized, ordered by
 * ctype_compare(), with the tree linkage located at the avl member.
 */
67 avl_create(&ctypes
, ctype_compare
, sizeof (ctype_node_t
),
68 offsetof(ctype_node_t
, avl
));
/*
 * Add character-class bits to ctn->ctype.  Each visible assignment ORs in
 * one class flag together with the flags that accompany it (for example
 * _ISUPPER comes with _ISALPHA, _ISGRAPH and _ISPRINT).  One path reports
 * "not a valid character class" through errf() instead of setting bits.
 * NOTE(review): the selector that chooses among these assignments is not
 * visible in this view.
 */
73 add_ctype_impl(ctype_node_t
*ctn
)
77 ctn
->ctype
|= (_ISUPPER
| _ISALPHA
| _ISGRAPH
| _ISPRINT
);
80 ctn
->ctype
|= (_ISLOWER
| _ISALPHA
| _ISGRAPH
| _ISPRINT
);
83 ctn
->ctype
|= (_ISALPHA
| _ISGRAPH
| _ISPRINT
);
86 ctn
->ctype
|= (_ISDIGIT
| _ISGRAPH
| _ISPRINT
| _ISXDIGIT
);
89 ctn
->ctype
|= _ISSPACE
;
92 ctn
->ctype
|= _ISCNTRL
;
95 ctn
->ctype
|= (_ISGRAPH
| _ISPRINT
);
98 ctn
->ctype
|= _ISPRINT
;
101 ctn
->ctype
|= (_ISPUNCT
| _ISGRAPH
| _ISPRINT
);
104 ctn
->ctype
|= (_ISXDIGIT
| _ISPRINT
);
107 ctn
->ctype
|= (_ISBLANK
| _ISSPACE
);
110 ctn
->ctype
|= (_E1
| _ISPRINT
| _ISGRAPH
);
113 ctn
->ctype
|= (_E2
| _ISPRINT
| _ISGRAPH
);
116 ctn
->ctype
|= (_E3
| _ISPRINT
| _ISGRAPH
);
119 ctn
->ctype
|= (_E4
| _ISPRINT
| _ISGRAPH
);
122 ctn
->ctype
|= (_E5
| _ISPRINT
| _ISGRAPH
);
126 * We can't do anything with this. The character
127 * should already be specified as a digit or alpha.
131 errf(_("not a valid character class"));
/*
 * Return the node for wc from the ctypes AVL tree.  When avl_find() finds
 * no match, a zero-filled node is allocated with calloc() and inserted at
 * the position avl_find() reported.  Allocation failure is reported
 * through errf() as "out of memory".
 * NOTE(review): callers test the result against NULL, so the failure path
 * presumably returns NULL -- the return statements are not visible here.
 */
135 static ctype_node_t
*
136 get_ctype(wchar_t wc
)
143 if ((ctn
= avl_find(&ctypes
, &srch
, &where
)) == NULL
) {
144 if ((ctn
= calloc(1, sizeof (*ctn
))) == NULL
) {
145 errf(_("out of memory"));
150 avl_insert(&ctypes
, ctn
, where
);
/*
 * Obtain the node for val with get_ctype() and remember its wc in
 * last_ctype, which add_ctype_range() later uses as the start of a range.
 * NOTE(review): the enclosing function header and the handling of a NULL
 * node are not visible in this view.
 */
160 if ((ctn
= get_ctype(val
)) == NULL
) {
165 last_ctype
= ctn
->wc
;
/*
 * Extend the most recently added character into a range ending at end.
 * A range whose end lies below last_ctype is reported as a "malformed
 * character range".  Otherwise a node is obtained with get_ctype() for
 * every code point from last_ctype + 1 through end inclusive.
 * NOTE(review): the error format string closes with "))" for a single
 * "(" -- the extra parenthesis looks like a typo in the message.
 * NOTE(review): what is done with each node is not visible in this view.
 */
169 add_ctype_range(int end
)
174 if (end
< last_ctype
) {
175 errf(_("malformed character range (%u ... %u))"),
179 for (cur
= last_ctype
+ 1; cur
<= end
; cur
++) {
180 if ((ctn
= get_ctype(cur
)) == NULL
) {
191 * A word about widths: if the width mask is specified, then libc
192 * unconditionally honors it. Otherwise, it assumes printable
193 * characters have width 1, and non-printable characters have width
194 * -1 (except for NUL, which is special with width 0). Hence, we have
195 * no need to inject defaults here -- the "default" unset value of 0
196 * indicates that libc should use its own logic in wcwidth as described.
/*
 * Record a display width for character wc.  The node comes from
 * get_ctype(); its existing width bits (_CTYPE_SWM) are cleared and one of
 * _CTYPE_SW0 .. _CTYPE_SW3 is then ORed into ctn->ctype.
 * NOTE(review): presumably _CTYPE_SWn is chosen when width == n; the
 * selector and the handling of other widths are not visible in this view.
 */
199 add_width(int wc
, int width
)
203 if ((ctn
= get_ctype(wc
)) == NULL
) {
207 ctn
->ctype
&= ~(_CTYPE_SWM
);
210 ctn
->ctype
|= _CTYPE_SW0
;
213 ctn
->ctype
|= _CTYPE_SW1
;
216 ctn
->ctype
|= _CTYPE_SW2
;
219 ctn
->ctype
|= _CTYPE_SW3
;
/*
 * Apply add_width() with the same width to every code point in the
 * inclusive range start .. end.  When start is greater than end nothing
 * is done.
 */
void
add_width_range(int start, int end, int width)
{
	int wc = start;

	while (wc <= end) {
		add_width(wc, width);
		wc++;
	}
}
/*
 * Takes a character val and a second value wc; the node for val is
 * obtained with get_ctype().
 * NOTE(review): how wc is recorded (presumably in the node's tolower or
 * toupper member) is not visible in this view.
 */
233 add_caseconv(int val
, int wc
)
237 ctn
= get_ctype(val
);
261 ctype_node_t
*ctn
, *last_ct
, *last_lo
, *last_up
;
262 _FileRuneEntry
*ct
= NULL
;
263 _FileRuneEntry
*lo
= NULL
;
264 _FileRuneEntry
*up
= NULL
;
267 (void) memset(&rl
, 0, sizeof (rl
));
272 if ((f
= open_category()) == NULL
)
275 (void) memcpy(rl
.magic
, _FILE_RUNE_MAGIC_1
, 8);
276 (void) strncpy(rl
.encoding
, get_wide_encoding(), sizeof (rl
.encoding
));
279 * Initialize the identity map.
281 for (wc
= 0; (unsigned)wc
< _CACHED_RUNES
; wc
++) {
282 rl
.maplower
[wc
] = wc
;
283 rl
.mapupper
[wc
] = wc
;
286 for (ctn
= avl_first(&ctypes
); ctn
; ctn
= AVL_NEXT(&ctypes
, ctn
)) {
293 * POSIX requires certain portable characters have
294 * certain types. Add them if they are missing.
296 if ((wc
>= 1) && (wc
<= 127)) {
297 if ((wc
>= 'A') && (wc
<= 'Z'))
298 ctn
->ctype
|= _ISUPPER
;
299 if ((wc
>= 'a') && (wc
<= 'z'))
300 ctn
->ctype
|= _ISLOWER
;
301 if ((wc
>= '0') && (wc
<= '9'))
302 ctn
->ctype
|= _ISDIGIT
;
304 ctn
->ctype
|= _ISPRINT
;
305 if (strchr(" \f\n\r\t\v", (char)wc
) != NULL
)
306 ctn
->ctype
|= _ISSPACE
;
307 if (strchr("0123456789ABCDEFabcdef", (char)wc
) != NULL
)
308 ctn
->ctype
|= _ISXDIGIT
;
309 if (strchr(" \t", (char)wc
))
310 ctn
->ctype
|= _ISBLANK
;
313 * Technically these settings are only
314 * required for the C locale. However, it
315 * turns out that because of the historical
316 * version of isprint(), we need them for all
317 * locales as well. Note that these are not
318 * necessarily valid punctuation characters in
319 * the current language, but ispunct() needs
320 * to return TRUE for them.
322 if (strchr("!\"'#$%&()*+,-./:;<=>?@[\\]^_`{|}~",
324 ctn
->ctype
|= _ISPUNCT
;
328 * POSIX also requires that certain types imply
329 * others. Add any inferred types here.
331 if (ctn
->ctype
& (_ISUPPER
|_ISLOWER
))
332 ctn
->ctype
|= _ISALPHA
;
333 if (ctn
->ctype
& _ISDIGIT
)
334 ctn
->ctype
|= _ISXDIGIT
;
335 if (ctn
->ctype
& _ISBLANK
)
336 ctn
->ctype
|= _ISSPACE
;
337 if (ctn
->ctype
& (_ISALPHA
|_ISDIGIT
|_ISXDIGIT
))
338 ctn
->ctype
|= _ISGRAPH
;
339 if (ctn
->ctype
& _ISGRAPH
)
340 ctn
->ctype
|= _ISPRINT
;
343 * Finally, POSIX requires that certain combinations
344 * are invalid. We don't flag this as a fatal error,
345 * but we will warn about them.
347 if ((ctn
->ctype
& _ISALPHA
) &&
348 (ctn
->ctype
& (_ISPUNCT
|_ISDIGIT
)))
350 if ((ctn
->ctype
& _ISPUNCT
) &
351 (ctn
->ctype
& (_ISDIGIT
|_ISALPHA
|_ISXDIGIT
)))
353 if ((ctn
->ctype
& _ISSPACE
) && (ctn
->ctype
& _ISGRAPH
))
355 if ((ctn
->ctype
& _ISCNTRL
) & _ISPRINT
)
357 if ((wc
== ' ') && (ctn
->ctype
& (_ISPUNCT
|_ISGRAPH
)))
361 warn("conflicting classes for character 0x%x (%x)",
365 * Handle the lower 256 characters using the simple
366 * optimization. Note that if we have not defined the
367 * upper/lower case, then we identity map it.
369 if ((unsigned)wc
< _CACHED_RUNES
) {
370 rl
.runetype
[wc
] = ctn
->ctype
;
372 rl
.maplower
[wc
] = ctn
->tolower
;
374 rl
.mapupper
[wc
] = ctn
->toupper
;
378 if ((last_ct
!= NULL
) && (last_ct
->ctype
== ctn
->ctype
) &&
379 (last_ct
->wc
+ 1 == wc
)) {
380 ct
[rl
.runetype_ext_nranges
-1].max
= wc
;
382 rl
.runetype_ext_nranges
++;
383 ct
= reallocarray(ct
, rl
.runetype_ext_nranges
,
385 ct
[rl
.runetype_ext_nranges
- 1].min
= wc
;
386 ct
[rl
.runetype_ext_nranges
- 1].max
= wc
;
387 ct
[rl
.runetype_ext_nranges
- 1].map
= ctn
->ctype
;
390 if (ctn
->tolower
== 0) {
392 } else if ((last_lo
!= NULL
) &&
393 (last_lo
->tolower
+ 1 == ctn
->tolower
)) {
394 lo
[rl
.maplower_ext_nranges
-1].max
= wc
;
397 rl
.maplower_ext_nranges
++;
398 lo
= reallocarray(lo
, rl
.maplower_ext_nranges
,
400 lo
[rl
.maplower_ext_nranges
- 1].min
= wc
;
401 lo
[rl
.maplower_ext_nranges
- 1].max
= wc
;
402 lo
[rl
.maplower_ext_nranges
- 1].map
= ctn
->tolower
;
406 if (ctn
->toupper
== 0) {
408 } else if ((last_up
!= NULL
) &&
409 (last_up
->toupper
+ 1 == ctn
->toupper
)) {
410 up
[rl
.mapupper_ext_nranges
-1].max
= wc
;
413 rl
.mapupper_ext_nranges
++;
414 up
= reallocarray(up
, rl
.mapupper_ext_nranges
,
416 up
[rl
.mapupper_ext_nranges
- 1].min
= wc
;
417 up
[rl
.mapupper_ext_nranges
- 1].max
= wc
;
418 up
[rl
.mapupper_ext_nranges
- 1].map
= ctn
->toupper
;
423 if ((wr_category(&rl
, sizeof (rl
), f
) < 0) ||
424 (wr_category(ct
, sizeof (*ct
) * rl
.runetype_ext_nranges
, f
) < 0) ||
425 (wr_category(lo
, sizeof (*lo
) * rl
.maplower_ext_nranges
, f
) < 0) ||
426 (wr_category(up
, sizeof (*up
) * rl
.mapupper_ext_nranges
, f
) < 0)) {