.
[glibc/history.git] / locale / programs / ld-ctype.c
blobd4474bf1a2dba0e15f8c789fbd3a39fb58b69389
1 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 #include <alloca.h>
24 #include <byteswap.h>
25 #include <endian.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <obstack.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <wchar.h>
32 #include <wctype.h>
33 #include <sys/uio.h>
35 #include "localedef.h"
36 #include "charmap.h"
37 #include "localeinfo.h"
38 #include "langinfo.h"
39 #include "linereader.h"
40 #include "locfile-token.h"
41 #include "locfile.h"
43 #include <assert.h>
#ifdef PREDEFINED_CLASSES
/* These are the extra bits not in wctype.h since these are not preallocated
   classes.  The constants are unsigned: forming bit 31 with a plain `int'
   operand would shift into the sign bit, which is undefined behaviour.  */
# define _ISwspecial1   (1U << 29)
# define _ISwspecial2   (1U << 30)
# define _ISwspecial3   (1U << 31)
#endif
/* The bit used for representing a special class.  */
/* BITPOS turns a class token into a zero-based bit position by taking its
   distance from tok_upper.  BIT and BITw feed that position to _ISbit and
   _ISwbit respectively to obtain the corresponding mask.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the dereferenced result of find_idx for VALUE in the table
   CTYPE->COLLECTION, i.e. an lvalue that can be read or assigned.  IDX is
   pasted right after the member names COLLECTION, COLLECTION_max and
   COLLECTION_act; it is left empty where the collection is a single table
   (presumably it is an array subscript for the per-map tables -- confirm
   against the callers).  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One to-string, stored as 32-bit values.  */
  uint32_t *str;

  /* Next to-string of the same set, linked as a list.  */
  struct translit_to_t *next;
};
/* One transliteration entry: a from-string together with the list of
   to-strings that may replace it.  */
struct translit_t
{
  /* The from-string, stored as 32-bit values.  */
  uint32_t *from;

  /* NOTE(review): presumably the source file and line where the entry was
     defined -- confirm against the code that fills these in.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  /* Next entry, linked as a list.  */
  struct translit_t *next;
};
/* One element of the translit_ignore list.  ctype_finish orders this list
   by the FROM member.  */
struct translit_ignore_t
{
  /* NOTE(review): FROM, TO and STEP look like the bounds and stride of a
     range of character values -- confirm against the parser.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* NOTE(review): presumably the source file and line of the definition.  */
  const char *fname;
  size_t lineno;

  /* Next element, linked as a list.  */
  struct translit_ignore_t *next;
};
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* NOTE(review): presumably the locale and repertoire named by the include
     statement -- confirm against the code that fills these in.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement, linked as a list.  */
  struct translit_include_t *next;
};
/* Sparse table of uint32_t.  */
/* The macros below parameterize the inclusion of "3level.h".
   NOTE(review): this instantiation is presumably what provides
   `struct idx_table' and `idx_table_init' used further down -- confirm in
   3level.h.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_FINALIZE
#include "3level.h"
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Character value stored for each slot of the class and map collections.
     ctype_startup presets the first 256 slots to their own index.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  uint32_t last_class_char;
  /* Class bits indexed directly by byte value.  */
  uint32_t class256_collection[256];
  /* Class bits per slot; slot N describes the character charnames[N].  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* Input digits in multibyte and wide form.  ctype_finish keeps both
     counts at a multiple of ten and installs defaults when none are given.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* Output digits.  ctype_finish substitutes `?' for missing entries.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu  */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* Set to 1 by ctype_startup when enc_not_ascii_compatible is nonzero.  */
  uint32_t to_nonascii;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct iovec *class_3level;
  struct iovec *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct iovec width;
  uint32_t mb_cur_max;
  /* Taken from the charmap by ctype_finish, "//UNKNOWN//" if it has none.  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  struct obstack mempool;
};
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros of <obstack.h>: chunks are
   obtained with xmalloc and given back with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Prototypes for local functions.  */

/* Create or share the LC_CTYPE data of LOCALE; see the definition below.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register the character class NAME in CTYPE.  ctype_startup relies on the
   order of its calls to determine the bit positions.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register the character map NAME in CTYPE.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return a pointer to the slot for character IDX in *TABLE.  When called
   with NULL for TABLE, MAX and ACT it is used only for its side effect of
   adding IDX to the name array if necessary.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
/* Set default values for classes which were not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* Prepare the arrays for the binary representation; called by ctype_output
   before the data is laid out.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
/* Symbolic names under which the decimal digits are looked up in the
   charmap, indexed by digit value.  */
static const char *longnames[] =
{
  [0] = "zero",
  [1] = "one",
  [2] = "two",
  [3] = "three",
  [4] = "four",
  [5] = "five",
  [6] = "six",
  [7] = "seven",
  [8] = "eight",
  [9] = "nine"
};

/* The <Uxxxxxxxx> style names of the ASCII digits, indexed by digit
   value.  */
static const char *uninames[] =
{
  [0] = "U00000030",
  [1] = "U00000031",
  [2] = "U00000032",
  [3] = "U00000033",
  [4] = "U00000034",
  [5] = "U00000035",
  [6] = "U00000036",
  [7] = "U00000037",
  [8] = "U00000038",
  [9] = "U00000039"
};

/* The ASCII digits themselves; entry N is the character for digit N.  */
static const unsigned char digits[] = "0123456789";
252 static void
253 ctype_startup (struct linereader *lr, struct localedef_t *locale,
254 const struct charmap_t *charmap,
255 struct localedef_t *copy_locale, int ignore_content)
257 unsigned int cnt;
258 struct locale_ctype_t *ctype;
260 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
262 if (copy_locale == NULL)
264 /* Allocate the needed room. */
265 locale->categories[LC_CTYPE].ctype = ctype =
266 (struct locale_ctype_t *) xcalloc (1,
267 sizeof (struct locale_ctype_t));
269 /* We have seen no names yet. */
270 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
271 ctype->charnames =
272 (unsigned int *) xmalloc (ctype->charnames_max
273 * sizeof (unsigned int));
274 for (cnt = 0; cnt < 256; ++cnt)
275 ctype->charnames[cnt] = cnt;
276 ctype->charnames_act = 256;
277 idx_table_init (&ctype->charnames_idx);
279 /* Fill character class information. */
280 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
281 /* The order of the following instructions determines the bit
282 positions! */
283 ctype_class_new (lr, ctype, "upper");
284 ctype_class_new (lr, ctype, "lower");
285 ctype_class_new (lr, ctype, "alpha");
286 ctype_class_new (lr, ctype, "digit");
287 ctype_class_new (lr, ctype, "xdigit");
288 ctype_class_new (lr, ctype, "space");
289 ctype_class_new (lr, ctype, "print");
290 ctype_class_new (lr, ctype, "graph");
291 ctype_class_new (lr, ctype, "blank");
292 ctype_class_new (lr, ctype, "cntrl");
293 ctype_class_new (lr, ctype, "punct");
294 ctype_class_new (lr, ctype, "alnum");
295 #ifdef PREDEFINED_CLASSES
296 /* The following are extensions from ISO 14652. */
297 ctype_class_new (lr, ctype, "left_to_right");
298 ctype_class_new (lr, ctype, "right_to_left");
299 ctype_class_new (lr, ctype, "num_terminator");
300 ctype_class_new (lr, ctype, "num_separator");
301 ctype_class_new (lr, ctype, "segment_separator");
302 ctype_class_new (lr, ctype, "block_separator");
303 ctype_class_new (lr, ctype, "direction_control");
304 ctype_class_new (lr, ctype, "sym_swap_layout");
305 ctype_class_new (lr, ctype, "char_shape_selector");
306 ctype_class_new (lr, ctype, "num_shape_selector");
307 ctype_class_new (lr, ctype, "non_spacing");
308 ctype_class_new (lr, ctype, "non_spacing_level3");
309 ctype_class_new (lr, ctype, "normal_connect");
310 ctype_class_new (lr, ctype, "r_connect");
311 ctype_class_new (lr, ctype, "no_connect");
312 ctype_class_new (lr, ctype, "no_connect-space");
313 ctype_class_new (lr, ctype, "vowel_connect");
314 #endif
316 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
317 ctype->class_collection
318 = (uint32_t *) xcalloc (sizeof (unsigned long int),
319 ctype->class_collection_max);
320 ctype->class_collection_act = 256;
322 /* Fill character map information. */
323 ctype->last_map_idx = MAX_NR_CHARMAP;
324 ctype_map_new (lr, ctype, "toupper", charmap);
325 ctype_map_new (lr, ctype, "tolower", charmap);
326 #ifdef PREDEFINED_CLASSES
327 ctype_map_new (lr, ctype, "tosymmetric", charmap);
328 #endif
330 /* Fill first 256 entries in `toXXX' arrays. */
331 for (cnt = 0; cnt < 256; ++cnt)
333 ctype->map_collection[0][cnt] = cnt;
334 ctype->map_collection[1][cnt] = cnt;
335 #ifdef PREDEFINED_CLASSES
336 ctype->map_collection[2][cnt] = cnt;
337 #endif
338 ctype->map256_collection[0][cnt] = cnt;
339 ctype->map256_collection[1][cnt] = cnt;
342 if (enc_not_ascii_compatible)
343 ctype->to_nonascii = 1;
345 obstack_init (&ctype->mempool);
347 else
348 ctype = locale->categories[LC_CTYPE].ctype =
349 copy_locale->categories[LC_CTYPE].ctype;
354 void
355 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
357 /* See POSIX.2, table 2-6 for the meaning of the following table. */
358 #define NCLASS 12
359 static const struct
361 const char *name;
362 const char allow[NCLASS];
364 valid_table[NCLASS] =
366 /* The order is important. See token.h for more information.
367 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
368 { "upper", "--MX-XDDXXX-" },
369 { "lower", "--MX-XDDXXX-" },
370 { "alpha", "---X-XDDXXX-" },
371 { "digit", "XXX--XDDXXX-" },
372 { "xdigit", "-----XDDXXX-" },
373 { "space", "XXXXX------X" },
374 { "print", "---------X--" },
375 { "graph", "---------X--" },
376 { "blank", "XXXXXM-----X" },
377 { "cntrl", "XXXXX-XX--XX" },
378 { "punct", "XXXXX-DD-X-X" },
379 { "alnum", "-----XDDXXX-" }
381 size_t cnt;
382 int cls1, cls2;
383 uint32_t space_value;
384 struct charseq *space_seq;
385 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
386 int warned;
387 const void *key;
388 size_t len;
389 void *vdata;
390 void *curs;
392 /* Now resolve copying and also handle completely missing definitions. */
393 if (ctype == NULL)
395 const char *repertoire_name;
397 /* First see whether we were supposed to copy. If yes, find the
398 actual definition. */
399 if (locale->copy_name[LC_CTYPE] != NULL)
401 /* Find the copying locale. This has to happen transitively since
402 the locale we are copying from might also copying another one. */
403 struct localedef_t *from = locale;
406 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
407 from->repertoire_name, charmap);
408 while (from->categories[LC_CTYPE].ctype == NULL
409 && from->copy_name[LC_CTYPE] != NULL);
411 ctype = locale->categories[LC_CTYPE].ctype
412 = from->categories[LC_CTYPE].ctype;
415 /* If there is still no definition issue an warning and create an
416 empty one. */
417 if (ctype == NULL)
419 if (! be_quiet)
420 WITH_CUR_LOCALE (error (0, 0, _("\
421 No definition for %s category found"), "LC_CTYPE"));
422 ctype_startup (NULL, locale, charmap, NULL, 0);
423 ctype = locale->categories[LC_CTYPE].ctype;
426 /* Get the repertoire we have to use. */
427 repertoire_name = locale->repertoire_name ?: repertoire_global;
428 if (repertoire_name != NULL)
429 ctype->repertoire = repertoire_read (repertoire_name);
432 /* We need the name of the currently used 8-bit character set to
433 make correct conversion between this 8-bit representation and the
434 ISO 10646 character set used internally for wide characters. */
435 ctype->codeset_name = charmap->code_set_name;
436 if (ctype->codeset_name == NULL)
438 if (! be_quiet)
439 WITH_CUR_LOCALE (error (0, 0, _("\
440 No character set name specified in charmap")));
441 ctype->codeset_name = "//UNKNOWN//";
444 /* Set default value for classes not specified. */
445 set_class_defaults (ctype, charmap, ctype->repertoire);
447 /* Check according to table. */
448 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
450 uint32_t tmp = ctype->class_collection[cnt];
452 if (tmp != 0)
454 for (cls1 = 0; cls1 < NCLASS; ++cls1)
455 if ((tmp & _ISwbit (cls1)) != 0)
456 for (cls2 = 0; cls2 < NCLASS; ++cls2)
457 if (valid_table[cls1].allow[cls2] != '-')
459 int eq = (tmp & _ISwbit (cls2)) != 0;
460 switch (valid_table[cls1].allow[cls2])
462 case 'M':
463 if (!eq)
465 uint32_t value = ctype->charnames[cnt];
467 if (!be_quiet)
468 WITH_CUR_LOCALE (error (0, 0, _("\
469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
470 value > 0xffff ? 8 : 4,
471 value,
472 valid_table[cls1].name,
473 valid_table[cls2].name));
475 break;
477 case 'X':
478 if (eq)
480 uint32_t value = ctype->charnames[cnt];
482 if (!be_quiet)
483 WITH_CUR_LOCALE (error (0, 0, _("\
484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
485 value > 0xffff ? 8 : 4,
486 value,
487 valid_table[cls1].name,
488 valid_table[cls2].name));
490 break;
492 case 'D':
493 ctype->class_collection[cnt] |= _ISwbit (cls2);
494 break;
496 default:
497 WITH_CUR_LOCALE (error (5, 0, _("\
498 internal error in %s, line %u"), __FUNCTION__, __LINE__));
504 for (cnt = 0; cnt < 256; ++cnt)
506 uint32_t tmp = ctype->class256_collection[cnt];
508 if (tmp != 0)
510 for (cls1 = 0; cls1 < NCLASS; ++cls1)
511 if ((tmp & _ISbit (cls1)) != 0)
512 for (cls2 = 0; cls2 < NCLASS; ++cls2)
513 if (valid_table[cls1].allow[cls2] != '-')
515 int eq = (tmp & _ISbit (cls2)) != 0;
516 switch (valid_table[cls1].allow[cls2])
518 case 'M':
519 if (!eq)
521 char buf[17];
523 snprintf (buf, sizeof buf, "\\%Zo", cnt);
525 if (!be_quiet)
526 WITH_CUR_LOCALE (error (0, 0, _("\
527 character '%s' in class `%s' must be in class `%s'"),
528 buf,
529 valid_table[cls1].name,
530 valid_table[cls2].name));
532 break;
534 case 'X':
535 if (eq)
537 char buf[17];
539 snprintf (buf, sizeof buf, "\\%Zo", cnt);
541 if (!be_quiet)
542 WITH_CUR_LOCALE (error (0, 0, _("\
543 character '%s' in class `%s' must not be in class `%s'"),
544 buf,
545 valid_table[cls1].name,
546 valid_table[cls2].name));
548 break;
550 case 'D':
551 ctype->class256_collection[cnt] |= _ISbit (cls2);
552 break;
554 default:
555 WITH_CUR_LOCALE (error (5, 0, _("\
556 internal error in %s, line %u"), __FUNCTION__, __LINE__));
562 /* ... and now test <SP> as a special case. */
563 space_value = 32;
564 if (((cnt = BITPOS (tok_space),
565 (ELEM (ctype, class_collection, , space_value)
566 & BITw (tok_space)) == 0)
567 || (cnt = BITPOS (tok_blank),
568 (ELEM (ctype, class_collection, , space_value)
569 & BITw (tok_blank)) == 0)))
571 if (!be_quiet)
572 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
573 valid_table[cnt].name));
575 else if (((cnt = BITPOS (tok_punct),
576 (ELEM (ctype, class_collection, , space_value)
577 & BITw (tok_punct)) != 0)
578 || (cnt = BITPOS (tok_graph),
579 (ELEM (ctype, class_collection, , space_value)
580 & BITw (tok_graph))
581 != 0)))
583 if (!be_quiet)
584 WITH_CUR_LOCALE (error (0, 0, _("\
585 <SP> character must not be in class `%s'"),
586 valid_table[cnt].name));
588 else
589 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
591 space_seq = charmap_find_value (charmap, "SP", 2);
592 if (space_seq == NULL)
593 space_seq = charmap_find_value (charmap, "space", 5);
594 if (space_seq == NULL)
595 space_seq = charmap_find_value (charmap, "U00000020", 9);
596 if (space_seq == NULL || space_seq->nbytes != 1)
598 if (!be_quiet)
599 WITH_CUR_LOCALE (error (0, 0, _("\
600 character <SP> not defined in character map")));
602 else if (((cnt = BITPOS (tok_space),
603 (ctype->class256_collection[space_seq->bytes[0]]
604 & BIT (tok_space)) == 0)
605 || (cnt = BITPOS (tok_blank),
606 (ctype->class256_collection[space_seq->bytes[0]]
607 & BIT (tok_blank)) == 0)))
609 if (!be_quiet)
610 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
611 valid_table[cnt].name));
613 else if (((cnt = BITPOS (tok_punct),
614 (ctype->class256_collection[space_seq->bytes[0]]
615 & BIT (tok_punct)) != 0)
616 || (cnt = BITPOS (tok_graph),
617 (ctype->class256_collection[space_seq->bytes[0]]
618 & BIT (tok_graph)) != 0)))
620 if (!be_quiet)
621 WITH_CUR_LOCALE (error (0, 0, _("\
622 <SP> character must not be in class `%s'"),
623 valid_table[cnt].name));
625 else
626 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
628 /* Now that the tests are done make sure the name array contains all
629 characters which are handled in the WIDTH section of the
630 character set definition file. */
631 if (charmap->width_rules != NULL)
632 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
634 unsigned char bytes[charmap->mb_cur_max];
635 int nbytes = charmap->width_rules[cnt].from->nbytes;
637 /* We have the range of character for which the width is
638 specified described using byte sequences of the multibyte
639 charset. We have to convert this to UCS4 now. And we
640 cannot simply convert the beginning and the end of the
641 sequence, we have to iterate over the byte sequence and
642 convert it for every single character. */
643 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
645 while (nbytes < charmap->width_rules[cnt].to->nbytes
646 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
647 nbytes) <= 0)
649 /* Find the UCS value for `bytes'. */
650 int inner;
651 uint32_t wch;
652 struct charseq *seq
653 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
655 if (seq == NULL)
656 wch = ILLEGAL_CHAR_VALUE;
657 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
658 wch = seq->ucs4;
659 else
660 wch = repertoire_find_value (ctype->repertoire, seq->name,
661 strlen (seq->name));
663 if (wch != ILLEGAL_CHAR_VALUE)
664 /* We are only interested in the side-effects of the
665 `find_idx' call. It will add appropriate entries in
666 the name array if this is necessary. */
667 (void) find_idx (ctype, NULL, NULL, NULL, wch);
669 /* "Increment" the bytes sequence. */
670 inner = nbytes - 1;
671 while (inner >= 0 && bytes[inner] == 0xff)
672 --inner;
674 if (inner < 0)
676 /* We have to extend the byte sequence. */
677 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
678 break;
680 bytes[0] = 1;
681 memset (&bytes[1], 0, nbytes);
682 ++nbytes;
684 else
686 ++bytes[inner];
687 while (++inner < nbytes)
688 bytes[inner] = 0;
693 /* Now set all the other characters of the character set to the
694 default width. */
695 curs = NULL;
696 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
698 struct charseq *data = (struct charseq *) vdata;
700 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
701 data->ucs4 = repertoire_find_value (ctype->repertoire,
702 data->name, len);
704 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
705 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
708 /* There must be a multiple of 10 digits. */
709 if (ctype->mbdigits_act % 10 != 0)
711 assert (ctype->mbdigits_act == ctype->wcdigits_act);
712 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
713 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
714 WITH_CUR_LOCALE (error (0, 0, _("\
715 `digit' category has not entries in groups of ten")));
718 /* Check the input digits. There must be a multiple of ten available.
719 In each group it could be that one or the other character is missing.
720 In this case the whole group must be removed. */
721 cnt = 0;
722 while (cnt < ctype->mbdigits_act)
724 size_t inner;
725 for (inner = 0; inner < 10; ++inner)
726 if (ctype->mbdigits[cnt + inner] == NULL)
727 break;
729 if (inner == 10)
730 cnt += 10;
731 else
733 /* Remove the group. */
734 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
735 ((ctype->wcdigits_act - cnt - 10)
736 * sizeof (ctype->mbdigits[0])));
737 ctype->mbdigits_act -= 10;
741 /* If no input digits are given use the default. */
742 if (ctype->mbdigits_act == 0)
744 if (ctype->mbdigits_max == 0)
746 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
747 10 * sizeof (struct charseq *));
748 ctype->mbdigits_max = 10;
751 for (cnt = 0; cnt < 10; ++cnt)
753 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
754 (char *) digits + cnt, 1);
755 if (ctype->mbdigits[cnt] == NULL)
757 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
758 longnames[cnt],
759 strlen (longnames[cnt]));
760 if (ctype->mbdigits[cnt] == NULL)
762 /* Hum, this ain't good. */
763 WITH_CUR_LOCALE (error (0, 0, _("\
764 no input digits defined and none of the standard names in the charmap")));
766 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
767 sizeof (struct charseq) + 1);
769 /* This is better than nothing. */
770 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
771 ctype->mbdigits[cnt]->nbytes = 1;
776 ctype->mbdigits_act = 10;
779 /* Check the wide character input digits. There must be a multiple
780 of ten available. In each group it could be that one or the other
781 character is missing. In this case the whole group must be
782 removed. */
783 cnt = 0;
784 while (cnt < ctype->wcdigits_act)
786 size_t inner;
787 for (inner = 0; inner < 10; ++inner)
788 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
789 break;
791 if (inner == 10)
792 cnt += 10;
793 else
795 /* Remove the group. */
796 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
797 ((ctype->wcdigits_act - cnt - 10)
798 * sizeof (ctype->wcdigits[0])));
799 ctype->wcdigits_act -= 10;
803 /* If no input digits are given use the default. */
804 if (ctype->wcdigits_act == 0)
806 if (ctype->wcdigits_max == 0)
808 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
809 10 * sizeof (uint32_t));
810 ctype->wcdigits_max = 10;
813 for (cnt = 0; cnt < 10; ++cnt)
814 ctype->wcdigits[cnt] = L'0' + cnt;
816 ctype->mbdigits_act = 10;
819 /* Check the outdigits. */
820 warned = 0;
821 for (cnt = 0; cnt < 10; ++cnt)
822 if (ctype->mboutdigits[cnt] == NULL)
824 static struct charseq replace[2];
826 if (!warned)
828 WITH_CUR_LOCALE (error (0, 0, _("\
829 not all characters used in `outdigit' are available in the charmap")));
830 warned = 1;
833 replace[0].nbytes = 1;
834 replace[0].bytes[0] = '?';
835 replace[0].bytes[1] = '\0';
836 ctype->mboutdigits[cnt] = &replace[0];
839 warned = 0;
840 for (cnt = 0; cnt < 10; ++cnt)
841 if (ctype->wcoutdigits[cnt] == 0)
843 if (!warned)
845 WITH_CUR_LOCALE (error (0, 0, _("\
846 not all characters used in `outdigit' are available in the repertoire")));
847 warned = 1;
850 ctype->wcoutdigits[cnt] = L'?';
853 /* Sort the entries in the translit_ignore list. */
854 if (ctype->translit_ignore != NULL)
856 struct translit_ignore_t *firstp = ctype->translit_ignore;
857 struct translit_ignore_t *runp;
859 ctype->ntranslit_ignore = 1;
861 for (runp = firstp->next; runp != NULL; runp = runp->next)
863 struct translit_ignore_t *lastp = NULL;
864 struct translit_ignore_t *cmpp;
866 ++ctype->ntranslit_ignore;
868 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
869 if (runp->from < cmpp->from)
870 break;
872 runp->next = lastp;
873 if (lastp == NULL)
874 firstp = runp;
877 ctype->translit_ignore = firstp;
882 void
883 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
884 const char *output_path)
886 static const char nulbytes[4] = { 0, 0, 0, 0 };
887 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
888 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
889 + ctype->nr_charclass + ctype->map_collection_nr);
890 struct iovec *iov = alloca (sizeof *iov
891 * (2 + nelems + 2 * ctype->nr_charclass
892 + ctype->map_collection_nr + 4));
893 struct locale_file data;
894 uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
895 uint32_t default_missing_len;
896 size_t elem, cnt, offset, total;
897 char *cp;
899 /* Now prepare the output: Find the sizes of the table we can use. */
900 allocate_arrays (ctype, charmap, ctype->repertoire);
902 data.magic = LIMAGIC (LC_CTYPE);
903 data.n = nelems;
904 iov[0].iov_base = (void *) &data;
905 iov[0].iov_len = sizeof (data);
907 iov[1].iov_base = (void *) idx;
908 iov[1].iov_len = nelems * sizeof (uint32_t);
910 idx[0] = iov[0].iov_len + iov[1].iov_len;
911 offset = 0;
913 for (elem = 0; elem < nelems; ++elem)
915 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
916 switch (elem)
918 #define CTYPE_EMPTY(name) \
919 case name: \
920 iov[2 + elem + offset].iov_base = NULL; \
921 iov[2 + elem + offset].iov_len = 0; \
922 idx[elem + 1] = idx[elem]; \
923 break
925 CTYPE_EMPTY(_NL_CTYPE_GAP1);
926 CTYPE_EMPTY(_NL_CTYPE_GAP2);
927 CTYPE_EMPTY(_NL_CTYPE_GAP3);
928 CTYPE_EMPTY(_NL_CTYPE_GAP4);
929 CTYPE_EMPTY(_NL_CTYPE_GAP5);
930 CTYPE_EMPTY(_NL_CTYPE_GAP6);
932 #define CTYPE_DATA(name, base, len) \
933 case _NL_ITEM_INDEX (name): \
934 iov[2 + elem + offset].iov_base = (base); \
935 iov[2 + elem + offset].iov_len = (len); \
936 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; \
937 break
939 CTYPE_DATA (_NL_CTYPE_CLASS,
940 ctype->ctype_b,
941 (256 + 128) * sizeof (char_class_t));
943 CTYPE_DATA (_NL_CTYPE_TOUPPER,
944 ctype->map_b[0],
945 (256 + 128) * sizeof (uint32_t));
946 CTYPE_DATA (_NL_CTYPE_TOLOWER,
947 ctype->map_b[1],
948 (256 + 128) * sizeof (uint32_t));
950 CTYPE_DATA (_NL_CTYPE_TOUPPER32,
951 ctype->map32_b[0],
952 256 * sizeof (uint32_t));
953 CTYPE_DATA (_NL_CTYPE_TOLOWER32,
954 ctype->map32_b[1],
955 256 * sizeof (uint32_t));
957 CTYPE_DATA (_NL_CTYPE_CLASS32,
958 ctype->ctype32_b,
959 256 * sizeof (char_class32_t));
961 CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
962 &ctype->class_offset, sizeof (uint32_t));
964 CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
965 &ctype->map_offset, sizeof (uint32_t));
967 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
968 &ctype->translit_idx_size, sizeof (uint32_t));
970 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
971 ctype->translit_from_idx,
972 ctype->translit_idx_size * sizeof (uint32_t));
974 CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
975 ctype->translit_from_tbl,
976 ctype->translit_from_tbl_size);
978 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
979 ctype->translit_to_idx,
980 ctype->translit_idx_size * sizeof (uint32_t));
982 CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
983 ctype->translit_to_tbl, ctype->translit_to_tbl_size);
985 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
986 /* The class name array. */
987 total = 0;
988 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
990 iov[2 + elem + offset].iov_base
991 = (void *) ctype->classnames[cnt];
992 iov[2 + elem + offset].iov_len
993 = strlen (ctype->classnames[cnt]) + 1;
994 total += iov[2 + elem + offset].iov_len;
996 iov[2 + elem + offset].iov_base = (void *) nulbytes;
997 iov[2 + elem + offset].iov_len = 4 - (total % 4);
998 total += 4 - (total % 4);
1000 idx[elem + 1] = idx[elem] + total;
1001 break;
1003 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1004 /* The map name array. */
1005 total = 0;
1006 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1008 iov[2 + elem + offset].iov_base
1009 = (void *) ctype->mapnames[cnt];
1010 iov[2 + elem + offset].iov_len
1011 = strlen (ctype->mapnames[cnt]) + 1;
1012 total += iov[2 + elem + offset].iov_len;
1014 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1015 iov[2 + elem + offset].iov_len = 4 - (total % 4);
1016 total += 4 - (total % 4);
1018 idx[elem + 1] = idx[elem] + total;
1019 break;
1021 CTYPE_DATA (_NL_CTYPE_WIDTH,
1022 ctype->width.iov_base,
1023 ctype->width.iov_len);
1025 CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1026 &ctype->mb_cur_max, sizeof (uint32_t));
1028 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1029 total = strlen (ctype->codeset_name) + 1;
1030 if (total % 4 == 0)
1031 iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1032 else
1034 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1035 memset (mempcpy (iov[2 + elem + offset].iov_base,
1036 ctype->codeset_name, total),
1037 '\0', 4 - (total & 3));
1038 total = (total + 3) & ~3;
1040 iov[2 + elem + offset].iov_len = total;
1041 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1042 break;
1045 CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1046 &ctype->to_nonascii, sizeof (uint32_t));
1048 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1049 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1050 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1051 *(uint32_t *) iov[2 + elem + offset].iov_base =
1052 ctype->mbdigits_act / 10;
1053 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1054 break;
1056 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1057 /* Align entries. */
1058 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1059 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1060 idx[elem] += iov[2 + elem + offset].iov_len;
1061 ++offset;
1063 iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1064 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1065 *(uint32_t *) iov[2 + elem + offset].iov_base =
1066 ctype->wcdigits_act / 10;
1067 idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1068 break;
1070 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1071 /* Compute the length of all possible characters. For INDIGITS
1072 there might be more than one. We simply concatenate all of
1073 them with a NUL byte following. The NUL byte wouldn't be
1074 necessary but it makes it easier for the user. */
1075 total = 0;
1077 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1078 cnt < ctype->mbdigits_act; cnt += 10)
1079 total += ctype->mbdigits[cnt]->nbytes + 1;
1080 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1081 iov[2 + elem + offset].iov_len = total;
1083 cp = iov[2 + elem + offset].iov_base;
1084 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1085 cnt < ctype->mbdigits_act; cnt += 10)
1087 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1088 ctype->mbdigits[cnt]->nbytes);
1089 *cp++ = '\0';
1091 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1092 break;
1094 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1095 /* Compute the length of all possible characters. For INDIGITS
1096 there might be more than one. We simply concatenate all of
1097 them with a NUL byte following. The NUL byte wouldn't be
1098 necessary but it makes it easier for the user. */
1099 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1100 total = ctype->mboutdigits[cnt]->nbytes + 1;
1101 iov[2 + elem + offset].iov_base = (char *) alloca (total);
1102 iov[2 + elem + offset].iov_len = total;
1104 *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1105 ctype->mboutdigits[cnt]->bytes,
1106 ctype->mboutdigits[cnt]->nbytes) = '\0';
1107 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1108 break;
1110 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1111 total = ctype->wcdigits_act / 10;
1113 iov[2 + elem + offset].iov_base =
1114 (uint32_t *) alloca (total * sizeof (uint32_t));
1115 iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1117 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1118 cnt < ctype->wcdigits_act; cnt += 10)
1119 ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1120 = ctype->wcdigits[cnt];
1121 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1122 break;
1124 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1125 /* Align entries. */
1126 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1127 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1128 idx[elem] += iov[2 + elem + offset].iov_len;
1129 ++offset;
1130 /* FALLTHROUGH */
1132 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1133 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1134 iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1135 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1136 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1137 break;
1139 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1140 /* Align entries. */
1141 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1142 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1143 idx[elem] += iov[2 + elem + offset].iov_len;
1144 ++offset;
1146 default_missing_len = (ctype->default_missing
1147 ? wcslen ((wchar_t *)ctype->default_missing)
1148 : 0);
1149 iov[2 + elem + offset].iov_base = &default_missing_len;
1150 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1151 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1152 break;
1154 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1155 iov[2 + elem + offset].iov_base =
1156 ctype->default_missing ?: (uint32_t *) L"";
1157 iov[2 + elem + offset].iov_len =
1158 wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1159 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1160 break;
1162 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1163 /* Align entries. */
1164 iov[2 + elem + offset].iov_base = (void *) nulbytes;
1165 iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1166 idx[elem] += iov[2 + elem + offset].iov_len;
1167 ++offset;
1169 iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1170 iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1171 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1172 break;
1174 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1176 uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1177 * 3 * sizeof (uint32_t));
1178 struct translit_ignore_t *runp;
1180 iov[2 + elem + offset].iov_base = ranges;
1181 iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1182 * 3 * sizeof (uint32_t));
1184 for (runp = ctype->translit_ignore; runp != NULL;
1185 runp = runp->next)
1187 *ranges++ = runp->from;
1188 *ranges++ = runp->to;
1189 *ranges++ = runp->step;
1192 /* Remove the following line in case a new entry is added
1193 after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */
1194 if (elem < nelems)
1195 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1196 break;
1198 default:
1199 assert (! "unknown CTYPE element");
1201 else
1203 /* Handle extra maps. */
1204 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1205 if (nr < ctype->nr_charclass)
1207 iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1208 iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1209 idx[elem] += iov[2 + elem + offset].iov_len;
1210 ++offset;
1212 iov[2 + elem + offset] = ctype->class_3level[nr];
1214 else
1216 nr -= ctype->nr_charclass;
1217 assert (nr < ctype->map_collection_nr);
1218 iov[2 + elem + offset] = ctype->map_3level[nr];
1220 idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1224 assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1225 + ctype->map_collection_nr + 4 + 2));
1227 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1228 iov);
1232 /* Local functions. */
1233 static void
1234 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1235 const char *name)
1237 size_t cnt;
1239 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1240 if (strcmp (ctype->classnames[cnt], name) == 0)
1241 break;
1243 if (cnt < ctype->nr_charclass)
1245 lr_error (lr, _("character class `%s' already defined"), name);
1246 return;
1249 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1250 /* Exit code 2 is prescribed in P1003.2b. */
1251 WITH_CUR_LOCALE (error (2, 0, _("\
1252 implementation limit: no more than %Zd character classes allowed"),
1253 MAX_NR_CHARCLASS));
1255 ctype->classnames[ctype->nr_charclass++] = name;
1259 static void
1260 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1261 const char *name, const struct charmap_t *charmap)
1263 size_t max_chars = 0;
1264 size_t cnt;
1266 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1268 if (strcmp (ctype->mapnames[cnt], name) == 0)
1269 break;
1271 if (max_chars < ctype->map_collection_max[cnt])
1272 max_chars = ctype->map_collection_max[cnt];
1275 if (cnt < ctype->map_collection_nr)
1277 lr_error (lr, _("character map `%s' already defined"), name);
1278 return;
1281 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1282 /* Exit code 2 is prescribed in P1003.2b. */
1283 WITH_CUR_LOCALE (error (2, 0, _("\
1284 implementation limit: no more than %d character maps allowed"),
1285 MAX_NR_CHARMAP));
1287 ctype->mapnames[cnt] = name;
1289 if (max_chars == 0)
1290 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1291 else
1292 ctype->map_collection_max[cnt] = max_chars;
1294 ctype->map_collection[cnt] = (uint32_t *)
1295 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1296 ctype->map_collection_act[cnt] = 256;
1298 ++ctype->map_collection_nr;
1302 /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1303 is possible if we only want to extend the name array. */
1304 static uint32_t *
1305 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1306 size_t *act, uint32_t idx)
1308 size_t cnt;
1310 if (idx < 256)
1311 return table == NULL ? NULL : &(*table)[idx];
1313 /* Use the charnames_idx lookup table instead of the slow search loop. */
1314 #if 1
1315 cnt = idx_table_get (&ctype->charnames_idx, idx);
1316 if (cnt == EMPTY)
1317 /* Not found. */
1318 cnt = ctype->charnames_act;
1319 #else
1320 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1321 if (ctype->charnames[cnt] == idx)
1322 break;
1323 #endif
1325 /* We have to distinguish two cases: the name is found or not. */
1326 if (cnt == ctype->charnames_act)
1328 /* Extend the name array. */
1329 if (ctype->charnames_act == ctype->charnames_max)
1331 ctype->charnames_max *= 2;
1332 ctype->charnames = (uint32_t *)
1333 xrealloc (ctype->charnames,
1334 sizeof (uint32_t) * ctype->charnames_max);
1336 ctype->charnames[ctype->charnames_act++] = idx;
1337 idx_table_add (&ctype->charnames_idx, idx, cnt);
1340 if (table == NULL)
1341 /* We have done everything we are asked to do. */
1342 return NULL;
1344 if (max == NULL)
1345 /* The caller does not want to extend the table. */
1346 return (cnt >= *act ? NULL : &(*table)[cnt]);
1348 if (cnt >= *act)
1350 if (cnt >= *max)
1352 size_t old_max = *max;
1354 *max *= 2;
1355 while (*max <= cnt);
1357 *table =
1358 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1359 memset (&(*table)[old_max], '\0',
1360 (*max - old_max) * sizeof (uint32_t));
1363 *act = cnt + 1;
1366 return &(*table)[cnt];
1370 static int
1371 get_character (struct token *now, const struct charmap_t *charmap,
1372 struct repertoire_t *repertoire,
1373 struct charseq **seqp, uint32_t *wchp)
1375 if (now->tok == tok_bsymbol)
1377 /* This will hopefully be the normal case. */
1378 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1379 now->val.str.lenmb);
1380 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1381 now->val.str.lenmb);
1383 else if (now->tok == tok_ucs4)
1385 char utmp[10];
1387 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1388 *seqp = charmap_find_value (charmap, utmp, 9);
1390 if (*seqp == NULL)
1391 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1393 if (*seqp == NULL)
1395 /* Compute the value in the charmap from the UCS value. */
1396 const char *symbol = repertoire_find_symbol (repertoire,
1397 now->val.ucs4);
1399 if (symbol == NULL)
1400 *seqp = NULL;
1401 else
1402 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1404 if (*seqp == NULL)
1406 if (repertoire != NULL)
1408 /* Insert a negative entry. */
1409 static const struct charseq negative
1410 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1411 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1412 sizeof (uint32_t));
1413 *newp = now->val.ucs4;
1415 insert_entry (&repertoire->seq_table, newp,
1416 sizeof (uint32_t), (void *) &negative);
1419 else
1420 (*seqp)->ucs4 = now->val.ucs4;
1422 else if ((*seqp)->ucs4 != now->val.ucs4)
1423 *seqp = NULL;
1425 *wchp = now->val.ucs4;
1427 else if (now->tok == tok_charcode)
1429 /* We must map from the byte code to UCS4. */
1430 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1431 now->val.str.lenmb);
1433 if (*seqp == NULL)
1434 *wchp = ILLEGAL_CHAR_VALUE;
1435 else
1437 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1438 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1439 strlen ((*seqp)->name));
1440 *wchp = (*seqp)->ucs4;
1443 else
1444 return 1;
1446 return 0;
1450 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1451 the .(2). counterparts. */
1452 static void
1453 charclass_symbolic_ellipsis (struct linereader *ldfile,
1454 struct locale_ctype_t *ctype,
1455 const struct charmap_t *charmap,
1456 struct repertoire_t *repertoire,
1457 struct token *now,
1458 const char *last_str,
1459 unsigned long int class256_bit,
1460 unsigned long int class_bit, int base,
1461 int ignore_content, int handle_digits, int step)
1463 const char *nowstr = now->val.str.startmb;
1464 char tmp[now->val.str.lenmb + 1];
1465 const char *cp;
1466 char *endp;
1467 unsigned long int from;
1468 unsigned long int to;
1470 /* We have to compute the ellipsis values using the symbolic names. */
1471 assert (last_str != NULL);
1473 if (strlen (last_str) != now->val.str.lenmb)
1475 invalid_range:
1476 lr_error (ldfile,
1477 _("`%s' and `%.*s' are not valid names for symbolic range"),
1478 last_str, (int) now->val.str.lenmb, nowstr);
1479 return;
1482 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1483 /* Nothing to do, the names are the same. */
1484 return;
1486 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1489 errno = 0;
1490 from = strtoul (cp, &endp, base);
1491 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1492 goto invalid_range;
1494 to = strtoul (nowstr + (cp - last_str), &endp, base);
1495 if ((to == UINT_MAX && errno == ERANGE)
1496 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1497 goto invalid_range;
1499 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1500 if (!ignore_content)
1502 now->val.str.startmb = tmp;
1503 while ((from += step) <= to)
1505 struct charseq *seq;
1506 uint32_t wch;
1508 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1509 (int) (cp - last_str), last_str,
1510 (int) (now->val.str.lenmb - (cp - last_str)),
1511 from);
1513 get_character (now, charmap, repertoire, &seq, &wch);
1515 if (seq != NULL && seq->nbytes == 1)
1516 /* Yep, we can store information about this byte sequence. */
1517 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1519 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1520 /* We have the UCS4 position. */
1521 *find_idx (ctype, &ctype->class_collection,
1522 &ctype->class_collection_max,
1523 &ctype->class_collection_act, wch) |= class_bit;
1525 if (handle_digits == 1)
1527 /* We must store the digit values. */
1528 if (ctype->mbdigits_act == ctype->mbdigits_max)
1530 ctype->mbdigits_max *= 2;
1531 ctype->mbdigits = xrealloc (ctype->mbdigits,
1532 (ctype->mbdigits_max
1533 * sizeof (char *)));
1534 ctype->wcdigits_max *= 2;
1535 ctype->wcdigits = xrealloc (ctype->wcdigits,
1536 (ctype->wcdigits_max
1537 * sizeof (uint32_t)));
1540 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1541 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1543 else if (handle_digits == 2)
1545 /* We must store the digit values. */
1546 if (ctype->outdigits_act >= 10)
1548 lr_error (ldfile, _("\
1549 %s: field `%s' does not contain exactly ten entries"),
1550 "LC_CTYPE", "outdigit");
1551 return;
1554 ctype->mboutdigits[ctype->outdigits_act] = seq;
1555 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1556 ++ctype->outdigits_act;
1563 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1564 static void
1565 charclass_ucs4_ellipsis (struct linereader *ldfile,
1566 struct locale_ctype_t *ctype,
1567 const struct charmap_t *charmap,
1568 struct repertoire_t *repertoire,
1569 struct token *now, uint32_t last_wch,
1570 unsigned long int class256_bit,
1571 unsigned long int class_bit, int ignore_content,
1572 int handle_digits, int step)
1574 if (last_wch > now->val.ucs4)
1576 lr_error (ldfile, _("\
1577 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1578 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1579 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1580 return;
1583 if (!ignore_content)
1584 while ((last_wch += step) <= now->val.ucs4)
1586 /* We have to find out whether there is a byte sequence corresponding
1587 to this UCS4 value. */
1588 struct charseq *seq;
1589 char utmp[10];
1591 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1592 seq = charmap_find_value (charmap, utmp, 9);
1593 if (seq == NULL)
1595 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1596 seq = charmap_find_value (charmap, utmp, 5);
1599 if (seq == NULL)
1600 /* Try looking in the repertoire map. */
1601 seq = repertoire_find_seq (repertoire, last_wch);
1603 /* If this is the first time we look for this sequence create a new
1604 entry. */
1605 if (seq == NULL)
1607 static const struct charseq negative
1608 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1610 /* Find the symbolic name for this UCS4 value. */
1611 if (repertoire != NULL)
1613 const char *symbol = repertoire_find_symbol (repertoire,
1614 last_wch);
1615 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1616 sizeof (uint32_t));
1617 *newp = last_wch;
1619 if (symbol != NULL)
1620 /* We have a name, now search the multibyte value. */
1621 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1623 if (seq == NULL)
1624 /* We have to create a fake entry. */
1625 seq = (struct charseq *) &negative;
1626 else
1627 seq->ucs4 = last_wch;
1629 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1630 seq);
1632 else
1633 /* We have to create a fake entry. */
1634 seq = (struct charseq *) &negative;
1637 /* We have a name, now search the multibyte value. */
1638 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1639 /* Yep, we can store information about this byte sequence. */
1640 ctype->class256_collection[(size_t) seq->bytes[0]]
1641 |= class256_bit;
1643 /* And of course we have the UCS4 position. */
1644 if (class_bit != 0)
1645 *find_idx (ctype, &ctype->class_collection,
1646 &ctype->class_collection_max,
1647 &ctype->class_collection_act, last_wch) |= class_bit;
1649 if (handle_digits == 1)
1651 /* We must store the digit values. */
1652 if (ctype->mbdigits_act == ctype->mbdigits_max)
1654 ctype->mbdigits_max *= 2;
1655 ctype->mbdigits = xrealloc (ctype->mbdigits,
1656 (ctype->mbdigits_max
1657 * sizeof (char *)));
1658 ctype->wcdigits_max *= 2;
1659 ctype->wcdigits = xrealloc (ctype->wcdigits,
1660 (ctype->wcdigits_max
1661 * sizeof (uint32_t)));
1664 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1665 ? seq : NULL);
1666 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1668 else if (handle_digits == 2)
1670 /* We must store the digit values. */
1671 if (ctype->outdigits_act >= 10)
1673 lr_error (ldfile, _("\
1674 %s: field `%s' does not contain exactly ten entries"),
1675 "LC_CTYPE", "outdigit");
1676 return;
1679 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1680 ? seq : NULL);
1681 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1682 ++ctype->outdigits_act;
1688 /* Ellipsis as in `/xea/x12.../xea/x34'. */
1689 static void
1690 charclass_charcode_ellipsis (struct linereader *ldfile,
1691 struct locale_ctype_t *ctype,
1692 const struct charmap_t *charmap,
1693 struct repertoire_t *repertoire,
1694 struct token *now, char *last_charcode,
1695 uint32_t last_charcode_len,
1696 unsigned long int class256_bit,
1697 unsigned long int class_bit, int ignore_content,
1698 int handle_digits)
1700 /* First check whether the to-value is larger. */
1701 if (now->val.charcode.nbytes != last_charcode_len)
1703 lr_error (ldfile, _("\
1704 start and end character sequence of range must have the same length"));
1705 return;
1708 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1710 lr_error (ldfile, _("\
1711 to-value character sequence is smaller than from-value sequence"));
1712 return;
1715 if (!ignore_content)
1719 /* Increment the byte sequence value. */
1720 struct charseq *seq;
1721 uint32_t wch;
1722 int i;
1724 for (i = last_charcode_len - 1; i >= 0; --i)
1725 if (++last_charcode[i] != 0)
1726 break;
1728 if (last_charcode_len == 1)
1729 /* Of course we have the charcode value. */
1730 ctype->class256_collection[(size_t) last_charcode[0]]
1731 |= class256_bit;
1733 /* Find the symbolic name. */
1734 seq = charmap_find_symbol (charmap, last_charcode,
1735 last_charcode_len);
1736 if (seq != NULL)
1738 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1739 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1740 strlen (seq->name));
1741 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1743 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1744 *find_idx (ctype, &ctype->class_collection,
1745 &ctype->class_collection_max,
1746 &ctype->class_collection_act, wch) |= class_bit;
1748 else
1749 wch = ILLEGAL_CHAR_VALUE;
1751 if (handle_digits == 1)
1753 /* We must store the digit values. */
1754 if (ctype->mbdigits_act == ctype->mbdigits_max)
1756 ctype->mbdigits_max *= 2;
1757 ctype->mbdigits = xrealloc (ctype->mbdigits,
1758 (ctype->mbdigits_max
1759 * sizeof (char *)));
1760 ctype->wcdigits_max *= 2;
1761 ctype->wcdigits = xrealloc (ctype->wcdigits,
1762 (ctype->wcdigits_max
1763 * sizeof (uint32_t)));
1766 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1767 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1768 seq->nbytes = last_charcode_len;
1770 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1771 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1773 else if (handle_digits == 2)
1775 struct charseq *seq;
1776 /* We must store the digit values. */
1777 if (ctype->outdigits_act >= 10)
1779 lr_error (ldfile, _("\
1780 %s: field `%s' does not contain exactly ten entries"),
1781 "LC_CTYPE", "outdigit");
1782 return;
1785 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1786 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1787 seq->nbytes = last_charcode_len;
1789 ctype->mboutdigits[ctype->outdigits_act] = seq;
1790 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1791 ++ctype->outdigits_act;
1794 while (memcmp (last_charcode, now->val.charcode.bytes,
1795 last_charcode_len) != 0);
1800 static uint32_t *
1801 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1802 uint32_t wch)
1804 struct translit_t *trunp = ctype->translit;
1805 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1807 while (trunp != NULL)
1809 /* XXX We simplify things here. The transliterations we look
1810 for are only allowed to have one character. */
1811 if (trunp->from[0] == wch && trunp->from[1] == 0)
1813 /* Found it. Now look for a transliteration which can be
1814 represented with the character set. */
1815 struct translit_to_t *torunp = trunp->to;
1817 while (torunp != NULL)
1819 int i;
1821 for (i = 0; torunp->str[i] != 0; ++i)
1823 char utmp[10];
1825 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1826 if (charmap_find_value (charmap, utmp, 9) == NULL)
1827 /* This character cannot be represented. */
1828 break;
1831 if (torunp->str[i] == 0)
1832 return torunp->str;
1834 torunp = torunp->next;
1837 break;
1840 trunp = trunp->next;
1843 /* Check for ignored chars. */
1844 while (tirunp != NULL)
1846 if (tirunp->from <= wch && tirunp->to >= wch)
1848 uint32_t wi;
1850 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1851 if (wi == wch)
1852 return (uint32_t []) { 0 };
1856 /* Nothing found. */
1857 return NULL;
1861 uint32_t *
1862 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1863 uint32_t wch)
1865 struct locale_ctype_t *ctype;
1866 uint32_t *result = NULL;
1868 assert (locale != NULL);
1869 ctype = locale->categories[LC_CTYPE].ctype;
1871 if (ctype == NULL)
1872 return NULL;
1874 if (ctype->translit != NULL)
1875 result = find_translit2 (ctype, charmap, wch);
1877 if (result == NULL)
1879 struct translit_include_t *irunp = ctype->translit_include;
1881 while (irunp != NULL && result == NULL)
1883 result = find_translit (find_locale (CTYPE_LOCALE,
1884 irunp->copy_locale,
1885 irunp->copy_repertoire,
1886 charmap),
1887 charmap, wch);
1888 irunp = irunp->next;
1892 return result;
1896 /* Read one transliteration entry. */
1897 static uint32_t *
1898 read_widestring (struct linereader *ldfile, struct token *now,
1899 const struct charmap_t *charmap,
1900 struct repertoire_t *repertoire)
1902 uint32_t *wstr;
1904 if (now->tok == tok_default_missing)
1905 /* The special name "" will denote this case. */
1906 wstr = ((uint32_t *) { 0 });
1907 else if (now->tok == tok_bsymbol)
1909 /* Get the value from the repertoire. */
1910 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1911 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1912 now->val.str.lenmb);
1913 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1915 /* We cannot proceed, we don't know the UCS4 value. */
1916 free (wstr);
1917 return NULL;
1920 wstr[1] = 0;
1922 else if (now->tok == tok_ucs4)
1924 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1925 wstr[0] = now->val.ucs4;
1926 wstr[1] = 0;
1928 else if (now->tok == tok_charcode)
1930 /* Argh, we have to convert to the symbol name first and then to the
1931 UCS4 value. */
1932 struct charseq *seq = charmap_find_symbol (charmap,
1933 now->val.str.startmb,
1934 now->val.str.lenmb);
1935 if (seq == NULL)
1936 /* Cannot find the UCS4 value. */
1937 return NULL;
1939 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1940 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1941 strlen (seq->name));
1942 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1943 /* We cannot proceed, we don't know the UCS4 value. */
1944 return NULL;
1946 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1947 wstr[0] = seq->ucs4;
1948 wstr[1] = 0;
1950 else if (now->tok == tok_string)
1952 wstr = now->val.str.startwc;
1953 if (wstr == NULL || wstr[0] == 0)
1954 return NULL;
1956 else
1958 if (now->tok != tok_eol && now->tok != tok_eof)
1959 lr_ignore_rest (ldfile, 0);
1960 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1961 return (uint32_t *) -1l;
1964 return wstr;
1968 static void
1969 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1970 struct token *now, const struct charmap_t *charmap,
1971 struct repertoire_t *repertoire)
1973 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1974 struct translit_t *result;
1975 struct translit_to_t **top;
1976 struct obstack *ob = &ctype->mempool;
1977 int first;
1978 int ignore;
1980 if (from_wstr == NULL)
1981 /* There is no valid from string. */
1982 return;
1984 result = (struct translit_t *) obstack_alloc (ob,
1985 sizeof (struct translit_t));
1986 result->from = from_wstr;
1987 result->fname = ldfile->fname;
1988 result->lineno = ldfile->lineno;
1989 result->next = NULL;
1990 result->to = NULL;
1991 top = &result->to;
1992 first = 1;
1993 ignore = 0;
1995 while (1)
1997 uint32_t *to_wstr;
1999 /* Next we have one or more transliterations. They are
2000 separated by semicolons. */
2001 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2003 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2005 /* One string read. */
2006 const uint32_t zero = 0;
2008 if (!ignore)
2010 obstack_grow (ob, &zero, 4);
2011 to_wstr = obstack_finish (ob);
2013 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2014 (*top)->str = to_wstr;
2015 (*top)->next = NULL;
2018 if (now->tok == tok_eol)
2020 result->next = ctype->translit;
2021 ctype->translit = result;
2022 return;
2025 if (!ignore)
2026 top = &(*top)->next;
2027 ignore = 0;
2029 else
2031 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2032 if (to_wstr == (uint32_t *) -1l)
2034 /* An error occurred. */
2035 obstack_free (ob, result);
2036 return;
2039 if (to_wstr == NULL)
2040 ignore = 1;
2041 else
2042 /* This value is usable. */
2043 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2045 first = 0;
2051 static void
2052 read_translit_ignore_entry (struct linereader *ldfile,
2053 struct locale_ctype_t *ctype,
2054 const struct charmap_t *charmap,
2055 struct repertoire_t *repertoire)
2057 /* We expect a semicolon-separated list of characters we ignore. We are
2058 only interested in the wide character definitions. These must be
2059 single characters, possibly defining a range when an ellipsis is used. */
2060 while (1)
2062 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2063 verbose);
2064 struct translit_ignore_t *newp;
2065 uint32_t from;
2067 if (now->tok == tok_eol || now->tok == tok_eof)
2069 lr_error (ldfile,
2070 _("premature end of `translit_ignore' definition"));
2071 return;
2074 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2076 lr_error (ldfile, _("syntax error"));
2077 lr_ignore_rest (ldfile, 0);
2078 return;
2081 if (now->tok == tok_ucs4)
2082 from = now->val.ucs4;
2083 else
2084 /* Try to get the value. */
2085 from = repertoire_find_value (repertoire, now->val.str.startmb,
2086 now->val.str.lenmb);
2088 if (from == ILLEGAL_CHAR_VALUE)
2090 lr_error (ldfile, "invalid character name");
2091 newp = NULL;
2093 else
2095 newp = (struct translit_ignore_t *)
2096 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2097 newp->from = from;
2098 newp->to = from;
2099 newp->step = 1;
2101 newp->next = ctype->translit_ignore;
2102 ctype->translit_ignore = newp;
2105 /* Now we expect either a semicolon, an ellipsis, or the end of the
2106 line. */
2107 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2109 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2111 /* XXX Should we bother implementing `....'? `...' certainly
2112 will not be implemented. */
2113 uint32_t to;
2114 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2116 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2118 if (now->tok == tok_eol || now->tok == tok_eof)
2120 lr_error (ldfile,
2121 _("premature end of `translit_ignore' definition"));
2122 return;
2125 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2127 lr_error (ldfile, _("syntax error"));
2128 lr_ignore_rest (ldfile, 0);
2129 return;
2132 if (now->tok == tok_ucs4)
2133 to = now->val.ucs4;
2134 else
2135 /* Try to get the value. */
2136 to = repertoire_find_value (repertoire, now->val.str.startmb,
2137 now->val.str.lenmb);
2139 if (to == ILLEGAL_CHAR_VALUE)
2140 lr_error (ldfile, "invalid character name");
2141 else
2143 /* Make sure the `to'-value is larger. */
2144 if (to >= from)
2146 newp->to = to;
2147 newp->step = step;
2149 else
2150 lr_error (ldfile, _("\
2151 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2152 (to | from) < 65536 ? 4 : 8, to,
2153 (to | from) < 65536 ? 4 : 8, from);
2156 /* And the next token. */
2157 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2160 if (now->tok == tok_eol || now->tok == tok_eof)
2161 /* We are done. */
2162 return;
2164 if (now->tok == tok_semicolon)
2165 /* Next round. */
2166 continue;
2168 /* If we come here something is wrong. */
2169 lr_error (ldfile, _("syntax error"));
2170 lr_ignore_rest (ldfile, 0);
2171 return;
2176 /* The parser for the LC_CTYPE section of the locale definition. */
2177 void
2178 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2179 const struct charmap_t *charmap, const char *repertoire_name,
2180 int ignore_content)
2182 struct repertoire_t *repertoire = NULL;
2183 struct locale_ctype_t *ctype;
2184 struct token *now;
2185 enum token_t nowtok;
2186 size_t cnt;
2187 struct charseq *last_seq;
2188 uint32_t last_wch = 0;
2189 enum token_t last_token;
2190 enum token_t ellipsis_token;
2191 int step;
2192 char last_charcode[16];
2193 size_t last_charcode_len = 0;
2194 const char *last_str = NULL;
2195 int mapidx;
2196 struct localedef_t *copy_locale = NULL;
2198 /* Get the repertoire we have to use. */
2199 if (repertoire_name != NULL)
2200 repertoire = repertoire_read (repertoire_name);
2202 /* The rest of the line containing `LC_CTYPE' must be free. */
2203 lr_ignore_rest (ldfile, 1);
2208 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2209 nowtok = now->tok;
2211 while (nowtok == tok_eol);
2213 /* If we see `copy' now we are almost done. */
2214 if (nowtok == tok_copy)
2216 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2217 if (now->tok != tok_string)
2219 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2221 skip_category:
2223 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2224 while (now->tok != tok_eof && now->tok != tok_end);
2226 if (now->tok != tok_eof
2227 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2228 now->tok == tok_eof))
2229 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2230 else if (now->tok != tok_lc_ctype)
2232 lr_error (ldfile, _("\
2233 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2234 lr_ignore_rest (ldfile, 0);
2236 else
2237 lr_ignore_rest (ldfile, 1);
2239 return;
2242 if (! ignore_content)
2244 /* Get the locale definition. */
2245 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2246 repertoire_name, charmap, NULL);
2247 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2249 /* Not yet loaded. So do it now. */
2250 if (locfile_read (copy_locale, charmap) != 0)
2251 goto skip_category;
2254 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2255 return;
2258 lr_ignore_rest (ldfile, 1);
2260 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2261 nowtok = now->tok;
2264 /* Prepare the data structures. */
2265 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2266 ctype = result->categories[LC_CTYPE].ctype;
2268 /* Remember the repertoire we use. */
2269 if (!ignore_content)
2270 ctype->repertoire = repertoire;
2272 while (1)
2274 unsigned long int class_bit = 0;
2275 unsigned long int class256_bit = 0;
2276 int handle_digits = 0;
2278 /* Of course we don't proceed beyond the end of file. */
2279 if (nowtok == tok_eof)
2280 break;
2282 /* Ignore empty lines. */
2283 if (nowtok == tok_eol)
2285 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286 nowtok = now->tok;
2287 continue;
2290 switch (nowtok)
2292 case tok_charclass:
2293 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294 while (now->tok == tok_ident || now->tok == tok_string)
2296 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2297 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2298 if (now->tok != tok_semicolon)
2299 break;
2300 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2302 if (now->tok != tok_eol)
2303 SYNTAX_ERROR (_("\
2304 %s: syntax error in definition of new character class"), "LC_CTYPE");
2305 break;
2307 case tok_charconv:
2308 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2309 while (now->tok == tok_ident || now->tok == tok_string)
2311 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2312 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2313 if (now->tok != tok_semicolon)
2314 break;
2315 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2317 if (now->tok != tok_eol)
2318 SYNTAX_ERROR (_("\
2319 %s: syntax error in definition of new character map"), "LC_CTYPE");
2320 break;
2322 case tok_class:
2323 /* Ignore the rest of the line if we don't need the input of
2324 this line. */
2325 if (ignore_content)
2327 lr_ignore_rest (ldfile, 0);
2328 break;
2331 /* We simply forget the `class' keyword and use the following
2332 operand to determine the bit. */
2333 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2334 if (now->tok == tok_ident || now->tok == tok_string)
2336 /* It may be one of the already defined class names. */
2337 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2338 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2339 break;
2340 if (cnt >= ctype->nr_charclass)
2342 #ifdef PREDEFINED_CLASSES
2343 if (now->val.str.lenmb == 8
2344 && memcmp ("special1", now->val.str.startmb, 8) == 0)
2345 class_bit = _ISwspecial1;
2346 else if (now->val.str.lenmb == 8
2347 && memcmp ("special2", now->val.str.startmb, 8) == 0)
2348 class_bit = _ISwspecial2;
2349 else if (now->val.str.lenmb == 8
2350 && memcmp ("special3", now->val.str.startmb, 8) == 0)
2351 class_bit = _ISwspecial3;
2352 else
2353 #endif
2355 /* OK, it's a new class. */
2356 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2358 class_bit = _ISwbit (ctype->nr_charclass - 1);
2361 else
2363 class_bit = _ISwbit (cnt);
2365 free (now->val.str.startmb);
2368 else if (now->tok == tok_digit)
2369 goto handle_tok_digit;
2370 else if (now->tok < tok_upper || now->tok > tok_blank)
2371 goto err_label;
2372 else
2374 class_bit = BITw (now->tok);
2375 class256_bit = BIT (now->tok);
2378 /* The next character must be a semicolon. */
2379 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2380 if (now->tok != tok_semicolon)
2381 goto err_label;
2382 goto read_charclass;
2384 case tok_upper:
2385 case tok_lower:
2386 case tok_alpha:
2387 case tok_alnum:
2388 case tok_space:
2389 case tok_cntrl:
2390 case tok_punct:
2391 case tok_graph:
2392 case tok_print:
2393 case tok_xdigit:
2394 case tok_blank:
2395 /* Ignore the rest of the line if we don't need the input of
2396 this line. */
2397 if (ignore_content)
2399 lr_ignore_rest (ldfile, 0);
2400 break;
2403 class_bit = BITw (now->tok);
2404 class256_bit = BIT (now->tok);
2405 handle_digits = 0;
2406 read_charclass:
2407 ctype->class_done |= class_bit;
2408 last_token = tok_none;
2409 ellipsis_token = tok_none;
2410 step = 1;
2411 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2412 while (now->tok != tok_eol && now->tok != tok_eof)
2414 uint32_t wch;
2415 struct charseq *seq;
2417 if (ellipsis_token == tok_none)
2419 if (get_character (now, charmap, repertoire, &seq, &wch))
2420 goto err_label;
2422 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2423 /* Yep, we can store information about this byte
2424 sequence. */
2425 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2427 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2428 && class_bit != 0)
2429 /* We have the UCS4 position. */
2430 *find_idx (ctype, &ctype->class_collection,
2431 &ctype->class_collection_max,
2432 &ctype->class_collection_act, wch) |= class_bit;
2434 last_token = now->tok;
2435 /* Terminate the string. */
2436 if (last_token == tok_bsymbol)
2438 now->val.str.startmb[now->val.str.lenmb] = '\0';
2439 last_str = now->val.str.startmb;
2441 else
2442 last_str = NULL;
2443 last_seq = seq;
2444 last_wch = wch;
2445 memcpy (last_charcode, now->val.charcode.bytes, 16);
2446 last_charcode_len = now->val.charcode.nbytes;
2448 if (!ignore_content && handle_digits == 1)
2450 /* We must store the digit values. */
2451 if (ctype->mbdigits_act == ctype->mbdigits_max)
2453 ctype->mbdigits_max += 10;
2454 ctype->mbdigits = xrealloc (ctype->mbdigits,
2455 (ctype->mbdigits_max
2456 * sizeof (char *)));
2457 ctype->wcdigits_max += 10;
2458 ctype->wcdigits = xrealloc (ctype->wcdigits,
2459 (ctype->wcdigits_max
2460 * sizeof (uint32_t)));
2463 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2464 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2466 else if (!ignore_content && handle_digits == 2)
2468 /* We must store the digit values. */
2469 if (ctype->outdigits_act >= 10)
2471 lr_error (ldfile, _("\
2472 %s: field `%s' does not contain exactly ten entries"),
2473 "LC_CTYPE", "outdigit");
2474 lr_ignore_rest (ldfile, 0);
2475 break;
2478 ctype->mboutdigits[ctype->outdigits_act] = seq;
2479 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2480 ++ctype->outdigits_act;
2483 else
2485 /* Now it gets complicated. We have to resolve the
2486 ellipsis problem. First we must distinguish between
2487 the different kinds of ellipsis and this must match the
2488 tokens we have seen. */
2489 assert (last_token != tok_none);
2491 if (last_token != now->tok)
2493 lr_error (ldfile, _("\
2494 ellipsis range must be marked by two operands of same type"));
2495 lr_ignore_rest (ldfile, 0);
2496 break;
2499 if (last_token == tok_bsymbol)
2501 if (ellipsis_token == tok_ellipsis3)
2502 lr_error (ldfile, _("with symbolic name range values \
2503 the absolute ellipsis `...' must not be used"));
2505 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2506 repertoire, now, last_str,
2507 class256_bit, class_bit,
2508 (ellipsis_token
2509 == tok_ellipsis4
2510 ? 10 : 16),
2511 ignore_content,
2512 handle_digits, step);
2514 else if (last_token == tok_ucs4)
2516 if (ellipsis_token != tok_ellipsis2)
2517 lr_error (ldfile, _("\
2518 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2520 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2521 repertoire, now, last_wch,
2522 class256_bit, class_bit,
2523 ignore_content, handle_digits,
2524 step);
2526 else
2528 assert (last_token == tok_charcode);
2530 if (ellipsis_token != tok_ellipsis3)
2531 lr_error (ldfile, _("\
2532 with character code range values one must use the absolute ellipsis `...'"));
2534 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2535 repertoire, now,
2536 last_charcode,
2537 last_charcode_len,
2538 class256_bit, class_bit,
2539 ignore_content,
2540 handle_digits);
2543 /* Now we have used the last value. */
2544 last_token = tok_none;
2547 /* Next we expect a semicolon or the end of the line. */
2548 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2549 if (now->tok == tok_eol || now->tok == tok_eof)
2550 break;
2552 if (last_token != tok_none
2553 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2555 if (now->tok == tok_ellipsis2_2)
2557 now->tok = tok_ellipsis2;
2558 step = 2;
2560 else if (now->tok == tok_ellipsis4_2)
2562 now->tok = tok_ellipsis4;
2563 step = 2;
2566 ellipsis_token = now->tok;
2568 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2569 continue;
2572 if (now->tok != tok_semicolon)
2573 goto err_label;
2575 /* And get the next character. */
2576 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2578 ellipsis_token = tok_none;
2579 step = 1;
2581 break;
2583 case tok_digit:
2584 /* Ignore the rest of the line if we don't need the input of
2585 this line. */
2586 if (ignore_content)
2588 lr_ignore_rest (ldfile, 0);
2589 break;
2592 handle_tok_digit:
2593 class_bit = _ISwdigit;
2594 class256_bit = _ISdigit;
2595 handle_digits = 1;
2596 goto read_charclass;
2598 case tok_outdigit:
2599 /* Ignore the rest of the line if we don't need the input of
2600 this line. */
2601 if (ignore_content)
2603 lr_ignore_rest (ldfile, 0);
2604 break;
2607 if (ctype->outdigits_act != 0)
2608 lr_error (ldfile, _("\
2609 %s: field `%s' declared more than once"),
2610 "LC_CTYPE", "outdigit");
2611 class_bit = 0;
2612 class256_bit = 0;
2613 handle_digits = 2;
2614 goto read_charclass;
2616 case tok_toupper:
2617 /* Ignore the rest of the line if we don't need the input of
2618 this line. */
2619 if (ignore_content)
2621 lr_ignore_rest (ldfile, 0);
2622 break;
2625 mapidx = 0;
2626 goto read_mapping;
2628 case tok_tolower:
2629 /* Ignore the rest of the line if we don't need the input of
2630 this line. */
2631 if (ignore_content)
2633 lr_ignore_rest (ldfile, 0);
2634 break;
2637 mapidx = 1;
2638 goto read_mapping;
2640 case tok_map:
2641 /* Ignore the rest of the line if we don't need the input of
2642 this line. */
2643 if (ignore_content)
2645 lr_ignore_rest (ldfile, 0);
2646 break;
2649 /* We simply forget the `map' keyword and use the following
2650 operand to determine the mapping. */
2651 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2652 if (now->tok == tok_ident || now->tok == tok_string)
2654 size_t cnt;
2656 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2657 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2658 break;
2660 if (cnt < ctype->map_collection_nr)
2661 free (now->val.str.startmb);
2662 else
2663 /* OK, it's a new map. */
2664 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2666 mapidx = cnt;
2668 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2669 goto err_label;
2670 else
2671 mapidx = now->tok - tok_toupper;
2673 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674 /* This better should be a semicolon. */
2675 if (now->tok != tok_semicolon)
2676 goto err_label;
2678 read_mapping:
2679 /* Test whether this mapping was already defined. */
2680 if (ctype->tomap_done[mapidx])
2682 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2683 ctype->mapnames[mapidx]);
2684 lr_ignore_rest (ldfile, 0);
2685 break;
2687 ctype->tomap_done[mapidx] = 1;
2689 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2690 while (now->tok != tok_eol && now->tok != tok_eof)
2692 struct charseq *from_seq;
2693 uint32_t from_wch;
2694 struct charseq *to_seq;
2695 uint32_t to_wch;
2697 /* Every pair starts with an opening brace. */
2698 if (now->tok != tok_open_brace)
2699 goto err_label;
2701 /* Next comes the from-value. */
2702 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2703 if (get_character (now, charmap, repertoire, &from_seq,
2704 &from_wch) != 0)
2705 goto err_label;
2707 /* The next is a comma. */
2708 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2709 if (now->tok != tok_comma)
2710 goto err_label;
2712 /* And the other value. */
2713 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2714 if (get_character (now, charmap, repertoire, &to_seq,
2715 &to_wch) != 0)
2716 goto err_label;
2718 /* And the last thing is the closing brace. */
2719 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2720 if (now->tok != tok_close_brace)
2721 goto err_label;
2723 if (!ignore_content)
2725 /* Check whether the mapping converts from an ASCII value
2726 to a non-ASCII value. */
2727 if (from_seq != NULL && from_seq->nbytes == 1
2728 && isascii (from_seq->bytes[0])
2729 && to_seq != NULL && (to_seq->nbytes != 1
2730 || !isascii (to_seq->bytes[0])))
2731 ctype->to_nonascii = 1;
2733 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2734 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2735 /* We can use this value. */
2736 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2737 = to_seq->bytes[0];
2739 if (from_wch != ILLEGAL_CHAR_VALUE
2740 && to_wch != ILLEGAL_CHAR_VALUE)
2741 /* Both correct values. */
2742 *find_idx (ctype, &ctype->map_collection[mapidx],
2743 &ctype->map_collection_max[mapidx],
2744 &ctype->map_collection_act[mapidx],
2745 from_wch) = to_wch;
2748 /* Now comes a semicolon or the end of the line/file. */
2749 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2750 if (now->tok == tok_semicolon)
2751 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2753 break;
2755 case tok_translit_start:
2756 /* Ignore the entire translit section with its peculiar syntax
2757 if we don't need the input. */
2758 if (ignore_content)
2762 lr_ignore_rest (ldfile, 0);
2763 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2765 while (now->tok != tok_translit_end && now->tok != tok_eof);
2767 if (now->tok == tok_eof)
2768 lr_error (ldfile, _(\
2769 "%s: `translit_start' section does not end with `translit_end'"),
2770 "LC_CTYPE");
2772 break;
2775 /* The rest of the line better should be empty. */
2776 lr_ignore_rest (ldfile, 1);
2778 /* We count here the number of allocated entries in the `translit'
2779 array. */
2780 cnt = 0;
2782 ldfile->translate_strings = 1;
2783 ldfile->return_widestr = 1;
2785 /* We proceed until we see the `translit_end' token. */
2786 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2787 now->tok != tok_translit_end && now->tok != tok_eof)
2789 if (now->tok == tok_eol)
2790 /* Ignore empty lines. */
2791 continue;
2793 if (now->tok == tok_include)
2795 /* We have to include locale. */
2796 const char *locale_name;
2797 const char *repertoire_name;
2798 struct translit_include_t *include_stmt, **include_ptr;
2800 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2801 /* This should be a string or an identifier. In any
2802 case something to name a locale. */
2803 if (now->tok != tok_string && now->tok != tok_ident)
2805 translit_syntax:
2806 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2807 lr_ignore_rest (ldfile, 0);
2808 continue;
2810 locale_name = now->val.str.startmb;
2812 /* Next should be a semicolon. */
2813 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2814 if (now->tok != tok_semicolon)
2815 goto translit_syntax;
2817 /* Now the repertoire name. */
2818 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2819 if ((now->tok != tok_string && now->tok != tok_ident)
2820 || now->val.str.startmb == NULL)
2821 goto translit_syntax;
2822 repertoire_name = now->val.str.startmb;
2823 if (repertoire_name[0] == '\0')
2824 /* Ignore the empty string. */
2825 repertoire_name = NULL;
2827 /* Save the include statement for later processing. */
2828 include_stmt = (struct translit_include_t *)
2829 xmalloc (sizeof (struct translit_include_t));
2830 include_stmt->copy_locale = locale_name;
2831 include_stmt->copy_repertoire = repertoire_name;
2832 include_stmt->next = NULL;
2834 include_ptr = &ctype->translit_include;
2835 while (*include_ptr != NULL)
2836 include_ptr = &(*include_ptr)->next;
2837 *include_ptr = include_stmt;
2839 /* The rest of the line must be empty. */
2840 lr_ignore_rest (ldfile, 1);
2842 /* Make sure the locale is read. */
2843 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2844 1, NULL);
2845 continue;
2847 else if (now->tok == tok_default_missing)
2849 uint32_t *wstr;
2851 while (1)
2853 /* We expect a single character or string as the
2854 argument. */
2855 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2856 wstr = read_widestring (ldfile, now, charmap,
2857 repertoire);
2859 if (wstr != NULL)
2861 if (ctype->default_missing != NULL)
2863 lr_error (ldfile, _("\
2864 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2865 WITH_CUR_LOCALE (error_at_line (0, 0,
2866 ctype->default_missing_file,
2867 ctype->default_missing_lineno,
2868 _("\
2869 previous definition was here")));
2871 else
2873 ctype->default_missing = wstr;
2874 ctype->default_missing_file = ldfile->fname;
2875 ctype->default_missing_lineno = ldfile->lineno;
2877 /* We can have more entries, ignore them. */
2878 lr_ignore_rest (ldfile, 0);
2879 break;
2881 else if (wstr == (uint32_t *) -1l)
2882 /* This was a syntax error. */
2883 break;
2885 /* Maybe there is another replacement we can use. */
2886 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2887 if (now->tok == tok_eol || now->tok == tok_eof)
2889 /* Nothing found. We tell the user. */
2890 lr_error (ldfile, _("\
2891 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2892 break;
2894 if (now->tok != tok_semicolon)
2895 goto translit_syntax;
2898 continue;
2900 else if (now->tok == tok_translit_ignore)
2902 read_translit_ignore_entry (ldfile, ctype, charmap,
2903 repertoire);
2904 continue;
2907 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2909 ldfile->return_widestr = 0;
2911 if (now->tok == tok_eof)
2912 lr_error (ldfile, _(\
2913 "%s: `translit_start' section does not end with `translit_end'"),
2914 "LC_CTYPE");
2916 break;
2918 case tok_ident:
2919 /* Ignore the rest of the line if we don't need the input of
2920 this line. */
2921 if (ignore_content)
2923 lr_ignore_rest (ldfile, 0);
2924 break;
2927 /* This could mean one of several things. First test whether
2928 it's a character class name. */
2929 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2930 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2931 break;
2932 if (cnt < ctype->nr_charclass)
2934 class_bit = _ISwbit (cnt);
2935 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2936 free (now->val.str.startmb);
2937 goto read_charclass;
2939 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2940 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2941 break;
2942 if (cnt < ctype->map_collection_nr)
2944 mapidx = cnt;
2945 free (now->val.str.startmb);
2946 goto read_mapping;
2948 #ifdef PREDEFINED_CLASSES
2949 if (strcmp (now->val.str.startmb, "special1") == 0)
2951 class_bit = _ISwspecial1;
2952 free (now->val.str.startmb);
2953 goto read_charclass;
2955 if (strcmp (now->val.str.startmb, "special2") == 0)
2957 class_bit = _ISwspecial2;
2958 free (now->val.str.startmb);
2959 goto read_charclass;
2961 if (strcmp (now->val.str.startmb, "special3") == 0)
2963 class_bit = _ISwspecial3;
2964 free (now->val.str.startmb);
2965 goto read_charclass;
2967 if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2969 mapidx = 2;
2970 goto read_mapping;
2972 #endif
2973 break;
2975 case tok_end:
2976 /* Next we assume `LC_CTYPE'. */
2977 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2978 if (now->tok == tok_eof)
2979 break;
2980 if (now->tok == tok_eol)
2981 lr_error (ldfile, _("%s: incomplete `END' line"),
2982 "LC_CTYPE");
2983 else if (now->tok != tok_lc_ctype)
2984 lr_error (ldfile, _("\
2985 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2986 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2987 return;
2989 default:
2990 err_label:
2991 if (now->tok != tok_eof)
2992 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2995 /* Prepare for the next round. */
2996 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2997 nowtok = now->tok;
3000 /* When we come here we reached the end of the file. */
3001 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3005 static void
3006 set_class_defaults (struct locale_ctype_t *ctype,
3007 const struct charmap_t *charmap,
3008 struct repertoire_t *repertoire)
3010 size_t cnt;
3012 /* This function defines the default values for the classes and conversions
3013 according to POSIX.2 2.5.2.1.
3014 It may seem that the order of these if-blocks is arbitrary but it is NOT.
3015 Don't move them unless you know what you are doing! */
3017 auto void set_default (int bitpos, int from, int to);
/* Nested helper of set_class_defaults: put every character code CH with
   FROM <= CH <= TO into the class whose bit position is BITPOS.
   It reads CHARMAP and updates CTYPE, both taken from the enclosing
   function.  BITPOS is turned into the mask for the 256-entry byte table
   with _ISbit and into the mask for the wide character table with
   _ISwbit.  Nothing is returned; problems are reported through error.  */
3019 void set_default (int bitpos, int from, int to)
3021 char tmp[2];
3022 int ch;
3023 int bit = _ISbit (bitpos);
3024 int bitw = _ISwbit (bitpos);
3025 /* Define string. */
/* TMP starts as a one-character placeholder; the copy also stores the
   terminating NUL, so only tmp[0] has to be replaced in the loop.  */
3026 strcpy (tmp, "?");
3028 for (ch = from; ch <= to; ++ch)
3030 struct charseq *seq;
3031 tmp[0] = ch;
/* First look the character up in the charmap under its one-character
   name.  */
3033 seq = charmap_find_value (charmap, tmp, 1);
3034 if (seq == NULL)
/* Not found: retry with the nine-character name built from U and the
   value as eight hex digits.  BUF has room for those nine characters
   plus the terminating NUL.  */
3036 char buf[10];
3037 sprintf (buf, "U%08X", ch);
3038 seq = charmap_find_value (charmap, buf, 9);
3040 if (seq == NULL)
/* Neither name is known to the charmap.  This message is suppressed
   when be_quiet is set.  */
3042 if (!be_quiet)
3043 WITH_CUR_LOCALE (error (0, 0, _("\
3044 %s: character `%s' not defined while needed as default value"),
3045 "LC_CTYPE", tmp));
/* NOTE(review): unlike the message above, the following one is not
   guarded by be_quiet -- confirm whether that is intended.  */
3047 else if (seq->nbytes != 1)
3048 WITH_CUR_LOCALE (error (0, 0, _("\
3049 %s: character `%s' in charmap not representable with one byte"),
3050 "LC_CTYPE", tmp));
3051 else
/* Exactly one byte: record the class bit in the byte-indexed table.  */
3052 ctype->class256_collection[seq->bytes[0]] |= bit;
3054 /* No need to search here, the ASCII value is also the Unicode
3055 value. */
/* The wide character entry is updated for every CH, including those for
   which the charmap lookup above failed.  ELEM expands to a dereferenced
   find_idx call on the class_collection table.  */
3056 ELEM (ctype, class_collection, , ch) |= bitw;
3060 /* Set default values if keyword was not present. */
3061 if ((ctype->class_done & BITw (tok_upper)) == 0)
3062 /* "If this keyword [upper] is not specified, the uppercase letters
3063 `A' through `Z', ..., shall automatically belong to this class,
3064 with implementation defined character values." [P1003.2, 2.5.2.1] */
3065 set_default (BITPOS (tok_upper), 'A', 'Z');
3067 if ((ctype->class_done & BITw (tok_lower)) == 0)
3068 /* "If this keyword [lower] is not specified, the lowercase letters
3069 `a' through `z', ..., shall automatically belong to this class,
3070 with implementation defined character values." [P1003.2, 2.5.2.1] */
3071 set_default (BITPOS (tok_lower), 'a', 'z');
3073 if ((ctype->class_done & BITw (tok_alpha)) == 0)
3075 /* Table 2-6 in P1003.2 says that characters in class `upper' or
3076 class `lower' *must* be in class `alpha'. */
3077 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3078 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3080 for (cnt = 0; cnt < 256; ++cnt)
3081 if ((ctype->class256_collection[cnt] & mask) != 0)
3082 ctype->class256_collection[cnt] |= BIT (tok_alpha);
3084 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3085 if ((ctype->class_collection[cnt] & maskw) != 0)
3086 ctype->class_collection[cnt] |= BITw (tok_alpha);
3089 if ((ctype->class_done & BITw (tok_digit)) == 0)
3090 /* "If this keyword [digit] is not specified, the digits `0' through
3091 `9', ..., shall automatically belong to this class, with
3092 implementation-defined character values." [P1003.2, 2.5.2.1] */
3093 set_default (BITPOS (tok_digit), '0', '9');
3095 /* "Only characters specified for the `alpha' and `digit' keyword
3096 shall be specified. Characters specified for the keyword `alpha'
3097 and `digit' are automatically included in this class. */
3099 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3100 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3102 for (cnt = 0; cnt < 256; ++cnt)
3103 if ((ctype->class256_collection[cnt] & mask) != 0)
3104 ctype->class256_collection[cnt] |= BIT (tok_alnum);
3106 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3107 if ((ctype->class_collection[cnt] & maskw) != 0)
3108 ctype->class_collection[cnt] |= BITw (tok_alnum);
3111 if ((ctype->class_done & BITw (tok_space)) == 0)
3112 /* "If this keyword [space] is not specified, the characters <space>,
3113 <form-feed>, <newline>, <carriage-return>, <tab>, and
3114 <vertical-tab>, ..., shall automatically belong to this class,
3115 with implementation-defined character values." [P1003.2, 2.5.2.1] */
3117 struct charseq *seq;
3119 seq = charmap_find_value (charmap, "space", 5);
3120 if (seq == NULL)
3121 seq = charmap_find_value (charmap, "SP", 2);
3122 if (seq == NULL)
3123 seq = charmap_find_value (charmap, "U00000020", 9);
3124 if (seq == NULL)
3126 if (!be_quiet)
3127 WITH_CUR_LOCALE (error (0, 0, _("\
3128 %s: character `%s' not defined while needed as default value"),
3129 "LC_CTYPE", "<space>"));
3131 else if (seq->nbytes != 1)
3132 WITH_CUR_LOCALE (error (0, 0, _("\
3133 %s: character `%s' in charmap not representable with one byte"),
3134 "LC_CTYPE", "<space>"));
3135 else
3136 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3138 /* No need to search. */
3139 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3141 seq = charmap_find_value (charmap, "form-feed", 9);
3142 if (seq == NULL)
3143 seq = charmap_find_value (charmap, "U0000000C", 9);
3144 if (seq == NULL)
3146 if (!be_quiet)
3147 WITH_CUR_LOCALE (error (0, 0, _("\
3148 %s: character `%s' not defined while needed as default value"),
3149 "LC_CTYPE", "<form-feed>"));
3151 else if (seq->nbytes != 1)
3152 WITH_CUR_LOCALE (error (0, 0, _("\
3153 %s: character `%s' in charmap not representable with one byte"),
3154 "LC_CTYPE", "<form-feed>"));
3155 else
3156 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3158 /* No need to search. */
3159 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3162 seq = charmap_find_value (charmap, "newline", 7);
3163 if (seq == NULL)
3164 seq = charmap_find_value (charmap, "U0000000A", 9);
3165 if (seq == NULL)
3167 if (!be_quiet)
3168 WITH_CUR_LOCALE (error (0, 0, _("\
3169 %s: character `%s' not defined while needed as default value"),
3170 "LC_CTYPE", "<newline>"));
3172 else if (seq->nbytes != 1)
3173 WITH_CUR_LOCALE (error (0, 0, _("\
3174 %s: character `%s' in charmap not representable with one byte"),
3175 "LC_CTYPE", "<newline>"));
3176 else
3177 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3179 /* No need to search. */
3180 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3183 seq = charmap_find_value (charmap, "carriage-return", 15);
3184 if (seq == NULL)
3185 seq = charmap_find_value (charmap, "U0000000D", 9);
3186 if (seq == NULL)
3188 if (!be_quiet)
3189 WITH_CUR_LOCALE (error (0, 0, _("\
3190 %s: character `%s' not defined while needed as default value"),
3191 "LC_CTYPE", "<carriage-return>"));
3193 else if (seq->nbytes != 1)
3194 WITH_CUR_LOCALE (error (0, 0, _("\
3195 %s: character `%s' in charmap not representable with one byte"),
3196 "LC_CTYPE", "<carriage-return>"));
3197 else
3198 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3200 /* No need to search. */
3201 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3204 seq = charmap_find_value (charmap, "tab", 3);
3205 if (seq == NULL)
3206 seq = charmap_find_value (charmap, "U00000009", 9);
3207 if (seq == NULL)
3209 if (!be_quiet)
3210 WITH_CUR_LOCALE (error (0, 0, _("\
3211 %s: character `%s' not defined while needed as default value"),
3212 "LC_CTYPE", "<tab>"));
3214 else if (seq->nbytes != 1)
3215 WITH_CUR_LOCALE (error (0, 0, _("\
3216 %s: character `%s' in charmap not representable with one byte"),
3217 "LC_CTYPE", "<tab>"));
3218 else
3219 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3221 /* No need to search. */
3222 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3225 seq = charmap_find_value (charmap, "vertical-tab", 12);
3226 if (seq == NULL)
3227 seq = charmap_find_value (charmap, "U0000000B", 9);
3228 if (seq == NULL)
3230 if (!be_quiet)
3231 WITH_CUR_LOCALE (error (0, 0, _("\
3232 %s: character `%s' not defined while needed as default value"),
3233 "LC_CTYPE", "<vertical-tab>"));
3235 else if (seq->nbytes != 1)
3236 WITH_CUR_LOCALE (error (0, 0, _("\
3237 %s: character `%s' in charmap not representable with one byte"),
3238 "LC_CTYPE", "<vertical-tab>"));
3239 else
3240 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3242 /* No need to search. */
3243 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3246 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3247 /* "If this keyword is not specified, the digits `0' to `9', the
3248 uppercase letters `A' through `F', and the lowercase letters `a'
3249 through `f', ..., shall automatically belong to this class, with
3250 implementation defined character values." [P1003.2, 2.5.2.1] */
3252 set_default (BITPOS (tok_xdigit), '0', '9');
3253 set_default (BITPOS (tok_xdigit), 'A', 'F');
3254 set_default (BITPOS (tok_xdigit), 'a', 'f');
3257 if ((ctype->class_done & BITw (tok_blank)) == 0)
3258 /* "If this keyword [blank] is unspecified, the characters <space> and
3259 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3261 struct charseq *seq;
3263 seq = charmap_find_value (charmap, "space", 5);
3264 if (seq == NULL)
3265 seq = charmap_find_value (charmap, "SP", 2);
3266 if (seq == NULL)
3267 seq = charmap_find_value (charmap, "U00000020", 9);
3268 if (seq == NULL)
3270 if (!be_quiet)
3271 WITH_CUR_LOCALE (error (0, 0, _("\
3272 %s: character `%s' not defined while needed as default value"),
3273 "LC_CTYPE", "<space>"));
3275 else if (seq->nbytes != 1)
3276 WITH_CUR_LOCALE (error (0, 0, _("\
3277 %s: character `%s' in charmap not representable with one byte"),
3278 "LC_CTYPE", "<space>"));
3279 else
3280 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3282 /* No need to search. */
3283 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3286 seq = charmap_find_value (charmap, "tab", 3);
3287 if (seq == NULL)
3288 seq = charmap_find_value (charmap, "U00000009", 9);
3289 if (seq == NULL)
3291 if (!be_quiet)
3292 WITH_CUR_LOCALE (error (0, 0, _("\
3293 %s: character `%s' not defined while needed as default value"),
3294 "LC_CTYPE", "<tab>"));
3296 else if (seq->nbytes != 1)
3297 WITH_CUR_LOCALE (error (0, 0, _("\
3298 %s: character `%s' in charmap not representable with one byte"),
3299 "LC_CTYPE", "<tab>"));
3300 else
3301 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3303 /* No need to search. */
3304 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3307 if ((ctype->class_done & BITw (tok_graph)) == 0)
3308 /* "If this keyword [graph] is not specified, characters specified for
3309 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3310 shall belong to this character class." [P1003.2, 2.5.2.1] */
3312 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3313 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3314 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3315 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3316 BITw (tok_punct);
3317 size_t cnt;
3319 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3320 if ((ctype->class_collection[cnt] & maskw) != 0)
3321 ctype->class_collection[cnt] |= BITw (tok_graph);
3323 for (cnt = 0; cnt < 256; ++cnt)
3324 if ((ctype->class256_collection[cnt] & mask) != 0)
3325 ctype->class256_collection[cnt] |= BIT (tok_graph);
3328 if ((ctype->class_done & BITw (tok_print)) == 0)
3329 /* "If this keyword [print] is not provided, characters specified for
3330 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3331 and the <space> character shall belong to this character class."
3332 [P1003.2, 2.5.2.1] */
3334 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3335 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3336 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3337 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3338 BITw (tok_punct);
3339 size_t cnt;
3340 struct charseq *seq;
3342 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3343 if ((ctype->class_collection[cnt] & maskw) != 0)
3344 ctype->class_collection[cnt] |= BITw (tok_print);
3346 for (cnt = 0; cnt < 256; ++cnt)
3347 if ((ctype->class256_collection[cnt] & mask) != 0)
3348 ctype->class256_collection[cnt] |= BIT (tok_print);
3351 seq = charmap_find_value (charmap, "space", 5);
3352 if (seq == NULL)
3353 seq = charmap_find_value (charmap, "SP", 2);
3354 if (seq == NULL)
3355 seq = charmap_find_value (charmap, "U00000020", 9);
3356 if (seq == NULL)
3358 if (!be_quiet)
3359 WITH_CUR_LOCALE (error (0, 0, _("\
3360 %s: character `%s' not defined while needed as default value"),
3361 "LC_CTYPE", "<space>"));
3363 else if (seq->nbytes != 1)
3364 WITH_CUR_LOCALE (error (0, 0, _("\
3365 %s: character `%s' in charmap not representable with one byte"),
3366 "LC_CTYPE", "<space>"));
3367 else
3368 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3370 /* No need to search. */
3371 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3374 if (ctype->tomap_done[0] == 0)
3375 /* "If this keyword [toupper] is not specified, the lowercase letters
3376 `a' through `z', and their corresponding uppercase letters `A' to
3377 `Z', ..., shall automatically be included, with implementation-
3378 defined character values." [P1003.2, 2.5.2.1] */
3380 char tmp[4];
3381 int ch;
3383 strcpy (tmp, "<?>");
3385 for (ch = 'a'; ch <= 'z'; ++ch)
3387 struct charseq *seq_from, *seq_to;
3389 tmp[1] = (char) ch;
3391 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3392 if (seq_from == NULL)
3394 char buf[10];
3395 sprintf (buf, "U%08X", ch);
3396 seq_from = charmap_find_value (charmap, buf, 9);
3398 if (seq_from == NULL)
3400 if (!be_quiet)
3401 WITH_CUR_LOCALE (error (0, 0, _("\
3402 %s: character `%s' not defined while needed as default value"),
3403 "LC_CTYPE", tmp));
3405 else if (seq_from->nbytes != 1)
3407 if (!be_quiet)
3408 WITH_CUR_LOCALE (error (0, 0, _("\
3409 %s: character `%s' needed as default value not representable with one byte"),
3410 "LC_CTYPE", tmp));
3412 else
3414 /* This conversion is implementation defined. */
3415 tmp[1] = (char) (ch + ('A' - 'a'));
3416 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3417 if (seq_to == NULL)
3419 char buf[10];
3420 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3421 seq_to = charmap_find_value (charmap, buf, 9);
3423 if (seq_to == NULL)
3425 if (!be_quiet)
3426 WITH_CUR_LOCALE (error (0, 0, _("\
3427 %s: character `%s' not defined while needed as default value"),
3428 "LC_CTYPE", tmp));
3430 else if (seq_to->nbytes != 1)
3432 if (!be_quiet)
3433 WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: character `%s' needed as default value not representable with one byte"),
3435 "LC_CTYPE", tmp));
3437 else
3438 /* The index [0] is determined by the order of the
3439 `ctype_map_newP' calls in `ctype_startup'. */
3440 ctype->map256_collection[0][seq_from->bytes[0]]
3441 = seq_to->bytes[0];
3444 /* No need to search. */
3445 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3449 if (ctype->tomap_done[1] == 0)
3450 /* "If this keyword [tolower] is not specified, the mapping shall be
3451 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3453 for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3454 if (ctype->map_collection[0][cnt] != 0)
3455 ELEM (ctype, map_collection, [1],
3456 ctype->map_collection[0][cnt])
3457 = ctype->charnames[cnt];
3459 for (cnt = 0; cnt < 256; ++cnt)
3460 if (ctype->map256_collection[0][cnt] != 0)
3461 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3464 if (ctype->outdigits_act != 10)
3466 if (ctype->outdigits_act != 0)
3467 WITH_CUR_LOCALE (error (0, 0, _("\
3468 %s: field `%s' does not contain exactly ten entries"),
3469 "LC_CTYPE", "outdigit"));
3471 for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3473 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3474 (char *) digits + cnt,
3477 if (ctype->mboutdigits[cnt] == NULL)
3478 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3479 longnames[cnt],
3480 strlen (longnames[cnt]));
3482 if (ctype->mboutdigits[cnt] == NULL)
3483 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3484 uninames[cnt], 9);
3486 if (ctype->mboutdigits[cnt] == NULL)
3488 /* Provide a replacement. */
3489 WITH_CUR_LOCALE (error (0, 0, _("\
3490 no output digits defined and none of the standard names in the charmap")));
3492 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3493 sizeof (struct charseq)
3494 + 1);
3496 /* This is better than nothing. */
3497 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3498 ctype->mboutdigits[cnt]->nbytes = 1;
3501 ctype->wcoutdigits[cnt] = L'0' + cnt;
3504 ctype->outdigits_act = 10;
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* Builder state for one 3-level bit table.  A wide character value is
   split into four bit fields: the low 5 bits select a bit inside a
   32-bit word, the next P bits select a word inside a level-3 block,
   the next Q bits select an entry inside a level-2 block, and the
   remaining high bits index level 1.  */
struct wctype_table
{
  /* Parameters: log2 of the number of words in a level-3 block (P)
     and of the number of entries in a level-2 block (Q).  They must
     be stored by the user before wctype_table_init is called.  */
  unsigned int p;
  unsigned int q;

  /* Working representation, grown on demand while entries are added.
     For level 1 the counters are in entries; for levels 2 and 3 they
     are in blocks of (1 << q) resp. (1 << p) elements.  */
  size_t level1_alloc;		/* Allocated capacity of LEVEL1.  */
  size_t level1_size;		/* Entries of LEVEL1 in use.  */
  uint32_t *level1;		/* Level-2 block numbers, or EMPTY.  */
  size_t level2_alloc;		/* Allocated capacity of LEVEL2.  */
  size_t level2_size;		/* Blocks of LEVEL2 in use.  */
  uint32_t *level2;		/* Level-3 block numbers, or EMPTY.  */
  size_t level3_alloc;		/* Allocated capacity of LEVEL3.  */
  size_t level3_size;		/* Blocks of LEVEL3 in use.  */
  uint32_t *level3;		/* 32-bit membership bitmaps.  */

  /* Compressed representation, produced by wctype_table_finalize.  */
  size_t result_size;		/* Length of RESULT in bytes.  */
  char *result;			/* Serialized table.  */
};
3532 /* Initialize. Assumes t->p and t->q have already been set. */
3533 static inline void
3534 wctype_table_init (struct wctype_table *t)
3536 t->level1 = NULL;
3537 t->level1_alloc = t->level1_size = 0;
3538 t->level2 = NULL;
3539 t->level2_alloc = t->level2_size = 0;
3540 t->level3 = NULL;
3541 t->level3_alloc = t->level3_size = 0;
3544 /* Retrieve an entry. */
3545 static inline int
3546 wctype_table_get (struct wctype_table *t, uint32_t wc)
3548 uint32_t index1 = wc >> (t->q + t->p + 5);
3549 if (index1 < t->level1_size)
3551 uint32_t lookup1 = t->level1[index1];
3552 if (lookup1 != EMPTY)
3554 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3555 + (lookup1 << t->q);
3556 uint32_t lookup2 = t->level2[index2];
3557 if (lookup2 != EMPTY)
3559 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3560 + (lookup2 << t->p);
3561 uint32_t lookup3 = t->level3[index3];
3562 uint32_t index4 = wc & 0x1f;
3564 return (lookup3 >> index4) & 1;
3568 return 0;
3571 /* Add one entry. */
3572 static void
3573 wctype_table_add (struct wctype_table *t, uint32_t wc)
3575 uint32_t index1 = wc >> (t->q + t->p + 5);
3576 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3577 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3578 uint32_t index4 = wc & 0x1f;
3579 size_t i, i1, i2;
3581 if (index1 >= t->level1_size)
3583 if (index1 >= t->level1_alloc)
3585 size_t alloc = 2 * t->level1_alloc;
3586 if (alloc <= index1)
3587 alloc = index1 + 1;
3588 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3589 alloc * sizeof (uint32_t));
3590 t->level1_alloc = alloc;
3592 while (index1 >= t->level1_size)
3593 t->level1[t->level1_size++] = EMPTY;
3596 if (t->level1[index1] == EMPTY)
3598 if (t->level2_size == t->level2_alloc)
3600 size_t alloc = 2 * t->level2_alloc + 1;
3601 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3602 (alloc << t->q) * sizeof (uint32_t));
3603 t->level2_alloc = alloc;
3605 i1 = t->level2_size << t->q;
3606 i2 = (t->level2_size + 1) << t->q;
3607 for (i = i1; i < i2; i++)
3608 t->level2[i] = EMPTY;
3609 t->level1[index1] = t->level2_size++;
3612 index2 += t->level1[index1] << t->q;
3614 if (t->level2[index2] == EMPTY)
3616 if (t->level3_size == t->level3_alloc)
3618 size_t alloc = 2 * t->level3_alloc + 1;
3619 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3620 (alloc << t->p) * sizeof (uint32_t));
3621 t->level3_alloc = alloc;
3623 i1 = t->level3_size << t->p;
3624 i2 = (t->level3_size + 1) << t->p;
3625 for (i = i1; i < i2; i++)
3626 t->level3[i] = 0;
3627 t->level2[index2] = t->level3_size++;
3630 index3 += t->level2[index2] << t->p;
3632 t->level3[index3] |= (uint32_t)1 << index4;
3635 /* Finalize and shrink. */
3636 static void
3637 wctype_table_finalize (struct wctype_table *t)
3639 size_t i, j, k;
3640 uint32_t reorder3[t->level3_size];
3641 uint32_t reorder2[t->level2_size];
3642 uint32_t level1_offset, level2_offset, level3_offset;
3644 /* Uniquify level3 blocks. */
3645 k = 0;
3646 for (j = 0; j < t->level3_size; j++)
3648 for (i = 0; i < k; i++)
3649 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3650 (1 << t->p) * sizeof (uint32_t)) == 0)
3651 break;
3652 /* Relocate block j to block i. */
3653 reorder3[j] = i;
3654 if (i == k)
3656 if (i != j)
3657 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3658 (1 << t->p) * sizeof (uint32_t));
3659 k++;
3662 t->level3_size = k;
3664 for (i = 0; i < (t->level2_size << t->q); i++)
3665 if (t->level2[i] != EMPTY)
3666 t->level2[i] = reorder3[t->level2[i]];
3668 /* Uniquify level2 blocks. */
3669 k = 0;
3670 for (j = 0; j < t->level2_size; j++)
3672 for (i = 0; i < k; i++)
3673 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3674 (1 << t->q) * sizeof (uint32_t)) == 0)
3675 break;
3676 /* Relocate block j to block i. */
3677 reorder2[j] = i;
3678 if (i == k)
3680 if (i != j)
3681 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3682 (1 << t->q) * sizeof (uint32_t));
3683 k++;
3686 t->level2_size = k;
3688 for (i = 0; i < t->level1_size; i++)
3689 if (t->level1[i] != EMPTY)
3690 t->level1[i] = reorder2[t->level1[i]];
3692 /* Create and fill the resulting compressed representation. */
3693 t->result_size =
3694 5 * sizeof (uint32_t)
3695 + t->level1_size * sizeof (uint32_t)
3696 + (t->level2_size << t->q) * sizeof (uint32_t)
3697 + (t->level3_size << t->p) * sizeof (uint32_t);
3698 t->result = (char *) xmalloc (t->result_size);
3700 level1_offset =
3701 5 * sizeof (uint32_t);
3702 level2_offset =
3703 5 * sizeof (uint32_t)
3704 + t->level1_size * sizeof (uint32_t);
3705 level3_offset =
3706 5 * sizeof (uint32_t)
3707 + t->level1_size * sizeof (uint32_t)
3708 + (t->level2_size << t->q) * sizeof (uint32_t);
3710 ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3711 ((uint32_t *) t->result)[1] = t->level1_size;
3712 ((uint32_t *) t->result)[2] = t->p + 5;
3713 ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3714 ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3716 for (i = 0; i < t->level1_size; i++)
3717 ((uint32_t *) (t->result + level1_offset))[i] =
3718 (t->level1[i] == EMPTY
3720 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3722 for (i = 0; i < (t->level2_size << t->q); i++)
3723 ((uint32_t *) (t->result + level2_offset))[i] =
3724 (t->level2[i] == EMPTY
3726 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3728 for (i = 0; i < (t->level3_size << t->p); i++)
3729 ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3731 if (t->level1_alloc > 0)
3732 free (t->level1);
3733 if (t->level2_alloc > 0)
3734 free (t->level2);
3735 if (t->level3_alloc > 0)
3736 free (t->level3);
3739 #define TABLE wcwidth_table
3740 #define ELEMENT uint8_t
3741 #define DEFAULT 0xff
3742 #include "3level.h"
3744 #define TABLE wctrans_table
3745 #define ELEMENT int32_t
3746 #define DEFAULT 0
3747 #define wctrans_table_add wctrans_table_add_internal
3748 #include "3level.h"
3749 #undef wctrans_table_add
3750 /* The wctrans_table must actually store the difference between the
3751 desired result and the argument. */
3752 static inline void
3753 wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
3755 wctrans_table_add_internal (t, wc, mapped_wc - wc);
3759 /* Flattens the included transliterations into a translit list.
3760 Inserts them in the list at `cursor', and returns the new cursor. */
3761 static struct translit_t **
3762 translit_flatten (struct locale_ctype_t *ctype,
3763 const struct charmap_t *charmap,
3764 struct translit_t **cursor)
3766 while (ctype->translit_include != NULL)
3768 const char *copy_locale = ctype->translit_include->copy_locale;
3769 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3770 struct localedef_t *other;
3772 /* Unchain the include statement. During the depth-first traversal
3773 we don't want to visit any locale more than once. */
3774 ctype->translit_include = ctype->translit_include->next;
3776 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3778 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3780 WITH_CUR_LOCALE (error (0, 0, _("\
3781 %s: transliteration data from locale `%s' not available"),
3782 "LC_CTYPE", copy_locale));
3784 else
3786 struct locale_ctype_t *other_ctype =
3787 other->categories[LC_CTYPE].ctype;
3789 cursor = translit_flatten (other_ctype, charmap, cursor);
3790 assert (other_ctype->translit_include == NULL);
3792 if (other_ctype->translit != NULL)
3794 /* Insert the other_ctype->translit list at *cursor. */
3795 struct translit_t *endp = other_ctype->translit;
3796 while (endp->next != NULL)
3797 endp = endp->next;
3799 endp->next = *cursor;
3800 *cursor = other_ctype->translit;
3802 /* Avoid any risk of circular lists. */
3803 other_ctype->translit = NULL;
3805 cursor = &endp->next;
3808 if (ctype->default_missing == NULL)
3809 ctype->default_missing = other_ctype->default_missing;
3813 return cursor;
3816 static void
3817 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3818 struct repertoire_t *repertoire)
3820 size_t idx, nr;
3821 const void *key;
3822 size_t len;
3823 void *vdata;
3824 void *curs;
3826 /* You wonder about this amount of memory? This is only because some
3827 users do not manage to address the array with unsigned values or
3828 data types with range >= 256. '\200' would result in the array
3829 index -128. To help these poor people we duplicate the entries for
3830 128 up to 255 below the entry for \0. */
3831 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3832 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3833 ctype->class_b = (uint32_t **)
3834 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3835 ctype->class_3level = (struct iovec *)
3836 xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3838 /* This is the array accessed using the multibyte string elements. */
3839 for (idx = 0; idx < 256; ++idx)
3840 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3842 /* Mirror first 127 entries. We must take care that entry -1 is not
3843 mirrored because EOF == -1. */
3844 for (idx = 0; idx < 127; ++idx)
3845 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3847 /* The 32 bit array contains all characters < 0x100. */
3848 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3849 if (ctype->charnames[idx] < 0x100)
3850 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3852 for (nr = 0; nr < ctype->nr_charclass; nr++)
3854 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3856 /* We only set CLASS_B for the bits in the ISO C classes, not
3857 the user defined classes. The number should not change but
3858 who knows. */
3859 #define LAST_ISO_C_BIT 11
3860 if (nr <= LAST_ISO_C_BIT)
3861 for (idx = 0; idx < 256; ++idx)
3862 if (ctype->class256_collection[idx] & _ISbit (nr))
3863 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3866 for (nr = 0; nr < ctype->nr_charclass; nr++)
3868 struct wctype_table t;
3870 t.p = 4; /* or: 5 */
3871 t.q = 7; /* or: 6 */
3872 wctype_table_init (&t);
3874 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3875 if (ctype->class_collection[idx] & _ISwbit (nr))
3876 wctype_table_add (&t, ctype->charnames[idx]);
3878 wctype_table_finalize (&t);
3880 if (verbose)
3881 WITH_CUR_LOCALE (fprintf (stderr, _("\
3882 %s: table for class \"%s\": %lu bytes\n"),
3883 "LC_CTYPE", ctype->classnames[nr],
3884 (unsigned long int) t.result_size));
3886 ctype->class_3level[nr].iov_base = t.result;
3887 ctype->class_3level[nr].iov_len = t.result_size;
3890 /* Room for table of mappings. */
3891 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3892 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3893 * sizeof (uint32_t *));
3894 ctype->map_3level = (struct iovec *)
3895 xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3897 /* Fill in all mappings. */
3898 for (idx = 0; idx < 2; ++idx)
3900 unsigned int idx2;
3902 /* Allocate table. */
3903 ctype->map_b[idx] = (uint32_t *)
3904 xmalloc ((256 + 128) * sizeof (uint32_t));
3906 /* Copy values from collection. */
3907 for (idx2 = 0; idx2 < 256; ++idx2)
3908 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3910 /* Mirror first 127 entries. We must take care not to map entry
3911 -1 because EOF == -1. */
3912 for (idx2 = 0; idx2 < 127; ++idx2)
3913 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3915 /* EOF must map to EOF. */
3916 ctype->map_b[idx][127] = EOF;
3919 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3921 unsigned int idx2;
3923 /* Allocate table. */
3924 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3926 /* Copy values from collection. Default is identity mapping. */
3927 for (idx2 = 0; idx2 < 256; ++idx2)
3928 ctype->map32_b[idx][idx2] =
3929 (ctype->map_collection[idx][idx2] != 0
3930 ? ctype->map_collection[idx][idx2]
3931 : idx2);
3934 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3936 struct wctrans_table t;
3938 t.p = 7;
3939 t.q = 9;
3940 wctrans_table_init (&t);
3942 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3943 if (ctype->map_collection[nr][idx] != 0)
3944 wctrans_table_add (&t, ctype->charnames[idx],
3945 ctype->map_collection[nr][idx]);
3947 wctrans_table_finalize (&t);
3949 if (verbose)
3950 WITH_CUR_LOCALE (fprintf (stderr, _("\
3951 %s: table for map \"%s\": %lu bytes\n"),
3952 "LC_CTYPE", ctype->mapnames[nr],
3953 (unsigned long int) t.result_size));
3955 ctype->map_3level[nr].iov_base = t.result;
3956 ctype->map_3level[nr].iov_len = t.result_size;
3959 /* Extra array for class and map names. */
3960 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3961 * sizeof (uint32_t));
3962 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3963 * sizeof (uint32_t));
3965 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3966 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3968 /* Array for width information. Because the expected widths are very
3969 small (never larger than 2) we use only one single byte. This
3970 saves space.
3971 We put only printable characters in the table. wcwidth is specified
3972 to return -1 for non-printable characters. Doing the check here
3973 saves a run-time check.
3974 But we put L'\0' in the table. This again saves a run-time check. */
3976 struct wcwidth_table t;
3978 t.p = 7;
3979 t.q = 9;
3980 wcwidth_table_init (&t);
3982 /* First set all the printable characters of the character set to
3983 the default width. */
3984 curs = NULL;
3985 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3987 struct charseq *data = (struct charseq *) vdata;
3989 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3990 data->ucs4 = repertoire_find_value (ctype->repertoire,
3991 data->name, len);
3993 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3995 uint32_t *class_bits =
3996 find_idx (ctype, &ctype->class_collection, NULL,
3997 &ctype->class_collection_act, data->ucs4);
3999 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4000 wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4004 /* Now add the explicitly specified widths. */
4005 if (charmap->width_rules != NULL)
4007 size_t cnt;
4009 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4011 unsigned char bytes[charmap->mb_cur_max];
4012 int nbytes = charmap->width_rules[cnt].from->nbytes;
4014 /* We have the range of character for which the width is
4015 specified described using byte sequences of the multibyte
4016 charset. We have to convert this to UCS4 now. And we
4017 cannot simply convert the beginning and the end of the
4018 sequence, we have to iterate over the byte sequence and
4019 convert it for every single character. */
4020 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4022 while (nbytes < charmap->width_rules[cnt].to->nbytes
4023 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4024 nbytes) <= 0)
4026 /* Find the UCS value for `bytes'. */
4027 int inner;
4028 uint32_t wch;
4029 struct charseq *seq =
4030 charmap_find_symbol (charmap, (char *) bytes, nbytes);
4032 if (seq == NULL)
4033 wch = ILLEGAL_CHAR_VALUE;
4034 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4035 wch = seq->ucs4;
4036 else
4037 wch = repertoire_find_value (ctype->repertoire, seq->name,
4038 strlen (seq->name));
4040 if (wch != ILLEGAL_CHAR_VALUE)
4042 /* Store the value. */
4043 uint32_t *class_bits =
4044 find_idx (ctype, &ctype->class_collection, NULL,
4045 &ctype->class_collection_act, wch);
4047 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4048 wcwidth_table_add (&t, wch,
4049 charmap->width_rules[cnt].width);
4052 /* "Increment" the bytes sequence. */
4053 inner = nbytes - 1;
4054 while (inner >= 0 && bytes[inner] == 0xff)
4055 --inner;
4057 if (inner < 0)
4059 /* We have to extend the byte sequence. */
4060 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4061 break;
4063 bytes[0] = 1;
4064 memset (&bytes[1], 0, nbytes);
4065 ++nbytes;
4067 else
4069 ++bytes[inner];
4070 while (++inner < nbytes)
4071 bytes[inner] = 0;
4077 /* Set the width of L'\0' to 0. */
4078 wcwidth_table_add (&t, 0, 0);
4080 wcwidth_table_finalize (&t);
4082 if (verbose)
4083 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4084 "LC_CTYPE", (unsigned long int) t.result_size));
4086 ctype->width.iov_base = t.result;
4087 ctype->width.iov_len = t.result_size;
4090 /* Set MB_CUR_MAX. */
4091 ctype->mb_cur_max = charmap->mb_cur_max;
4093 /* Now determine the table for the transliteration information.
4095 XXX It is not yet clear to me whether it is worth implementing a
4096 complicated algorithm which uses a hash table to locate the entries.
4097 For now I'll use a simple array which can be searching using binary
4098 search. */
4099 if (ctype->translit_include != NULL)
4100 /* Traverse the locales mentioned in the `include' statements in a
4101 depth-first way and fold in their transliteration information. */
4102 translit_flatten (ctype, charmap, &ctype->translit);
4104 if (ctype->translit != NULL)
4106 /* First count how many entries we have. This is the upper limit
4107 since some entries from the included files might be overwritten. */
4108 size_t number = 0;
4109 size_t cnt;
4110 struct translit_t *runp = ctype->translit;
4111 struct translit_t **sorted;
4112 size_t from_len, to_len;
4114 while (runp != NULL)
4116 ++number;
4117 runp = runp->next;
4120 /* Next we allocate an array large enough and fill in the values. */
4121 sorted = (struct translit_t **) alloca (number
4122 * sizeof (struct translit_t **));
4123 runp = ctype->translit;
4124 number = 0;
4127 /* Search for the place where to insert this string.
4128 XXX Better use a real sorting algorithm later. */
4129 size_t idx = 0;
4130 int replace = 0;
4132 while (idx < number)
4134 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4135 (const wchar_t *) runp->from);
4136 if (res == 0)
4138 replace = 1;
4139 break;
4141 if (res > 0)
4142 break;
4143 ++idx;
4146 if (replace)
4147 sorted[idx] = runp;
4148 else
4150 memmove (&sorted[idx + 1], &sorted[idx],
4151 (number - idx) * sizeof (struct translit_t *));
4152 sorted[idx] = runp;
4153 ++number;
4156 runp = runp->next;
4158 while (runp != NULL);
4160 /* The next step is putting all the possible transliteration
4161 strings in one memory block so that we can write it out.
4162 We need several different blocks:
4163 - index to the from-string array
4164 - from-string array
4165 - index to the to-string array
4166 - to-string array.
4168 from_len = to_len = 0;
4169 for (cnt = 0; cnt < number; ++cnt)
4171 struct translit_to_t *srunp;
4172 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4173 srunp = sorted[cnt]->to;
4174 while (srunp != NULL)
4176 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4177 srunp = srunp->next;
4179 /* Plus one for the extra NUL character marking the end of
4180 the list for the current entry. */
4181 ++to_len;
4184 /* We can allocate the arrays for the results. */
4185 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4186 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4187 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4188 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4190 from_len = 0;
4191 to_len = 0;
4192 for (cnt = 0; cnt < number; ++cnt)
4194 size_t len;
4195 struct translit_to_t *srunp;
4197 ctype->translit_from_idx[cnt] = from_len;
4198 ctype->translit_to_idx[cnt] = to_len;
4200 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4201 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4202 (const wchar_t *) sorted[cnt]->from, len);
4203 from_len += len;
4205 ctype->translit_to_idx[cnt] = to_len;
4206 srunp = sorted[cnt]->to;
4207 while (srunp != NULL)
4209 len = wcslen ((const wchar_t *) srunp->str) + 1;
4210 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4211 (const wchar_t *) srunp->str, len);
4212 to_len += len;
4213 srunp = srunp->next;
4215 ctype->translit_to_tbl[to_len++] = L'\0';
4218 /* Store the information about the length. */
4219 ctype->translit_idx_size = number;
4220 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4221 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4223 else
4225 /* Provide some dummy pointers since we have nothing to write out. */
4226 static uint32_t no_str = { 0 };
4228 ctype->translit_from_idx = &no_str;
4229 ctype->translit_from_tbl = &no_str;
4230 ctype->translit_to_tbl = &no_str;
4231 ctype->translit_idx_size = 0;
4232 ctype->translit_from_tbl_size = 0;
4233 ctype->translit_to_tbl_size = 0;