locale/programs/ld-ctype.c

   1 /* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <alloca.h>
  24 #include <byteswap.h>
  25 #include <endian.h>
  26 #include <errno.h>
  27 #include <limits.h>
  28 #include <obstack.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32 #include <wctype.h>
  33 #include <sys/uio.h>
  34
  35 #include "localedef.h"
  36 #include "charmap.h"
  37 #include "localeinfo.h"
  38 #include "langinfo.h"
  39 #include "linereader.h"
  40 #include "locfile-token.h"
  41 #include "locfile.h"
  42
  43 #include <assert.h>
  44
  45
  46 #ifdef PREDEFINED_CLASSES
  47 /* These are the extra bits not in wctype.h since these are not preallocated
  48    classes.  */
  49 # define _ISwspecial1   (1 << 29)
  50 # define _ISwspecial2   (1 << 30)
  51 # define _ISwspecial3   (1 << 31)
  52 #endif
  53
  54
/* The bit used for representing a special class.
   BITPOS yields the offset of the token CLASS relative to tok_upper; BIT
   and BITw hand that offset to _ISbit and _ISwbit respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expands to the dereferenced result of find_idx, i.e. an lvalue for the
   uint32_t slot that belongs to VALUE in the table CTYPE->COLLECTION.
   The members COLLECTION, COLLECTION_max and COLLECTION_act are formed by
   token pasting.  IDX is appended verbatim to each of them; it may be
   empty, as in `ELEM (ctype, class_collection, , value)' (presumably it
   is a subscript such as `[n]' for the per-map arrays -- confirm at the
   call sites).  */
#define ELEM(ctype, collection, idx, value)                                   \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,     \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
  70
  71
  72 /* Type to describe a transliteration action.  We have a possibly
  73    multiple character from-string and a set of multiple character
  74    to-strings.  All are 32bit values since this is what is used in
  75    the gconv functions.  */
struct translit_to_t
{
  /* One to-string, stored as 32-bit values.  */
  uint32_t *str;

  /* Next to-string in the singly linked list, or NULL.  */
  struct translit_to_t *next;
};
  82
struct translit_t
{
  /* The from-string, stored as 32-bit values.  */
  uint32_t *from;

  /* File name and line number recorded with this entry (presumably the
     place of the definition, for diagnostics -- confirm in the reader).  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings for this from-string.  */
  struct translit_to_t *to;

  /* Next transliteration entry in the singly linked list, or NULL.  */
  struct translit_t *next;
};
  94
  95 struct translit_ignore_t
  96 {
  97   uint32_t from;
  98   uint32_t to;
  99   uint32_t step;
 100
 101   const char *fname;
 102   size_t lineno;
 103
 104   struct translit_ignore_t *next;
 105 };
 106
 107
 108 /* Type to describe a transliteration include statement.  */
 109 struct translit_include_t
 110 {
 111   const char *copy_locale;
 112   const char *copy_repertoire;
 113
 114   struct translit_include_t *next;
 115 };
 116
 117
/* Sparse table of uint32_t.
   "3level.h" is used as a template that is parameterized through the
   macros defined right before the inclusion.  With TABLE set to idx_table
   it supplies `struct idx_table' and idx_table_init, both of which are
   used further down in this file.  NOTE(review): the precise effect of
   ELEMENT, DEFAULT and NO_FINALIZE is defined by 3level.h -- confirm
   there before changing any of them.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_FINALIZE
#include "3level.h"
 124
 125
 126 /* The real definition of the struct for the LC_CTYPE locale.  */
 127 struct locale_ctype_t
 128 {
 129   uint32_t *charnames;
 130   size_t charnames_max;
 131   size_t charnames_act;
 132   /* An index lookup table, to speedup find_idx.  */
 133   struct idx_table charnames_idx;
 134
 135   struct repertoire_t *repertoire;
 136
 137   /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
 138 #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
 139   size_t nr_charclass;
 140   const char *classnames[MAX_NR_CHARCLASS];
 141   uint32_t last_class_char;
 142   uint32_t class256_collection[256];
 143   uint32_t *class_collection;
 144   size_t class_collection_max;
 145   size_t class_collection_act;
 146   uint32_t class_done;
 147   uint32_t class_offset;
 148
 149   struct charseq **mbdigits;
 150   size_t mbdigits_act;
 151   size_t mbdigits_max;
 152   uint32_t *wcdigits;
 153   size_t wcdigits_act;
 154   size_t wcdigits_max;
 155
 156   struct charseq *mboutdigits[10];
 157   uint32_t wcoutdigits[10];
 158   size_t outdigits_act;
 159
 160   /* If the following number ever turns out to be too small simply
 161      increase it.  But I doubt it will.  --drepper@gnu */
 162 #define MAX_NR_CHARMAP 16
 163   const char *mapnames[MAX_NR_CHARMAP];
 164   uint32_t *map_collection[MAX_NR_CHARMAP];
 165   uint32_t map256_collection[2][256];
 166   size_t map_collection_max[MAX_NR_CHARMAP];
 167   size_t map_collection_act[MAX_NR_CHARMAP];
 168   size_t map_collection_nr;
 169   size_t last_map_idx;
 170   int tomap_done[MAX_NR_CHARMAP];
 171   uint32_t map_offset;
 172
 173   /* Transliteration information.  */
 174   struct translit_include_t *translit_include;
 175   struct translit_t *translit;
 176   struct translit_ignore_t *translit_ignore;
 177   uint32_t ntranslit_ignore;
 178
 179   uint32_t *default_missing;
 180   const char *default_missing_file;
 181   size_t default_missing_lineno;
 182
 183   uint32_t to_nonascii;
 184
 185   /* The arrays for the binary representation.  */
 186   char_class_t *ctype_b;
 187   char_class32_t *ctype32_b;
 188   uint32_t **map_b;
 189   uint32_t **map32_b;
 190   uint32_t **class_b;
 191   struct iovec *class_3level;
 192   struct iovec *map_3level;
 193   uint32_t *class_name_ptr;
 194   uint32_t *map_name_ptr;
 195   struct iovec width;
 196   uint32_t mb_cur_max;
 197   const char *codeset_name;
 198   uint32_t *translit_from_idx;
 199   uint32_t *translit_from_tbl;
 200   uint32_t *translit_to_idx;
 201   uint32_t *translit_to_tbl;
 202   uint32_t translit_idx_size;
 203   size_t translit_from_tbl_size;
 204   size_t translit_to_tbl_size;
 205
 206   struct obstack mempool;
 207 };
 208
 209
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks which <obstack.h> expects its user to provide: new
   chunks are obtained with xmalloc and released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
 217
 218
/* Prototypes for local functions.  */

/* Create the LC_CTYPE data of LOCALE, either freshly allocated or shared
   with COPY_LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register the character class NAME.  The order of the calls determines
   the bit positions of the classes.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register the character map NAME (e.g. "toupper").  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return a pointer to the slot for the character IDX in *TABLE.  It is
   also called with TABLE, MAX and ACT all NULL, purely for its side
   effect of adding IDX to the name array if necessary.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, unsigned int idx);
/* Set default values for the classes which were not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* NOTE(review): presumably fills the arrays for the binary representation
   kept in `struct locale_ctype_t'; the definition is not part of the
   reviewed excerpt -- confirm.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
 237
 238
 239 static const char *longnames[] =
 240 {
 241   "zero", "one", "two", "three", "four",
 242   "five", "six", "seven", "eight", "nine"
 243 };
 244 static const char *uninames[] =
 245 {
 246   "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
 247   "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
 248 };
 249 static const unsigned char digits[] = "0123456789";
 250
 251
 252 static void
 253 ctype_startup (struct linereader *lr, struct localedef_t *locale,
 254                const struct charmap_t *charmap,
 255                struct localedef_t *copy_locale, int ignore_content)
 256 {
 257   unsigned int cnt;
 258   struct locale_ctype_t *ctype;
 259
 260   if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
 261     {
 262       if (copy_locale == NULL)
 263         {
 264           /* Allocate the needed room.  */
 265           locale->categories[LC_CTYPE].ctype = ctype =
 266             (struct locale_ctype_t *) xcalloc (1,
 267                                                sizeof (struct locale_ctype_t));
 268
 269           /* We have seen no names yet.  */
 270           ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
 271           ctype->charnames =
 272             (unsigned int *) xmalloc (ctype->charnames_max
 273                                       * sizeof (unsigned int));
 274           for (cnt = 0; cnt < 256; ++cnt)
 275             ctype->charnames[cnt] = cnt;
 276           ctype->charnames_act = 256;
 277           idx_table_init (&ctype->charnames_idx);
 278
 279           /* Fill character class information.  */
 280           ctype->last_class_char = ILLEGAL_CHAR_VALUE;
 281           /* The order of the following instructions determines the bit
 282              positions!  */
 283           ctype_class_new (lr, ctype, "upper");
 284           ctype_class_new (lr, ctype, "lower");
 285           ctype_class_new (lr, ctype, "alpha");
 286           ctype_class_new (lr, ctype, "digit");
 287           ctype_class_new (lr, ctype, "xdigit");
 288           ctype_class_new (lr, ctype, "space");
 289           ctype_class_new (lr, ctype, "print");
 290           ctype_class_new (lr, ctype, "graph");
 291           ctype_class_new (lr, ctype, "blank");
 292           ctype_class_new (lr, ctype, "cntrl");
 293           ctype_class_new (lr, ctype, "punct");
 294           ctype_class_new (lr, ctype, "alnum");
 295 #ifdef PREDEFINED_CLASSES
 296           /* The following are extensions from ISO 14652.  */
 297           ctype_class_new (lr, ctype, "left_to_right");
 298           ctype_class_new (lr, ctype, "right_to_left");
 299           ctype_class_new (lr, ctype, "num_terminator");
 300           ctype_class_new (lr, ctype, "num_separator");
 301           ctype_class_new (lr, ctype, "segment_separator");
 302           ctype_class_new (lr, ctype, "block_separator");
 303           ctype_class_new (lr, ctype, "direction_control");
 304           ctype_class_new (lr, ctype, "sym_swap_layout");
 305           ctype_class_new (lr, ctype, "char_shape_selector");
 306           ctype_class_new (lr, ctype, "num_shape_selector");
 307           ctype_class_new (lr, ctype, "non_spacing");
 308           ctype_class_new (lr, ctype, "non_spacing_level3");
 309           ctype_class_new (lr, ctype, "normal_connect");
 310           ctype_class_new (lr, ctype, "r_connect");
 311           ctype_class_new (lr, ctype, "no_connect");
 312           ctype_class_new (lr, ctype, "no_connect-space");
 313           ctype_class_new (lr, ctype, "vowel_connect");
 314 #endif
 315
 316           ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
 317           ctype->class_collection
 318             = (uint32_t *) xcalloc (sizeof (unsigned long int),
 319                                     ctype->class_collection_max);
 320           ctype->class_collection_act = 256;
 321
 322           /* Fill character map information.  */
 323           ctype->last_map_idx = MAX_NR_CHARMAP;
 324           ctype_map_new (lr, ctype, "toupper", charmap);
 325           ctype_map_new (lr, ctype, "tolower", charmap);
 326 #ifdef PREDEFINED_CLASSES
 327           ctype_map_new (lr, ctype, "tosymmetric", charmap);
 328 #endif
 329
 330           /* Fill first 256 entries in `toXXX' arrays.  */
 331           for (cnt = 0; cnt < 256; ++cnt)
 332             {
 333               ctype->map_collection[0][cnt] = cnt;
 334               ctype->map_collection[1][cnt] = cnt;
 335 #ifdef PREDEFINED_CLASSES
 336               ctype->map_collection[2][cnt] = cnt;
 337 #endif
 338               ctype->map256_collection[0][cnt] = cnt;
 339               ctype->map256_collection[1][cnt] = cnt;
 340             }
 341
 342           if (enc_not_ascii_compatible)
 343             ctype->to_nonascii = 1;
 344
 345           obstack_init (&ctype->mempool);
 346         }
 347       else
 348         ctype = locale->categories[LC_CTYPE].ctype =
 349           copy_locale->categories[LC_CTYPE].ctype;
 350     }
 351 }
 352
 353
 354 void
 355 ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
 356 {
 357   /* See POSIX.2, table 2-6 for the meaning of the following table.  */
 358 #define NCLASS 12
 359   static const struct
 360   {
 361     const char *name;
 362     const char allow[NCLASS];
 363   }
 364   valid_table[NCLASS] =
 365   {
 366     /* The order is important.  See token.h for more information.
 367        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */
 368     { "upper",  "--MX-XDDXXX-" },
 369     { "lower",  "--MX-XDDXXX-" },
 370     { "alpha",  "---X-XDDXXX-" },
 371     { "digit",  "XXX--XDDXXX-" },
 372     { "xdigit", "-----XDDXXX-" },
 373     { "space",  "XXXXX------X" },
 374     { "print",  "---------X--" },
 375     { "graph",  "---------X--" },
 376     { "blank",  "XXXXXM-----X" },
 377     { "cntrl",  "XXXXX-XX--XX" },
 378     { "punct",  "XXXXX-DD-X-X" },
 379     { "alnum",  "-----XDDXXX-" }
 380   };
 381   size_t cnt;
 382   int cls1, cls2;
 383   uint32_t space_value;
 384   struct charseq *space_seq;
 385   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
 386   int warned;
 387   const void *key;
 388   size_t len;
 389   void *vdata;
 390   void *curs;
 391
 392   /* Now resolve copying and also handle completely missing definitions.  */
 393   if (ctype == NULL)
 394     {
 395       const char *repertoire_name;
 396
 397       /* First see whether we were supposed to copy.  If yes, find the
 398          actual definition.  */
 399       if (locale->copy_name[LC_CTYPE] != NULL)
 400         {
 401           /* Find the copying locale.  This has to happen transitively since
 402              the locale we are copying from might also copying another one.  */
 403           struct localedef_t *from = locale;
 404
 405           do
 406             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
 407                                 from->repertoire_name, charmap);
 408           while (from->categories[LC_CTYPE].ctype == NULL
 409                  && from->copy_name[LC_CTYPE] != NULL);
 410
 411           ctype = locale->categories[LC_CTYPE].ctype
 412             = from->categories[LC_CTYPE].ctype;
 413         }
 414
 415       /* If there is still no definition issue an warning and create an
 416          empty one.  */
 417       if (ctype == NULL)
 418         {
 419           if (! be_quiet)
 420             WITH_CUR_LOCALE (error (0, 0, _("\
 421 No definition for %s category found"), "LC_CTYPE"));
 422           ctype_startup (NULL, locale, charmap, NULL, 0);
 423           ctype = locale->categories[LC_CTYPE].ctype;
 424         }
 425
 426       /* Get the repertoire we have to use.  */
 427       repertoire_name = locale->repertoire_name ?: repertoire_global;
 428       if (repertoire_name != NULL)
 429         ctype->repertoire = repertoire_read (repertoire_name);
 430     }
 431
 432   /* We need the name of the currently used 8-bit character set to
 433      make correct conversion between this 8-bit representation and the
 434      ISO 10646 character set used internally for wide characters.  */
 435   ctype->codeset_name = charmap->code_set_name;
 436   if (ctype->codeset_name == NULL)
 437     {
 438       if (! be_quiet)
 439         WITH_CUR_LOCALE (error (0, 0, _("\
 440 No character set name specified in charmap")));
 441       ctype->codeset_name = "//UNKNOWN//";
 442     }
 443
 444   /* Set default value for classes not specified.  */
 445   set_class_defaults (ctype, charmap, ctype->repertoire);
 446
 447   /* Check according to table.  */
 448   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
 449     {
 450       uint32_t tmp = ctype->class_collection[cnt];
 451
 452       if (tmp != 0)
 453         {
 454           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 455             if ((tmp & _ISwbit (cls1)) != 0)
 456               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 457                 if (valid_table[cls1].allow[cls2] != '-')
 458                   {
 459                     int eq = (tmp & _ISwbit (cls2)) != 0;
 460                     switch (valid_table[cls1].allow[cls2])
 461                       {
 462                       case 'M':
 463                         if (!eq)
 464                           {
 465                             uint32_t value = ctype->charnames[cnt];
 466
 467                             if (!be_quiet)
 468                               WITH_CUR_LOCALE (error (0, 0, _("\
 469 character L'\\u%0*x' in class `%s' must be in class `%s'"),
 470                                                       value > 0xffff ? 8 : 4,
 471                                                       value,
 472                                                       valid_table[cls1].name,
 473                                                       valid_table[cls2].name));
 474                           }
 475                         break;
 476
 477                       case 'X':
 478                         if (eq)
 479                           {
 480                             uint32_t value = ctype->charnames[cnt];
 481
 482                             if (!be_quiet)
 483                               WITH_CUR_LOCALE (error (0, 0, _("\
 484 character L'\\u%0*x' in class `%s' must not be in class `%s'"),
 485                                                       value > 0xffff ? 8 : 4,
 486                                                       value,
 487                                                       valid_table[cls1].name,
 488                                                       valid_table[cls2].name));
 489                           }
 490                         break;
 491
 492                       case 'D':
 493                         ctype->class_collection[cnt] |= _ISwbit (cls2);
 494                         break;
 495
 496                       default:
 497                         WITH_CUR_LOCALE (error (5, 0, _("\
 498 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 499                       }
 500                   }
 501         }
 502     }
 503
 504   for (cnt = 0; cnt < 256; ++cnt)
 505     {
 506       uint32_t tmp = ctype->class256_collection[cnt];
 507
 508       if (tmp != 0)
 509         {
 510           for (cls1 = 0; cls1 < NCLASS; ++cls1)
 511             if ((tmp & _ISbit (cls1)) != 0)
 512               for (cls2 = 0; cls2 < NCLASS; ++cls2)
 513                 if (valid_table[cls1].allow[cls2] != '-')
 514                   {
 515                     int eq = (tmp & _ISbit (cls2)) != 0;
 516                     switch (valid_table[cls1].allow[cls2])
 517                       {
 518                       case 'M':
 519                         if (!eq)
 520                           {
 521                             char buf[17];
 522
 523                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 524
 525                             if (!be_quiet)
 526                               WITH_CUR_LOCALE (error (0, 0, _("\
 527 character '%s' in class `%s' must be in class `%s'"),
 528                                                       buf,
 529                                                       valid_table[cls1].name,
 530                                                       valid_table[cls2].name));
 531                           }
 532                         break;
 533
 534                       case 'X':
 535                         if (eq)
 536                           {
 537                             char buf[17];
 538
 539                             snprintf (buf, sizeof buf, "\\%Zo", cnt);
 540
 541                             if (!be_quiet)
 542                               WITH_CUR_LOCALE (error (0, 0, _("\
 543 character '%s' in class `%s' must not be in class `%s'"),
 544                                                       buf,
 545                                                       valid_table[cls1].name,
 546                                                       valid_table[cls2].name));
 547                           }
 548                         break;
 549
 550                       case 'D':
 551                         ctype->class256_collection[cnt] |= _ISbit (cls2);
 552                         break;
 553
 554                       default:
 555                         WITH_CUR_LOCALE (error (5, 0, _("\
 556 internal error in %s, line %u"), __FUNCTION__, __LINE__));
 557                       }
 558                   }
 559         }
 560     }
 561
 562   /* ... and now test <SP> as a special case.  */
 563   space_value = 32;
 564   if (((cnt = BITPOS (tok_space),
 565         (ELEM (ctype, class_collection, , space_value)
 566          & BITw (tok_space)) == 0)
 567        || (cnt = BITPOS (tok_blank),
 568            (ELEM (ctype, class_collection, , space_value)
 569             & BITw (tok_blank)) == 0)))
 570     {
 571       if (!be_quiet)
 572         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 573                                 valid_table[cnt].name));
 574     }
 575   else if (((cnt = BITPOS (tok_punct),
 576              (ELEM (ctype, class_collection, , space_value)
 577               & BITw (tok_punct)) != 0)
 578             || (cnt = BITPOS (tok_graph),
 579                 (ELEM (ctype, class_collection, , space_value)
 580                  & BITw (tok_graph))
 581                 != 0)))
 582     {
 583       if (!be_quiet)
 584         WITH_CUR_LOCALE (error (0, 0, _("\
 585 <SP> character must not be in class `%s'"),
 586                                 valid_table[cnt].name));
 587     }
 588   else
 589     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
 590
 591   space_seq = charmap_find_value (charmap, "SP", 2);
 592   if (space_seq == NULL)
 593     space_seq = charmap_find_value (charmap, "space", 5);
 594   if (space_seq == NULL)
 595     space_seq = charmap_find_value (charmap, "U00000020", 9);
 596   if (space_seq == NULL || space_seq->nbytes != 1)
 597     {
 598       if (!be_quiet)
 599         WITH_CUR_LOCALE (error (0, 0, _("\
 600 character <SP> not defined in character map")));
 601     }
 602   else if (((cnt = BITPOS (tok_space),
 603              (ctype->class256_collection[space_seq->bytes[0]]
 604               & BIT (tok_space)) == 0)
 605             || (cnt = BITPOS (tok_blank),
 606                 (ctype->class256_collection[space_seq->bytes[0]]
 607                  & BIT (tok_blank)) == 0)))
 608     {
 609       if (!be_quiet)
 610         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
 611                                 valid_table[cnt].name));
 612     }
 613   else if (((cnt = BITPOS (tok_punct),
 614              (ctype->class256_collection[space_seq->bytes[0]]
 615               & BIT (tok_punct)) != 0)
 616             || (cnt = BITPOS (tok_graph),
 617                 (ctype->class256_collection[space_seq->bytes[0]]
 618                  & BIT (tok_graph)) != 0)))
 619     {
 620       if (!be_quiet)
 621         WITH_CUR_LOCALE (error (0, 0, _("\
 622 <SP> character must not be in class `%s'"),
 623                                 valid_table[cnt].name));
 624     }
 625   else
 626     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 627
 628   /* Now that the tests are done make sure the name array contains all
 629      characters which are handled in the WIDTH section of the
 630      character set definition file.  */
 631   if (charmap->width_rules != NULL)
 632     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
 633       {
 634         unsigned char bytes[charmap->mb_cur_max];
 635         int nbytes = charmap->width_rules[cnt].from->nbytes;
 636
 637         /* We have the range of character for which the width is
 638            specified described using byte sequences of the multibyte
 639            charset.  We have to convert this to UCS4 now.  And we
 640            cannot simply convert the beginning and the end of the
 641            sequence, we have to iterate over the byte sequence and
 642            convert it for every single character.  */
 643         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
 644
 645         while (nbytes < charmap->width_rules[cnt].to->nbytes
 646                || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
 647                           nbytes) <= 0)
 648           {
 649             /* Find the UCS value for `bytes'.  */
 650             int inner;
 651             uint32_t wch;
 652             struct charseq *seq
 653               = charmap_find_symbol (charmap, (char *) bytes, nbytes);
 654
 655             if (seq == NULL)
 656               wch = ILLEGAL_CHAR_VALUE;
 657             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 658               wch = seq->ucs4;
 659             else
 660               wch = repertoire_find_value (ctype->repertoire, seq->name,
 661                                            strlen (seq->name));
 662
 663             if (wch != ILLEGAL_CHAR_VALUE)
 664               /* We are only interested in the side-effects of the
 665                  `find_idx' call.  It will add appropriate entries in
 666                  the name array if this is necessary.  */
 667               (void) find_idx (ctype, NULL, NULL, NULL, wch);
 668
 669             /* "Increment" the bytes sequence.  */
 670             inner = nbytes - 1;
 671             while (inner >= 0 && bytes[inner] == 0xff)
 672               --inner;
 673
 674             if (inner < 0)
 675               {
 676                 /* We have to extend the byte sequence.  */
 677                 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
 678                   break;
 679
 680                 bytes[0] = 1;
 681                 memset (&bytes[1], 0, nbytes);
 682                 ++nbytes;
 683               }
 684             else
 685               {
 686                 ++bytes[inner];
 687                 while (++inner < nbytes)
 688                   bytes[inner] = 0;
 689               }
 690           }
 691       }
 692
 693   /* Now set all the other characters of the character set to the
 694      default width.  */
 695   curs = NULL;
 696   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
 697     {
 698       struct charseq *data = (struct charseq *) vdata;
 699
 700       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
 701         data->ucs4 = repertoire_find_value (ctype->repertoire,
 702                                             data->name, len);
 703
 704       if (data->ucs4 != ILLEGAL_CHAR_VALUE)
 705         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
 706     }
 707
 708   /* There must be a multiple of 10 digits.  */
 709   if (ctype->mbdigits_act % 10 != 0)
 710     {
 711       assert (ctype->mbdigits_act == ctype->wcdigits_act);
 712       ctype->wcdigits_act -= ctype->mbdigits_act % 10;
 713       ctype->mbdigits_act -= ctype->mbdigits_act % 10;
 714       WITH_CUR_LOCALE (error (0, 0, _("\
 715 `digit' category has not entries in groups of ten")));
 716     }
 717
 718   /* Check the input digits.  There must be a multiple of ten available.
 719      In each group it could be that one or the other character is missing.
 720      In this case the whole group must be removed.  */
 721   cnt = 0;
 722   while (cnt < ctype->mbdigits_act)
 723     {
 724       size_t inner;
 725       for (inner = 0; inner < 10; ++inner)
 726         if (ctype->mbdigits[cnt + inner] == NULL)
 727           break;
 728
 729       if (inner == 10)
 730         cnt += 10;
 731       else
 732         {
 733           /* Remove the group.  */
 734           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
 735                    ((ctype->wcdigits_act - cnt - 10)
 736                     * sizeof (ctype->mbdigits[0])));
 737           ctype->mbdigits_act -= 10;
 738         }
 739     }
 740
 741   /* If no input digits are given use the default.  */
 742   if (ctype->mbdigits_act == 0)
 743     {
 744       if (ctype->mbdigits_max == 0)
 745         {
 746           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 747                                            10 * sizeof (struct charseq *));
 748           ctype->mbdigits_max = 10;
 749         }
 750
 751       for (cnt = 0; cnt < 10; ++cnt)
 752         {
 753           ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 754                                                       (char *) digits + cnt, 1);
 755           if (ctype->mbdigits[cnt] == NULL)
 756             {
 757               ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
 758                                                           longnames[cnt],
 759                                                           strlen (longnames[cnt]));
 760               if (ctype->mbdigits[cnt] == NULL)
 761                 {
 762                   /* Hum, this ain't good.  */
 763                   WITH_CUR_LOCALE (error (0, 0, _("\
 764 no input digits defined and none of the standard names in the charmap")));
 765
 766                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 767                                                         sizeof (struct charseq) + 1);
 768
 769                   /* This is better than nothing.  */
 770                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
 771                   ctype->mbdigits[cnt]->nbytes = 1;
 772                 }
 773             }
 774         }
 775
 776       ctype->mbdigits_act = 10;
 777     }
 778
 779   /* Check the wide character input digits.  There must be a multiple
 780      of ten available.  In each group it could be that one or the other
 781      character is missing.  In this case the whole group must be
 782      removed.  */
 783   cnt = 0;
 784   while (cnt < ctype->wcdigits_act)
 785     {
 786       size_t inner;
 787       for (inner = 0; inner < 10; ++inner)
 788         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
 789           break;
 790
 791       if (inner == 10)
 792         cnt += 10;
 793       else
 794         {
 795           /* Remove the group.  */
 796           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
 797                    ((ctype->wcdigits_act - cnt - 10)
 798                     * sizeof (ctype->wcdigits[0])));
 799           ctype->wcdigits_act -= 10;
 800         }
 801     }
 802
 803   /* If no input digits are given use the default.  */
 804   if (ctype->wcdigits_act == 0)
 805     {
 806       if (ctype->wcdigits_max == 0)
 807         {
 808           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
 809                                            10 * sizeof (uint32_t));
 810           ctype->wcdigits_max = 10;
 811         }
 812
 813       for (cnt = 0; cnt < 10; ++cnt)
 814         ctype->wcdigits[cnt] = L'0' + cnt;
 815
 816       ctype->mbdigits_act = 10;
 817     }
 818
 819   /* Check the outdigits.  */
 820   warned = 0;
 821   for (cnt = 0; cnt < 10; ++cnt)
 822     if (ctype->mboutdigits[cnt] == NULL)
 823       {
 824         static struct charseq replace[2];
 825
 826         if (!warned)
 827           {
 828             WITH_CUR_LOCALE (error (0, 0, _("\
 829 not all characters used in `outdigit' are available in the charmap")));
 830             warned = 1;
 831           }
 832
 833         replace[0].nbytes = 1;
 834         replace[0].bytes[0] = '?';
 835         replace[0].bytes[1] = '\0';
 836         ctype->mboutdigits[cnt] = &replace[0];
 837       }
 838
 839   warned = 0;
 840   for (cnt = 0; cnt < 10; ++cnt)
 841     if (ctype->wcoutdigits[cnt] == 0)
 842       {
 843         if (!warned)
 844           {
 845             WITH_CUR_LOCALE (error (0, 0, _("\
 846 not all characters used in `outdigit' are available in the repertoire")));
 847             warned = 1;
 848           }
 849
 850         ctype->wcoutdigits[cnt] = L'?';
 851       }
 852
 853   /* Sort the entries in the translit_ignore list.  */
 854   if (ctype->translit_ignore != NULL)
 855     {
 856       struct translit_ignore_t *firstp = ctype->translit_ignore;
 857       struct translit_ignore_t *runp;
 858
 859       ctype->ntranslit_ignore = 1;
 860
 861       for (runp = firstp->next; runp != NULL; runp = runp->next)
 862         {
 863           struct translit_ignore_t *lastp = NULL;
 864           struct translit_ignore_t *cmpp;
 865
 866           ++ctype->ntranslit_ignore;
 867
 868           for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
 869             if (runp->from < cmpp->from)
 870               break;
 871
 872           runp->next = lastp;
 873           if (lastp == NULL)
 874             firstp = runp;
 875         }
 876
 877       ctype->translit_ignore = firstp;
 878     }
 879 }
 880
 881
     /* Assemble the LC_CTYPE data of LOCALE and hand it to write_locale_data
        for OUTPUT_PATH.

        The data is described by an iovec array.  iov[0] is the struct
        locale_file header (magic number and element count), iov[1] is the
        offset table IDX and the following entries hold the payload of the
        elements in index order: first the fixed elements below
        _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1), then one table per character
        class and one per character map.  IDX[0] is the combined size of
        header and offset table; IDX[ELEM] is the offset at which element
        ELEM starts.

        Some elements need more than one iovec entry (name lists, alignment
        padding, the class tables).  OFFSET counts these additional entries
        so that iov[2 + ELEM + OFFSET] is always the next free entry.  */
 882 void
 883 ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 884               const char *output_path)
 885 {
       /* Source of the NUL bytes used for padding, at most four at a time.  */
 886   static const char nulbytes[4] = { 0, 0, 0, 0 };
 887   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
       /* Number of elements: the fixed ones plus one per character class
          and one per character map.  */
 888   const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
 889                          + ctype->nr_charclass + ctype->map_collection_nr);
       /* Room for the header, the offset table, one entry per element and
          the additional entries counted by OFFSET; the assert at the end
          of the function checks exactly this number.  */
 890   struct iovec *iov = alloca (sizeof *iov
 891                               * (2 + nelems + 2 * ctype->nr_charclass
 892                                  + ctype->map_collection_nr + 4));
 893   struct locale_file data;
       /* One slot more than NELEMS because the loop stores idx[elem + 1]
          for every element; only the first NELEMS slots are written out
          (see iov[1].iov_len below).  */
 894   uint32_t *idx = alloca (sizeof *idx * (nelems + 1));
 895   uint32_t default_missing_len;
 896   size_t elem, cnt, offset, total;
 897   char *cp;
 898
 899   /* Now prepare the output: Find the sizes of the table we can use.  */
 900   allocate_arrays (ctype, charmap, ctype->repertoire);
 901
 902   data.magic = LIMAGIC (LC_CTYPE);
 903   data.n = nelems;
 904   iov[0].iov_base = (void *) &data;
 905   iov[0].iov_len = sizeof (data);
 906
 907   iov[1].iov_base = (void *) idx;
 908   iov[1].iov_len = nelems * sizeof (uint32_t);
 909
       /* The first element starts right behind header and offset table.  */
 910   idx[0] = iov[0].iov_len + iov[1].iov_len;
 911   offset = 0;
 912
 913   for (elem = 0; elem < nelems; ++elem)
 914     {
 915       if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
 916         switch (elem)
 917           {
               /* An element without data: zero length, so the next element
                  starts at the same offset.  */
 918 #define CTYPE_EMPTY(name) \
 919           case name:                                                          \
 920             iov[2 + elem + offset].iov_base = NULL;                           \
 921             iov[2 + elem + offset].iov_len = 0;                               \
 922             idx[elem + 1] = idx[elem];                                        \
 923             break
 924
 925           CTYPE_EMPTY(_NL_CTYPE_GAP1);
 926           CTYPE_EMPTY(_NL_CTYPE_GAP2);
 927           CTYPE_EMPTY(_NL_CTYPE_GAP3);
 928           CTYPE_EMPTY(_NL_CTYPE_GAP4);
 929           CTYPE_EMPTY(_NL_CTYPE_GAP5);
 930           CTYPE_EMPTY(_NL_CTYPE_GAP6);
 931
               /* An element that consists of LEN bytes at BASE, written
                  without any padding.  */
 932 #define CTYPE_DATA(name, base, len)                                           \
 933           case _NL_ITEM_INDEX (name):                                         \
 934             iov[2 + elem + offset].iov_base = (base);                         \
 935             iov[2 + elem + offset].iov_len = (len);                           \
 936             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;       \
 937             break
 938
 939           CTYPE_DATA (_NL_CTYPE_CLASS,
 940                       ctype->ctype_b,
 941                       (256 + 128) * sizeof (char_class_t));
 942
 943           CTYPE_DATA (_NL_CTYPE_TOUPPER,
 944                       ctype->map_b[0],
 945                       (256 + 128) * sizeof (uint32_t));
 946           CTYPE_DATA (_NL_CTYPE_TOLOWER,
 947                       ctype->map_b[1],
 948                       (256 + 128) * sizeof (uint32_t));
 949
 950           CTYPE_DATA (_NL_CTYPE_TOUPPER32,
 951                       ctype->map32_b[0],
 952                       256 * sizeof (uint32_t));
 953           CTYPE_DATA (_NL_CTYPE_TOLOWER32,
 954                       ctype->map32_b[1],
 955                       256 * sizeof (uint32_t));
 956
 957           CTYPE_DATA (_NL_CTYPE_CLASS32,
 958                       ctype->ctype32_b,
 959                       256 * sizeof (char_class32_t));
 960
 961           CTYPE_DATA (_NL_CTYPE_CLASS_OFFSET,
 962                       &ctype->class_offset, sizeof (uint32_t));
 963
 964           CTYPE_DATA (_NL_CTYPE_MAP_OFFSET,
 965                       &ctype->map_offset, sizeof (uint32_t));
 966
 967           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
 968                       &ctype->translit_idx_size, sizeof (uint32_t));
 969
 970           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
 971                       ctype->translit_from_idx,
 972                       ctype->translit_idx_size * sizeof (uint32_t));
 973
 974           CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
 975                       ctype->translit_from_tbl,
 976                       ctype->translit_from_tbl_size);
 977
 978           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
 979                       ctype->translit_to_idx,
 980                       ctype->translit_idx_size * sizeof (uint32_t));
 981
 982           CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
 983                       ctype->translit_to_tbl, ctype->translit_to_tbl_size);
 984
 985           case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
 986             /* The class name array.  */
                 /* Every name, including its terminating NUL byte, gets its
                    own iovec entry; OFFSET grows by one per name.  */
 987             total = 0;
 988             for (cnt = 0; cnt < ctype->nr_charclass; ++cnt, ++offset)
 989               {
 990                 iov[2 + elem + offset].iov_base
 991                   = (void *) ctype->classnames[cnt];
 992                 iov[2 + elem + offset].iov_len
 993                   = strlen (ctype->classnames[cnt]) + 1;
 994                 total += iov[2 + elem + offset].iov_len;
 995               }
                 /* Pad to a multiple of four bytes.  The expression yields
                    between one and four bytes, so the list is always
                    followed by at least one additional NUL byte.  */
 996             iov[2 + elem + offset].iov_base = (void *) nulbytes;
 997             iov[2 + elem + offset].iov_len = 4 - (total % 4);
 998             total += 4 - (total % 4);
 999
1000             idx[elem + 1] = idx[elem] + total;
1001             break;
1002
1003           case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1004             /* The map name array.  */
                 /* Same layout as the class name array: one entry per name
                    followed by one to four NUL bytes of padding.  */
1005             total = 0;
1006             for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt, ++offset)
1007               {
1008                 iov[2 + elem + offset].iov_base
1009                   = (void *) ctype->mapnames[cnt];
1010                 iov[2 + elem + offset].iov_len
1011                   = strlen (ctype->mapnames[cnt]) + 1;
1012                 total += iov[2 + elem + offset].iov_len;
1013               }
1014             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1015             iov[2 + elem + offset].iov_len = 4 - (total % 4);
1016             total += 4 - (total % 4);
1017
1018             idx[elem + 1] = idx[elem] + total;
1019             break;
1020
1021           CTYPE_DATA (_NL_CTYPE_WIDTH,
1022                       ctype->width.iov_base,
1023                       ctype->width.iov_len);
1024
1025           CTYPE_DATA (_NL_CTYPE_MB_CUR_MAX,
1026                       &ctype->mb_cur_max, sizeof (uint32_t));
1027
1028           case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
                 /* The name including its NUL byte is written with a length
                    that is a multiple of four.  If it does not have such a
                    length already, a zero-padded copy is made.  */
1029             total = strlen (ctype->codeset_name) + 1;
1030             if (total % 4 == 0)
1031               iov[2 + elem + offset].iov_base = (char *) ctype->codeset_name;
1032             else
1033               {
1034                 iov[2 + elem + offset].iov_base = alloca ((total + 3) & ~3);
1035                 memset (mempcpy (iov[2 + elem + offset].iov_base,
1036                                  ctype->codeset_name, total),
1037                         '\0', 4 - (total & 3));
1038                 total = (total + 3) & ~3;
1039               }
1040             iov[2 + elem + offset].iov_len = total;
1041             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1042             break;
1043
1044
1045           CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
1046                       &ctype->to_nonascii, sizeof (uint32_t));
1047
1048           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
                 /* Number of groups of ten multibyte input digits.  */
1049             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1050             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1051             *(uint32_t *) iov[2 + elem + offset].iov_base =
1052               ctype->mbdigits_act / 10;
1053             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1054             break;
1055
1056           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1057             /* Align entries.  */
                 /* Zero to three NUL bytes bring the position to a multiple
                    of four.  The padding uses an iovec entry of its own
                    (hence ++OFFSET) and the start offset of this element,
                    already stored in idx[elem], is moved behind it.  */
1058             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1059             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1060             idx[elem] += iov[2 + elem + offset].iov_len;
1061             ++offset;
1062
                 /* Number of groups of ten wide character input digits.  */
1063             iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
1064             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1065             *(uint32_t *) iov[2 + elem + offset].iov_base =
1066               ctype->wcdigits_act / 10;
1067             idx[elem + 1] = idx[elem] + sizeof (uint32_t);
1068             break;
1069
1070           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1071             /* Compute the length of all possible characters.  For INDIGITS
1072                there might be more than one.  We simply concatenate all of
1073                them with a NUL byte following.  The NUL byte wouldn't be
1074                necessary but it makes it easier for the user.  */
1075             total = 0;
1076
                 /* The digit with value D of group G is mbdigits[G * 10 + D],
                    so stepping by ten visits this digit in every group.  */
1077             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1078                  cnt < ctype->mbdigits_act; cnt += 10)
1079               total += ctype->mbdigits[cnt]->nbytes + 1;
1080             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1081             iov[2 + elem + offset].iov_len = total;
1082
1083             cp = iov[2 + elem + offset].iov_base;
1084             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1085                  cnt < ctype->mbdigits_act; cnt += 10)
1086               {
1087                 cp = mempcpy (cp, ctype->mbdigits[cnt]->bytes,
1088                               ctype->mbdigits[cnt]->nbytes);
1089                 *cp++ = '\0';
1090               }
1091             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1092             break;
1093
1094           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1095             /* Each output digit has exactly one multibyte sequence
1096                (ctype->mboutdigits[cnt]).  It is copied with a NUL byte
1097                following.  The NUL byte wouldn't be necessary but it
1098                makes it easier for the user.  */
1099             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1100             total = ctype->mboutdigits[cnt]->nbytes + 1;
1101             iov[2 + elem + offset].iov_base = (char *) alloca (total);
1102             iov[2 + elem + offset].iov_len = total;
1103
1104             *(char *) mempcpy (iov[2 + elem + offset].iov_base,
1105                                ctype->mboutdigits[cnt]->bytes,
1106                                ctype->mboutdigits[cnt]->nbytes) = '\0';
1107             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1108             break;
1109
1110           case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
                 /* One 32-bit value per group of ten wide character input
                    digits.  */
1111             total = ctype->wcdigits_act / 10;
1112
1113             iov[2 + elem + offset].iov_base =
1114               (uint32_t *) alloca (total * sizeof (uint32_t));
1115             iov[2 + elem + offset].iov_len = total * sizeof (uint32_t);
1116
                 /* CNT / 10 is the number of the group the digit comes
                    from.  */
1117             for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1118                  cnt < ctype->wcdigits_act; cnt += 10)
1119               ((uint32_t *) iov[2 + elem + offset].iov_base)[cnt / 10]
1120                 = ctype->wcdigits[cnt];
1121             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1122             break;
1123
1124           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC):
1125             /* Align entries.  */
1126             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1127             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1128             idx[elem] += iov[2 + elem + offset].iov_len;
1129             ++offset;
1130             /* FALLTHROUGH */
1131
1132           case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT1_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
                 /* The wide character output digits are written directly
                    from ctype->wcoutdigits, one 32-bit value each.  */
1133             cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1134             iov[2 + elem + offset].iov_base = &ctype->wcoutdigits[cnt];
1135             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1136             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1137             break;
1138
1139           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1140             /* Align entries.  */
1141             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1142             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1143             idx[elem] += iov[2 + elem + offset].iov_len;
1144             ++offset;
1145
                 /* Length of the default_missing string in characters, zero
                    if there is none.  NOTE(review): the string is passed to
                    wcslen through a cast, which presumably relies on
                    wchar_t being 32 bits wide -- confirm.  */
1146             default_missing_len = (ctype->default_missing
1147                                    ? wcslen ((wchar_t *)ctype->default_missing)
1148                                    : 0);
1149             iov[2 + elem + offset].iov_base = &default_missing_len;
1150             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1151             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1152             break;
1153
1154           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
                 /* The characters of the string (an empty string if none
                    was given); the terminating null character is not
                    included in the length.  */
1155             iov[2 + elem + offset].iov_base =
1156               ctype->default_missing ?: (uint32_t *) L"";
1157             iov[2 + elem + offset].iov_len =
1158               wcslen (iov[2 + elem + offset].iov_base) * sizeof (uint32_t);
1159             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1160             break;
1161
1162           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1163             /* Align entries.  */
1164             iov[2 + elem + offset].iov_base = (void *) nulbytes;
1165             iov[2 + elem + offset].iov_len = (4 - idx[elem] % 4) % 4;
1166             idx[elem] += iov[2 + elem + offset].iov_len;
1167             ++offset;
1168
1169             iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore;
1170             iov[2 + elem + offset].iov_len = sizeof (uint32_t);
1171             idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1172             break;
1173
1174           case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1175             {
                   /* Flatten the translit_ignore list into consecutive
                      (from, to, step) triples of 32-bit values.  */
1176               uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore
1177                                                       * 3 * sizeof (uint32_t));
1178               struct translit_ignore_t *runp;
1179
1180               iov[2 + elem + offset].iov_base = ranges;
1181               iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore
1182                                                 * 3 * sizeof (uint32_t));
1183
1184               for (runp = ctype->translit_ignore; runp != NULL;
1185                    runp = runp->next)
1186                 {
1187                   *ranges++ = runp->from;
1188                   *ranges++ = runp->to;
1189                   *ranges++ = runp->step;
1190                 }
1191             }
1192             /* Remove the following line in case a new entry is added
1193                after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN.  */
                 /* ELEM < NELEMS is the loop condition and therefore always
                    true at this point.  */
1194             if (elem < nelems)
1195               idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1196             break;
1197
1198           default:
1199             assert (! "unknown CTYPE element");
1200           }
1201       else
1202         {
1203           /* Handle extra maps.  */
1204           size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1205           if (nr < ctype->nr_charclass)
1206             {
                   /* A class uses two entries: first class_b[nr] with
                      256 bits, then the class_3level[nr] table.  The start
                      offset stored for this element is moved behind the
                      first of the two.  */
1207               iov[2 + elem + offset].iov_base = ctype->class_b[nr];
1208               iov[2 + elem + offset].iov_len = 256 / 32 * sizeof (uint32_t);
1209               idx[elem] += iov[2 + elem + offset].iov_len;
1210               ++offset;
1211
1212               iov[2 + elem + offset] = ctype->class_3level[nr];
1213             }
1214           else
1215             {
                   /* The elements behind the classes are the character
                      maps, each a single map_3level table.  */
1216               nr -= ctype->nr_charclass;
1217               assert (nr < ctype->map_collection_nr);
1218               iov[2 + elem + offset] = ctype->map_3level[nr];
1219             }
1220           idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len;
1221         }
1222     }
1223
       /* ELEM equals NELEMS now.  OFFSET was incremented once per class
          name, once per map name, once per class table and four times for
          alignment padding, so all allocated entries must be in use.  */
1224   assert (2 + elem + offset == (nelems + 2 * ctype->nr_charclass
1225                                 + ctype->map_collection_nr + 4 + 2));
1226
1227   write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", 2 + elem + offset,
1228                      iov);
1229 }
1230
1231
1232 /* Local functions.  */

     /* Add NAME to the list of character classes known to CTYPE.  A name
        which is in the list already is reported through lr_error on LR and
        the list stays as it is.  If MAX_NR_CHARCLASS classes exist already
        this is reported with error and status 2.  The pointer NAME itself
        is stored, no copy of the string is made.  */
1233 static void
1234 ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1235                  const char *name)
1236 {
1237   size_t idx = 0;
1238
       /* Refuse a name which is registered already.  */
1239   while (idx < ctype->nr_charclass)
1240     {
1241       if (strcmp (ctype->classnames[idx], name) == 0)
1242         {
1243           lr_error (lr, _("character class `%s' already defined"), name);
1244           return;
1245         }
1246       ++idx;
1247     }
1248
1249   if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1250     /* Exit code 2 is prescribed in P1003.2b.  */
1251     WITH_CUR_LOCALE (error (2, 0, _("\
1252 implementation limit: no more than %Zd character classes allowed"),
1253                             MAX_NR_CHARCLASS));
1254
1255   ctype->classnames[ctype->nr_charclass++] = name;
1256 }
1257
1258
     /* Add a character map called NAME to CTYPE and allocate a zeroed table
        for it.  A name which is defined already is reported through
        lr_error on LR and nothing is changed.  If MAX_NR_CHARMAP maps exist
        already this is reported with error and status 2.  The new table
        gets as many entries as the largest table of the maps defined so
        far; for the first map this is 256 entries if CHARMAP has an
        mb_cur_max of 1 and 512 otherwise.  The first 256 entries count as
        used.  */
1259 static void
1260 ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1261                const char *name, const struct charmap_t *charmap)
1262 {
1263   size_t largest = 0;
1264   size_t slot;
1265   size_t room;
1266
1267   /* Look for a clash of names and, on the way, remember the size of
1268      the largest table of the maps defined so far.  */
1269   for (slot = 0; slot < ctype->map_collection_nr; ++slot)
1270     {
1271       if (strcmp (ctype->mapnames[slot], name) == 0)
1272         {
1273           lr_error (lr, _("character map `%s' already defined"), name);
1274           return;
1275         }
1276
1277       if (ctype->map_collection_max[slot] > largest)
1278         largest = ctype->map_collection_max[slot];
1279     }
1280
1281   if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1282     /* Exit code 2 is prescribed in P1003.2b.  */
1283     WITH_CUR_LOCALE (error (2, 0, _("\
1284 implementation limit: no more than %d character maps allowed"),
1285                             MAX_NR_CHARMAP));
1286
       /* SLOT is equal to map_collection_nr here, i.e., it is the index of
          the first unused entry.  */
1287   if (largest != 0)
1288     room = largest;
1289   else if (charmap->mb_cur_max == 1)
1290     room = 256;
1291   else
1292     room = 512;
1293
1294   ctype->mapnames[slot] = name;
1295   ctype->map_collection_max[slot] = room;
1296   ctype->map_collection[slot] = (uint32_t *) xcalloc (sizeof (uint32_t), room);
1297   ctype->map_collection_act[slot] = 256;
1298   ++ctype->map_collection_nr;
1299 }
1300
1301
1302 /* Return a pointer to the entry for the character IDX in *TABLE.  We
1303    have to be prepared that TABLE, MAX, and ACT can be NULL.

        Values below 256 are stored at their own position.  Every other
        value gets the position recorded for it in CTYPE's name array; a
        value seen for the first time is appended to that array.  If TABLE
        is NULL only this bookkeeping is done and NULL is returned.  If MAX
        is NULL the table is never extended and NULL is returned for a
        position at or beyond *ACT.  Otherwise the table is enlarged as
        needed, the new part is cleared, and *MAX and *ACT are updated.  */
1304 static uint32_t *
1305 find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1306           size_t *act, uint32_t idx)
1307 {
1308   size_t pos;
1309
       /* The first 256 values need no lookup.  */
1310   if (idx < 256)
1311     return table != NULL ? &(*table)[idx] : NULL;
1312
1313   /* Use the charnames_idx lookup table instead of the slow search loop.  */
1314 #if 1
1315   pos = idx_table_get (&ctype->charnames_idx, idx);
1316   if (pos == EMPTY)
1317     /* Not found.  */
1318     pos = ctype->charnames_act;
1319 #else
1320   for (pos = 256; pos < ctype->charnames_act; ++pos)
1321     if (ctype->charnames[pos] == idx)
1322       break;
1323 #endif
1324
       /* No position found: append IDX, doubling the name array first if
          it is full.  */
1325   if (pos == ctype->charnames_act)
1326     {
1327       if (ctype->charnames_max == ctype->charnames_act)
1328         {
1329           ctype->charnames_max *= 2;
1330           ctype->charnames = (uint32_t *)
1331             xrealloc (ctype->charnames,
1332                       sizeof (uint32_t) * ctype->charnames_max);
1333         }
1334       ctype->charnames[pos] = idx;
1335       ++ctype->charnames_act;
1336       idx_table_add (&ctype->charnames_idx, idx, pos);
1337     }
1338
1339   if (table == NULL)
1340     /* We have done everything we are asked to do.  */
1341     return NULL;
1342
       /* An entry which is in use already can be handed out as it is.  */
1343   if (pos < *act)
1344     return &(*table)[pos];
1345
1346   if (max == NULL)
1347     /* The caller does not want to extend the table.  */
1348     return NULL;
1349
1350   if (pos >= *max)
1351     {
           /* Double the size until POS fits and clear the new part.  */
1352       const size_t old_max = *max;
1353       size_t new_max = old_max;
1354
1355       do
1356         new_max *= 2;
1357       while (new_max <= pos);
1358
1359       *max = new_max;
1360       *table = (uint32_t *) xrealloc (*table, new_max * sizeof (uint32_t));
1361       memset (&(*table)[old_max], '\0',
1362               (new_max - old_max) * sizeof (uint32_t));
1363     }
1364
1365   *act = pos + 1;
1366   return &(*table)[pos];
1367 }
1368
1369
     /* Determine the character the token NOW stands for.  On success *SEQP
        is set to the byte sequence of the character in CHARMAP (NULL if
        none is known) and *WCHP to its UCS4 value.

        Three kinds of tokens are handled: symbolic names (tok_bsymbol),
        UCS4 values (tok_ucs4) and byte codes (tok_charcode).  For these the
        return value is 0.  For every other token 1 is returned and neither
        *SEQP nor *WCHP is written.  */
1370 static int
1371 get_character (struct token *now, const struct charmap_t *charmap,
1372                struct repertoire_t *repertoire,
1373                struct charseq **seqp, uint32_t *wchp)
1374 {
1375   if (now->tok == tok_bsymbol)
1376     {
1377       /* This will hopefully be the normal case.  */
           /* The name is looked up independently in the repertoire (for
              the UCS4 value) and in the charmap (for the byte sequence).  */
1378       *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1379                                      now->val.str.lenmb);
1380       *seqp = charmap_find_value (charmap, now->val.str.startmb,
1381                                   now->val.str.lenmb);
1382     }
1383   else if (now->tok == tok_ucs4)
1384     {
           /* Room for 'U', eight hexadecimal digits and the NUL byte.  */
1385       char utmp[10];
1386
           /* First try the name Uxxxxxxxx in the charmap, then the
              sequences recorded in the repertoire.  */
1387       snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1388       *seqp = charmap_find_value (charmap, utmp, 9);
1389
1390       if (*seqp == NULL)
1391         *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1392
1393       if (*seqp == NULL)
1394         {
1395           /* Compute the value in the charmap from the UCS value.  */
1396           const char *symbol = repertoire_find_symbol (repertoire,
1397                                                        now->val.ucs4);
1398
1399           if (symbol == NULL)
1400             *seqp = NULL;
1401           else
1402             *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1403
1404           if (*seqp == NULL)
1405             {
1406               if (repertoire != NULL)
1407                 {
1408                   /* Insert a negative entry.  */
                       /* The entry is stored in the sequence table of the
                          repertoire under the UCS4 value and carries
                          ILLEGAL_CHAR_VALUE as its own UCS4 value.
                          NOTE(review): presumably repertoire_find_seq
                          returns it on the next lookup, which the UCS4
                          comparison below then turns into NULL --
                          confirm.  */
1409                   static const struct charseq negative
1410                     = { .ucs4 = ILLEGAL_CHAR_VALUE };
1411                   uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1412                                                   sizeof (uint32_t));
1413                   *newp = now->val.ucs4;
1414
1415                   insert_entry (&repertoire->seq_table, newp,
1416                                 sizeof (uint32_t), (void *) &negative);
1417                 }
1418             }
1419           else
                 /* Found through the symbolic name: record the UCS4 value
                    in the charmap entry.  */
1420             (*seqp)->ucs4 = now->val.ucs4;
1421         }
           /* A sequence which is recorded for a different UCS4 value is
              treated as not available.  */
1422       else if ((*seqp)->ucs4 != now->val.ucs4)
1423         *seqp = NULL;
1424
1425       *wchp = now->val.ucs4;
1426     }
1427   else if (now->tok == tok_charcode)
1428     {
1429       /* We must map from the byte code to UCS4.  */
1430       *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1431                                    now->val.str.lenmb);
1432
1433       if (*seqp == NULL)
1434         *wchp = ILLEGAL_CHAR_VALUE;
1435       else
1436         {
               /* Determine the UCS4 value from the name of the entry the
                  first time it is needed and keep it in the entry.  */
1437           if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1438             (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1439                                                    strlen ((*seqp)->name));
1440           *wchp = (*seqp)->ucs4;
1441         }
1442     }
1443   else
         /* Not a token which describes a character.  */
1444     return 1;
1445
1446   return 0;
1447 }
1448
1449
1450 /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1451    the .(2). counterparts.

        LAST_STR is the name which starts the range and NOW the token with
        the name which ends it.  Both names must have the same length; the
        part following their common prefix is read as a number in BASE.
        Every value from FROM + STEP up to TO (the start name itself is not
        handled here) is turned back into a name which is looked up with
        get_character.  For each of these characters CLASS256_BIT is set in
        class256_collection if its byte sequence has length one, and
        CLASS_BIT (if nonzero) is set in class_collection if it has a UCS4
        value.  HANDLE_DIGITS == 1 additionally appends the character to
        the input digit arrays, HANDLE_DIGITS == 2 to the `outdigit' arrays
        (at most ten entries).  If IGNORE_CONTENT is nonzero the range is
        only validated.  Errors are reported through lr_error on LDFILE.

        NOTE(review): while the names are generated NOW->val.str.startmb is
        redirected to a local buffer and it is not reset before returning
        -- confirm that no caller reads it afterwards.  */
1452 static void
1453 charclass_symbolic_ellipsis (struct linereader *ldfile,
1454                              struct locale_ctype_t *ctype,
1455                              const struct charmap_t *charmap,
1456                              struct repertoire_t *repertoire,
1457                              struct token *now,
1458                              const char *last_str,
1459                              unsigned long int class256_bit,
1460                              unsigned long int class_bit, int base,
1461                              int ignore_content, int handle_digits, int step)
1462 {
1463   const char *nowstr = now->val.str.startmb;
       /* Buffer for the generated names; they have the same length as the
          two names given.  */
1464   char tmp[now->val.str.lenmb + 1];
1465   const char *cp;
1466   char *endp;
1467   unsigned long int from;
1468   unsigned long int to;
1469
1470   /* We have to compute the ellipsis values using the symbolic names.  */
1471   assert (last_str != NULL);
1472
1473   if (strlen (last_str) != now->val.str.lenmb)
1474     {
1475     invalid_range:
1476       lr_error (ldfile,
1477                 _("`%s' and `%.*s' are not valid names for symbolic range"),
1478                 last_str, (int) now->val.str.lenmb, nowstr);
1479       return;
1480     }
1481
1482   if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1483     /* Nothing to do, the names are the same.  */
1484     return;
1485
       /* Skip the common prefix.  The names have the same length and are
          known to differ, so the loop ends before the end of the names.  */
1486   for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1487     ;
1488
       /* strtoul signals an overflow by returning ULONG_MAX with errno set
          to ERANGE.  The remainder of the name must consist of the number
          only.  */
1489   errno = 0;
1490   from = strtoul (cp, &endp, base);
1491   if ((from == ULONG_MAX && errno == ERANGE) || *endp != '\0')
1492     goto invalid_range;
1493
1494   to = strtoul (nowstr + (cp - last_str), &endp, base);
1495   if ((to == ULONG_MAX && errno == ERANGE)
1496       || (endp - nowstr) != now->val.str.lenmb || from >= to)
1497     goto invalid_range;
1498
1499   /* OK, we have a range FROM - TO.  Now we can create the symbolic names.  */
1500   if (!ignore_content)
1501     {
1502       now->val.str.startmb = tmp;
1503       while ((from += step) <= to)
1504         {
1505           struct charseq *seq;
1506           uint32_t wch;
1507
               /* Common prefix followed by the number, padded with zeros
                  to the width of the original number.  The write is
                  bounded by the size of TMP.  */
1508           snprintf (tmp, sizeof (tmp),
1509                     (base == 10 ? "%.*s%0*lu" : "%.*s%0*lX"),
1510                     (int) (cp - last_str), last_str,
1511                     (int) (now->val.str.lenmb - (cp - last_str)), from);
1512
1513           get_character (now, charmap, repertoire, &seq, &wch);
1514
1515           if (seq != NULL && seq->nbytes == 1)
1516             /* Yep, we can store information about this byte sequence.  */
1517             ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1518
1519           if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1520             /* We have the UCS4 position.  */
1521             *find_idx (ctype, &ctype->class_collection,
1522                        &ctype->class_collection_max,
1523                        &ctype->class_collection_act, wch) |= class_bit;
1524
1525           if (handle_digits == 1)
1526             {
1527               /* We must store the digit values.  */
                   /* Both arrays are enlarged together when the multibyte
                      array is full.  */
1528               if (ctype->mbdigits_act == ctype->mbdigits_max)
1529                 {
1530                   ctype->mbdigits_max *= 2;
1531                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1532                                               (ctype->mbdigits_max
1533                                                * sizeof (char *)));
1534                   ctype->wcdigits_max *= 2;
1535                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1536                                               (ctype->wcdigits_max
1537                                                * sizeof (uint32_t)));
1538                 }
1539
1540               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1541               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1542             }
1543           else if (handle_digits == 2)
1544             {
1545               /* We must store the digit values.  */
1546               if (ctype->outdigits_act >= 10)
1547                 {
1548                   lr_error (ldfile, _("\
1549 %s: field `%s' does not contain exactly ten entries"),
1550                             "LC_CTYPE", "outdigit");
1551                   return;
1552                 }
1553
1554               ctype->mboutdigits[ctype->outdigits_act] = seq;
1555               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1556               ++ctype->outdigits_act;
1557             }
1558         }
1559     }
1560 }
1561
1562
1563 /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'.  */
1564 static void
1565 charclass_ucs4_ellipsis (struct linereader *ldfile,
1566                          struct locale_ctype_t *ctype,
1567                          const struct charmap_t *charmap,
1568                          struct repertoire_t *repertoire,
1569                          struct token *now, uint32_t last_wch,
1570                          unsigned long int class256_bit,
1571                          unsigned long int class_bit, int ignore_content,
1572                          int handle_digits, int step)
1573 {
1574   if (last_wch > now->val.ucs4)
1575     {
1576       lr_error (ldfile, _("\
1577 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1578                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1579                 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1580       return;
1581     }
1582
1583   if (!ignore_content)
1584     while ((last_wch += step) <= now->val.ucs4)
1585       {
1586         /* We have to find out whether there is a byte sequence corresponding
1587            to this UCS4 value.  */
1588         struct charseq *seq;
1589         char utmp[10];
1590
1591         snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1592         seq = charmap_find_value (charmap, utmp, 9);
1593         if (seq == NULL)
1594           {
1595             snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1596             seq = charmap_find_value (charmap, utmp, 5);
1597           }
1598
1599         if (seq == NULL)
1600           /* Try looking in the repertoire map.  */
1601           seq = repertoire_find_seq (repertoire, last_wch);
1602
1603         /* If this is the first time we look for this sequence create a new
1604            entry.  */
1605         if (seq == NULL)
1606           {
1607             static const struct charseq negative
1608               = { .ucs4 = ILLEGAL_CHAR_VALUE };
1609
1610             /* Find the symbolic name for this UCS4 value.  */
1611             if (repertoire != NULL)
1612               {
1613                 const char *symbol = repertoire_find_symbol (repertoire,
1614                                                              last_wch);
1615                 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1616                                                 sizeof (uint32_t));
1617                 *newp = last_wch;
1618
1619                 if (symbol != NULL)
1620                   /* We have a name, now search the multibyte value.  */
1621                   seq = charmap_find_value (charmap, symbol, strlen (symbol));
1622
1623                 if (seq == NULL)
1624                   /* We have to create a fake entry.  */
1625                   seq = (struct charseq *) &negative;
1626                 else
1627                   seq->ucs4 = last_wch;
1628
1629                 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1630                               seq);
1631               }
1632             else
1633               /* We have to create a fake entry.  */
1634               seq = (struct charseq *) &negative;
1635           }
1636
1637         /* We have a name, now search the multibyte value.  */
1638         if (seq->ucs4 == last_wch && seq->nbytes == 1)
1639           /* Yep, we can store information about this byte sequence.  */
1640           ctype->class256_collection[(size_t) seq->bytes[0]]
1641             |= class256_bit;
1642
1643         /* And of course we have the UCS4 position.  */
1644         if (class_bit != 0)
1645           *find_idx (ctype, &ctype->class_collection,
1646                      &ctype->class_collection_max,
1647                      &ctype->class_collection_act, last_wch) |= class_bit;
1648
1649         if (handle_digits == 1)
1650           {
1651             /* We must store the digit values.  */
1652             if (ctype->mbdigits_act == ctype->mbdigits_max)
1653               {
1654                 ctype->mbdigits_max *= 2;
1655                 ctype->mbdigits = xrealloc (ctype->mbdigits,
1656                                             (ctype->mbdigits_max
1657                                              * sizeof (char *)));
1658                 ctype->wcdigits_max *= 2;
1659                 ctype->wcdigits = xrealloc (ctype->wcdigits,
1660                                             (ctype->wcdigits_max
1661                                              * sizeof (uint32_t)));
1662               }
1663
1664             ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1665                                                       ? seq : NULL);
1666             ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1667           }
1668         else if (handle_digits == 2)
1669           {
1670             /* We must store the digit values.  */
1671             if (ctype->outdigits_act >= 10)
1672               {
1673                 lr_error (ldfile, _("\
1674 %s: field `%s' does not contain exactly ten entries"),
1675                           "LC_CTYPE", "outdigit");
1676                 return;
1677               }
1678
1679             ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1680                                                         ? seq : NULL);
1681             ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1682             ++ctype->outdigits_act;
1683           }
1684       }
1685 }
1686
1687
1688 /* Ellipsis as in `/xea/x12.../xea/x34'.  */
1689 static void
1690 charclass_charcode_ellipsis (struct linereader *ldfile,
1691                              struct locale_ctype_t *ctype,
1692                              const struct charmap_t *charmap,
1693                              struct repertoire_t *repertoire,
1694                              struct token *now, char *last_charcode,
1695                              uint32_t last_charcode_len,
1696                              unsigned long int class256_bit,
1697                              unsigned long int class_bit, int ignore_content,
1698                              int handle_digits)
1699 {
1700   /* First check whether the to-value is larger.  */
1701   if (now->val.charcode.nbytes != last_charcode_len)
1702     {
1703       lr_error (ldfile, _("\
1704 start and end character sequence of range must have the same length"));
1705       return;
1706     }
1707
1708   if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1709     {
1710       lr_error (ldfile, _("\
1711 to-value character sequence is smaller than from-value sequence"));
1712       return;
1713     }
1714
1715   if (!ignore_content)
1716     {
1717       do
1718         {
1719           /* Increment the byte sequence value.  */
1720           struct charseq *seq;
1721           uint32_t wch;
1722           int i;
1723
1724           for (i = last_charcode_len - 1; i >= 0; --i)
1725             if (++last_charcode[i] != 0)
1726               break;
1727
1728           if (last_charcode_len == 1)
1729             /* Of course we have the charcode value.  */
1730             ctype->class256_collection[(size_t) last_charcode[0]]
1731               |= class256_bit;
1732
1733           /* Find the symbolic name.  */
1734           seq = charmap_find_symbol (charmap, last_charcode,
1735                                      last_charcode_len);
1736           if (seq != NULL)
1737             {
1738               if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1739                 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1740                                                    strlen (seq->name));
1741               wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1742
1743               if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1744                 *find_idx (ctype, &ctype->class_collection,
1745                            &ctype->class_collection_max,
1746                            &ctype->class_collection_act, wch) |= class_bit;
1747             }
1748           else
1749             wch = ILLEGAL_CHAR_VALUE;
1750
1751           if (handle_digits == 1)
1752             {
1753               /* We must store the digit values.  */
1754               if (ctype->mbdigits_act == ctype->mbdigits_max)
1755                 {
1756                   ctype->mbdigits_max *= 2;
1757                   ctype->mbdigits = xrealloc (ctype->mbdigits,
1758                                               (ctype->mbdigits_max
1759                                                * sizeof (char *)));
1760                   ctype->wcdigits_max *= 2;
1761                   ctype->wcdigits = xrealloc (ctype->wcdigits,
1762                                               (ctype->wcdigits_max
1763                                                * sizeof (uint32_t)));
1764                 }
1765
1766               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1767               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1768               seq->nbytes = last_charcode_len;
1769
1770               ctype->mbdigits[ctype->mbdigits_act++] = seq;
1771               ctype->wcdigits[ctype->wcdigits_act++] = wch;
1772             }
1773           else if (handle_digits == 2)
1774             {
1775               struct charseq *seq;
1776               /* We must store the digit values.  */
1777               if (ctype->outdigits_act >= 10)
1778                 {
1779                   lr_error (ldfile, _("\
1780 %s: field `%s' does not contain exactly ten entries"),
1781                             "LC_CTYPE", "outdigit");
1782                   return;
1783                 }
1784
1785               seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1786               memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1787               seq->nbytes = last_charcode_len;
1788
1789               ctype->mboutdigits[ctype->outdigits_act] = seq;
1790               ctype->wcoutdigits[ctype->outdigits_act] = wch;
1791               ++ctype->outdigits_act;
1792             }
1793         }
1794       while (memcmp (last_charcode, now->val.charcode.bytes,
1795                      last_charcode_len) != 0);
1796     }
1797 }
1798
1799
1800 static uint32_t *
1801 find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1802                 uint32_t wch)
1803 {
1804   struct translit_t *trunp = ctype->translit;
1805   struct translit_ignore_t *tirunp = ctype->translit_ignore;
1806
1807   while (trunp != NULL)
1808     {
1809       /* XXX We simplify things here.  The transliterations we look
1810          for are only allowed to have one character.  */
1811       if (trunp->from[0] == wch && trunp->from[1] == 0)
1812         {
1813           /* Found it.  Now look for a transliteration which can be
1814              represented with the character set.  */
1815           struct translit_to_t *torunp = trunp->to;
1816
1817           while (torunp != NULL)
1818             {
1819               int i;
1820
1821               for (i = 0; torunp->str[i] != 0; ++i)
1822                 {
1823                   char utmp[10];
1824
1825                   snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1826                   if (charmap_find_value (charmap, utmp, 9) == NULL)
1827                     /* This character cannot be represented.  */
1828                     break;
1829                 }
1830
1831               if (torunp->str[i] == 0)
1832                 return torunp->str;
1833
1834               torunp = torunp->next;
1835             }
1836
1837           break;
1838         }
1839
1840       trunp = trunp->next;
1841     }
1842
1843   /* Check for ignored chars.  */
1844   while (tirunp != NULL)
1845     {
1846       if (tirunp->from <= wch && tirunp->to >= wch)
1847         {
1848           uint32_t wi;
1849
1850           for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1851             if (wi == wch)
1852               return (uint32_t []) { 0 };
1853         }
1854     }
1855
1856   /* Nothing found.  */
1857   return NULL;
1858 }
1859
1860
1861 uint32_t *
1862 find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1863                uint32_t wch)
1864 {
1865   struct locale_ctype_t *ctype;
1866   uint32_t *result = NULL;
1867
1868   assert (locale != NULL);
1869   ctype = locale->categories[LC_CTYPE].ctype;
1870
1871   if (ctype == NULL)
1872     return NULL;
1873
1874   if (ctype->translit != NULL)
1875     result = find_translit2 (ctype, charmap, wch);
1876
1877   if (result == NULL)
1878     {
1879       struct translit_include_t *irunp = ctype->translit_include;
1880
1881       while (irunp != NULL && result == NULL)
1882         {
1883           result = find_translit (find_locale (CTYPE_LOCALE,
1884                                                irunp->copy_locale,
1885                                                irunp->copy_repertoire,
1886                                                charmap),
1887                                   charmap, wch);
1888           irunp = irunp->next;
1889         }
1890     }
1891
1892   return result;
1893 }
1894
1895
1896 /* Read one transliteration entry.  */
1897 static uint32_t *
1898 read_widestring (struct linereader *ldfile, struct token *now,
1899                  const struct charmap_t *charmap,
1900                  struct repertoire_t *repertoire)
1901 {
1902   uint32_t *wstr;
1903
1904   if (now->tok == tok_default_missing)
1905     /* The special name "" will denote this case.  */
1906     wstr = ((uint32_t *) { 0 });
1907   else if (now->tok == tok_bsymbol)
1908     {
1909       /* Get the value from the repertoire.  */
1910       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1911       wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1912                                        now->val.str.lenmb);
1913       if (wstr[0] == ILLEGAL_CHAR_VALUE)
1914         {
1915           /* We cannot proceed, we don't know the UCS4 value.  */
1916           free (wstr);
1917           return NULL;
1918         }
1919
1920       wstr[1] = 0;
1921     }
1922   else if (now->tok == tok_ucs4)
1923     {
1924       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1925       wstr[0] = now->val.ucs4;
1926       wstr[1] = 0;
1927     }
1928   else if (now->tok == tok_charcode)
1929     {
1930       /* Argh, we have to convert to the symbol name first and then to the
1931          UCS4 value.  */
1932       struct charseq *seq = charmap_find_symbol (charmap,
1933                                                  now->val.str.startmb,
1934                                                  now->val.str.lenmb);
1935       if (seq == NULL)
1936         /* Cannot find the UCS4 value.  */
1937         return NULL;
1938
1939       if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1940         seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1941                                            strlen (seq->name));
1942       if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1943         /* We cannot proceed, we don't know the UCS4 value.  */
1944         return NULL;
1945
1946       wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1947       wstr[0] = seq->ucs4;
1948       wstr[1] = 0;
1949     }
1950   else if (now->tok == tok_string)
1951     {
1952       wstr = now->val.str.startwc;
1953       if (wstr == NULL || wstr[0] == 0)
1954         return NULL;
1955     }
1956   else
1957     {
1958       if (now->tok != tok_eol && now->tok != tok_eof)
1959         lr_ignore_rest (ldfile, 0);
1960       SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1961       return (uint32_t *) -1l;
1962     }
1963
1964   return wstr;
1965 }
1966
1967
1968 static void
1969 read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1970                      struct token *now, const struct charmap_t *charmap,
1971                      struct repertoire_t *repertoire)
1972 {
1973   uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1974   struct translit_t *result;
1975   struct translit_to_t **top;
1976   struct obstack *ob = &ctype->mempool;
1977   int first;
1978   int ignore;
1979
1980   if (from_wstr == NULL)
1981     /* There is no valid from string.  */
1982     return;
1983
1984   result = (struct translit_t *) obstack_alloc (ob,
1985                                                 sizeof (struct translit_t));
1986   result->from = from_wstr;
1987   result->fname = ldfile->fname;
1988   result->lineno = ldfile->lineno;
1989   result->next = NULL;
1990   result->to = NULL;
1991   top = &result->to;
1992   first = 1;
1993   ignore = 0;
1994
1995   while (1)
1996     {
1997       uint32_t *to_wstr;
1998
1999       /* Next we have one or more transliterations.  They are
2000          separated by semicolons.  */
2001       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2002
2003       if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
2004         {
2005           /* One string read.  */
2006           const uint32_t zero = 0;
2007
2008           if (!ignore)
2009             {
2010               obstack_grow (ob, &zero, 4);
2011               to_wstr = obstack_finish (ob);
2012
2013               *top = obstack_alloc (ob, sizeof (struct translit_to_t));
2014               (*top)->str = to_wstr;
2015               (*top)->next = NULL;
2016             }
2017
2018           if (now->tok == tok_eol)
2019             {
2020               result->next = ctype->translit;
2021               ctype->translit = result;
2022               return;
2023             }
2024
2025           if (!ignore)
2026             top = &(*top)->next;
2027           ignore = 0;
2028         }
2029       else
2030         {
2031           to_wstr = read_widestring (ldfile, now, charmap, repertoire);
2032           if (to_wstr == (uint32_t *) -1l)
2033             {
2034               /* An error occurred.  */
2035               obstack_free (ob, result);
2036               return;
2037             }
2038
2039           if (to_wstr == NULL)
2040             ignore = 1;
2041           else
2042             /* This value is usable.  */
2043             obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
2044
2045           first = 0;
2046         }
2047     }
2048 }
2049
2050
2051 static void
2052 read_translit_ignore_entry (struct linereader *ldfile,
2053                             struct locale_ctype_t *ctype,
2054                             const struct charmap_t *charmap,
2055                             struct repertoire_t *repertoire)
2056 {
2057   /* We expect a semicolon-separated list of characters we ignore.  We are
2058      only interested in the wide character definitions.  These must be
2059      single characters, possibly defining a range when an ellipsis is used.  */
2060   while (1)
2061     {
2062       struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
2063                                     verbose);
2064       struct translit_ignore_t *newp;
2065       uint32_t from;
2066
2067       if (now->tok == tok_eol || now->tok == tok_eof)
2068         {
2069           lr_error (ldfile,
2070                     _("premature end of `translit_ignore' definition"));
2071           return;
2072         }
2073
2074       if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2075         {
2076           lr_error (ldfile, _("syntax error"));
2077           lr_ignore_rest (ldfile, 0);
2078           return;
2079         }
2080
2081       if (now->tok == tok_ucs4)
2082         from = now->val.ucs4;
2083       else
2084         /* Try to get the value.  */
2085         from = repertoire_find_value (repertoire, now->val.str.startmb,
2086                                       now->val.str.lenmb);
2087
2088       if (from == ILLEGAL_CHAR_VALUE)
2089         {
2090           lr_error (ldfile, "invalid character name");
2091           newp = NULL;
2092         }
2093       else
2094         {
2095           newp = (struct translit_ignore_t *)
2096             obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
2097           newp->from = from;
2098           newp->to = from;
2099           newp->step = 1;
2100
2101           newp->next = ctype->translit_ignore;
2102           ctype->translit_ignore = newp;
2103         }
2104
2105       /* Now we expect either a semicolon, an ellipsis, or the end of the
2106          line.  */
2107       now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2108
2109       if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2110         {
2111           /* XXX Should we bother implementing `....'?  `...' certainly
2112              will not be implemented.  */
2113           uint32_t to;
2114           int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2115
2116           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2117
2118           if (now->tok == tok_eol || now->tok == tok_eof)
2119             {
2120               lr_error (ldfile,
2121                         _("premature end of `translit_ignore' definition"));
2122               return;
2123             }
2124
2125           if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2126             {
2127               lr_error (ldfile, _("syntax error"));
2128               lr_ignore_rest (ldfile, 0);
2129               return;
2130             }
2131
2132           if (now->tok == tok_ucs4)
2133             to = now->val.ucs4;
2134           else
2135             /* Try to get the value.  */
2136             to = repertoire_find_value (repertoire, now->val.str.startmb,
2137                                         now->val.str.lenmb);
2138
2139           if (to == ILLEGAL_CHAR_VALUE)
2140             lr_error (ldfile, "invalid character name");
2141           else
2142             {
2143               /* Make sure the `to'-value is larger.  */
2144               if (to >= from)
2145                 {
2146                   newp->to = to;
2147                   newp->step = step;
2148                 }
2149               else
2150                 lr_error (ldfile, _("\
2151 to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2152                           (to | from) < 65536 ? 4 : 8, to,
2153                           (to | from) < 65536 ? 4 : 8, from);
2154             }
2155
2156           /* And the next token.  */
2157           now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2158         }
2159
2160       if (now->tok == tok_eol || now->tok == tok_eof)
2161         /* We are done.  */
2162         return;
2163
2164       if (now->tok == tok_semicolon)
2165         /* Next round.  */
2166         continue;
2167
2168       /* If we come here something is wrong.  */
2169       lr_error (ldfile, _("syntax error"));
2170       lr_ignore_rest (ldfile, 0);
2171       return;
2172     }
2173 }
2174
2175
2176 /* The parser for the LC_CTYPE section of the locale definition.  */
2177 void
2178 ctype_read (struct linereader *ldfile, struct localedef_t *result,
2179             const struct charmap_t *charmap, const char *repertoire_name,
2180             int ignore_content)
2181 {
2182   struct repertoire_t *repertoire = NULL;
2183   struct locale_ctype_t *ctype;
2184   struct token *now;
2185   enum token_t nowtok;
2186   size_t cnt;
2187   struct charseq *last_seq;
2188   uint32_t last_wch = 0;
2189   enum token_t last_token;
2190   enum token_t ellipsis_token;
2191   int step;
2192   char last_charcode[16];
2193   size_t last_charcode_len = 0;
2194   const char *last_str = NULL;
2195   int mapidx;
2196   struct localedef_t *copy_locale = NULL;
2197
2198   /* Get the repertoire we have to use.  */
2199   if (repertoire_name != NULL)
2200     repertoire = repertoire_read (repertoire_name);
2201
2202   /* The rest of the line containing `LC_CTYPE' must be free.  */
2203   lr_ignore_rest (ldfile, 1);
2204
2205
2206   do
2207     {
2208       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2209       nowtok = now->tok;
2210     }
2211   while (nowtok == tok_eol);
2212
2213   /* If we see `copy' now we are almost done.  */
2214   if (nowtok == tok_copy)
2215     {
2216       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2217       if (now->tok != tok_string)
2218         {
2219           SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2220
2221         skip_category:
2222           do
2223             now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2224           while (now->tok != tok_eof && now->tok != tok_end);
2225
2226           if (now->tok != tok_eof
2227               || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2228                   now->tok == tok_eof))
2229             lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2230           else if (now->tok != tok_lc_ctype)
2231             {
2232               lr_error (ldfile, _("\
2233 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2234               lr_ignore_rest (ldfile, 0);
2235             }
2236           else
2237             lr_ignore_rest (ldfile, 1);
2238
2239           return;
2240         }
2241
2242       if (! ignore_content)
2243         {
2244           /* Get the locale definition.  */
2245           copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2246                                      repertoire_name, charmap, NULL);
2247           if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2248             {
2249               /* Not yet loaded.  So do it now.  */
2250               if (locfile_read (copy_locale, charmap) != 0)
2251                 goto skip_category;
2252             }
2253
2254           if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2255             return;
2256         }
2257
2258       lr_ignore_rest (ldfile, 1);
2259
2260       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2261       nowtok = now->tok;
2262     }
2263
2264   /* Prepare the data structures.  */
2265   ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2266   ctype = result->categories[LC_CTYPE].ctype;
2267
2268   /* Remember the repertoire we use.  */
2269   if (!ignore_content)
2270     ctype->repertoire = repertoire;
2271
2272   while (1)
2273     {
2274       unsigned long int class_bit = 0;
2275       unsigned long int class256_bit = 0;
2276       int handle_digits = 0;
2277
2278       /* Of course we don't proceed beyond the end of file.  */
2279       if (nowtok == tok_eof)
2280         break;
2281
2282       /* Ingore empty lines.  */
2283       if (nowtok == tok_eol)
2284         {
2285           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2286           nowtok = now->tok;
2287           continue;
2288         }
2289
2290       switch (nowtok)
2291         {
2292         case tok_charclass:
2293           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294           while (now->tok == tok_ident || now->tok == tok_string)
2295             {
2296               ctype_class_new (ldfile, ctype, now->val.str.startmb);
2297               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2298               if (now->tok != tok_semicolon)
2299                 break;
2300               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2301             }
2302           if (now->tok != tok_eol)
2303             SYNTAX_ERROR (_("\
2304 %s: syntax error in definition of new character class"), "LC_CTYPE");
2305           break;
2306
2307         case tok_charconv:
2308           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2309           while (now->tok == tok_ident || now->tok == tok_string)
2310             {
2311               ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2312               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2313               if (now->tok != tok_semicolon)
2314                 break;
2315               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2316             }
2317           if (now->tok != tok_eol)
2318             SYNTAX_ERROR (_("\
2319 %s: syntax error in definition of new character map"), "LC_CTYPE");
2320           break;
2321
2322         case tok_class:
2323           /* Ignore the rest of the line if we don't need the input of
2324              this line.  */
2325           if (ignore_content)
2326             {
2327               lr_ignore_rest (ldfile, 0);
2328               break;
2329             }
2330
2331           /* We simply forget the `class' keyword and use the following
2332              operand to determine the bit.  */
2333           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2334           if (now->tok == tok_ident || now->tok == tok_string)
2335             {
2336               /* Must can be one of the predefined class names.  */
2337               for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2338                 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2339                   break;
2340               if (cnt >= ctype->nr_charclass)
2341                 {
2342 #ifdef PREDEFINED_CLASSES
2343                   if (now->val.str.lenmb == 8
2344                       && memcmp ("special1", now->val.str.startmb, 8) == 0)
2345                     class_bit = _ISwspecial1;
2346                   else if (now->val.str.lenmb == 8
2347                       && memcmp ("special2", now->val.str.startmb, 8) == 0)
2348                     class_bit = _ISwspecial2;
2349                   else if (now->val.str.lenmb == 8
2350                       && memcmp ("special3", now->val.str.startmb, 8) == 0)
2351                     class_bit = _ISwspecial3;
2352                   else
2353 #endif
2354                     {
2355                       /* OK, it's a new class.  */
2356                       ctype_class_new (ldfile, ctype, now->val.str.startmb);
2357
2358                       class_bit = _ISwbit (ctype->nr_charclass - 1);
2359                     }
2360                 }
2361               else
2362                 {
2363                   class_bit = _ISwbit (cnt);
2364
2365                   free (now->val.str.startmb);
2366                 }
2367             }
2368           else if (now->tok == tok_digit)
2369             goto handle_tok_digit;
2370           else if (now->tok < tok_upper || now->tok > tok_blank)
2371             goto err_label;
2372           else
2373             {
2374               class_bit = BITw (now->tok);
2375               class256_bit = BIT (now->tok);
2376             }
2377
2378           /* The next character must be a semicolon.  */
2379           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2380           if (now->tok != tok_semicolon)
2381             goto err_label;
2382           goto read_charclass;
2383
2384         case tok_upper:
2385         case tok_lower:
2386         case tok_alpha:
2387         case tok_alnum:
2388         case tok_space:
2389         case tok_cntrl:
2390         case tok_punct:
2391         case tok_graph:
2392         case tok_print:
2393         case tok_xdigit:
2394         case tok_blank:
2395           /* Ignore the rest of the line if we don't need the input of
2396              this line.  */
2397           if (ignore_content)
2398             {
2399               lr_ignore_rest (ldfile, 0);
2400               break;
2401             }
2402
2403           class_bit = BITw (now->tok);
2404           class256_bit = BIT (now->tok);
2405           handle_digits = 0;
2406         read_charclass:
2407           ctype->class_done |= class_bit;
2408           last_token = tok_none;
2409           ellipsis_token = tok_none;
2410           step = 1;
2411           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2412           while (now->tok != tok_eol && now->tok != tok_eof)
2413             {
2414               uint32_t wch;
2415               struct charseq *seq;
2416
2417               if (ellipsis_token == tok_none)
2418                 {
2419                   if (get_character (now, charmap, repertoire, &seq, &wch))
2420                     goto err_label;
2421
2422                   if (!ignore_content && seq != NULL && seq->nbytes == 1)
2423                     /* Yep, we can store information about this byte
2424                        sequence.  */
2425                     ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2426
2427                   if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2428                       && class_bit != 0)
2429                     /* We have the UCS4 position.  */
2430                     *find_idx (ctype, &ctype->class_collection,
2431                                &ctype->class_collection_max,
2432                                &ctype->class_collection_act, wch) |= class_bit;
2433
2434                   last_token = now->tok;
2435                   /* Terminate the string.  */
2436                   if (last_token == tok_bsymbol)
2437                     {
2438                       now->val.str.startmb[now->val.str.lenmb] = '\0';
2439                       last_str = now->val.str.startmb;
2440                     }
2441                   else
2442                     last_str = NULL;
2443                   last_seq = seq;
2444                   last_wch = wch;
2445                   memcpy (last_charcode, now->val.charcode.bytes, 16);
2446                   last_charcode_len = now->val.charcode.nbytes;
2447
2448                   if (!ignore_content && handle_digits == 1)
2449                     {
2450                       /* We must store the digit values.  */
2451                       if (ctype->mbdigits_act == ctype->mbdigits_max)
2452                         {
2453                           ctype->mbdigits_max += 10;
2454                           ctype->mbdigits = xrealloc (ctype->mbdigits,
2455                                                       (ctype->mbdigits_max
2456                                                        * sizeof (char *)));
2457                           ctype->wcdigits_max += 10;
2458                           ctype->wcdigits = xrealloc (ctype->wcdigits,
2459                                                       (ctype->wcdigits_max
2460                                                        * sizeof (uint32_t)));
2461                         }
2462
2463                       ctype->mbdigits[ctype->mbdigits_act++] = seq;
2464                       ctype->wcdigits[ctype->wcdigits_act++] = wch;
2465                     }
2466                   else if (!ignore_content && handle_digits == 2)
2467                     {
2468                       /* We must store the digit values.  */
2469                       if (ctype->outdigits_act >= 10)
2470                         {
2471                           lr_error (ldfile, _("\
2472 %s: field `%s' does not contain exactly ten entries"),
2473                             "LC_CTYPE", "outdigit");
2474                           lr_ignore_rest (ldfile, 0);
2475                           break;
2476                         }
2477
2478                       ctype->mboutdigits[ctype->outdigits_act] = seq;
2479                       ctype->wcoutdigits[ctype->outdigits_act] = wch;
2480                       ++ctype->outdigits_act;
2481                     }
2482                 }
2483               else
2484                 {
2485                   /* Now it gets complicated.  We have to resolve the
2486                      ellipsis problem.  First we must distinguish between
2487                      the different kinds of ellipsis and this must match the
2488                      tokens we have seen.  */
2489                   assert (last_token != tok_none);
2490
2491                   if (last_token != now->tok)
2492                     {
2493                       lr_error (ldfile, _("\
2494 ellipsis range must be marked by two operands of same type"));
2495                       lr_ignore_rest (ldfile, 0);
2496                       break;
2497                     }
2498
2499                   if (last_token == tok_bsymbol)
2500                     {
2501                       if (ellipsis_token == tok_ellipsis3)
2502                         lr_error (ldfile, _("with symbolic name range values \
2503 the absolute ellipsis `...' must not be used"));
2504
2505                       charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2506                                                    repertoire, now, last_str,
2507                                                    class256_bit, class_bit,
2508                                                    (ellipsis_token
2509                                                     == tok_ellipsis4
2510                                                     ? 10 : 16),
2511                                                    ignore_content,
2512                                                    handle_digits, step);
2513                     }
2514                   else if (last_token == tok_ucs4)
2515                     {
2516                       if (ellipsis_token != tok_ellipsis2)
2517                         lr_error (ldfile, _("\
2518 with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2519
2520                       charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2521                                                repertoire, now, last_wch,
2522                                                class256_bit, class_bit,
2523                                                ignore_content, handle_digits,
2524                                                step);
2525                     }
2526                   else
2527                     {
2528                       assert (last_token == tok_charcode);
2529
2530                       if (ellipsis_token != tok_ellipsis3)
2531                         lr_error (ldfile, _("\
2532 with character code range values one must use the absolute ellipsis `...'"));
2533
2534                       charclass_charcode_ellipsis (ldfile, ctype, charmap,
2535                                                    repertoire, now,
2536                                                    last_charcode,
2537                                                    last_charcode_len,
2538                                                    class256_bit, class_bit,
2539                                                    ignore_content,
2540                                                    handle_digits);
2541                     }
2542
2543                   /* Now we have used the last value.  */
2544                   last_token = tok_none;
2545                 }
2546
2547               /* Next we expect a semicolon or the end of the line.  */
2548               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2549               if (now->tok == tok_eol || now->tok == tok_eof)
2550                 break;
2551
2552               if (last_token != tok_none
2553                   && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2554                 {
2555                   if (now->tok == tok_ellipsis2_2)
2556                     {
2557                       now->tok = tok_ellipsis2;
2558                       step = 2;
2559                     }
2560                   else if (now->tok == tok_ellipsis4_2)
2561                     {
2562                       now->tok = tok_ellipsis4;
2563                       step = 2;
2564                     }
2565
2566                   ellipsis_token = now->tok;
2567
2568                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2569                   continue;
2570                 }
2571
2572               if (now->tok != tok_semicolon)
2573                 goto err_label;
2574
2575               /* And get the next character.  */
2576               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2577
2578               ellipsis_token = tok_none;
2579               step = 1;
2580             }
2581           break;
2582
2583         case tok_digit:
2584           /* Ignore the rest of the line if we don't need the input of
2585              this line.  */
2586           if (ignore_content)
2587             {
2588               lr_ignore_rest (ldfile, 0);
2589               break;
2590             }
2591
2592         handle_tok_digit:
2593           class_bit = _ISwdigit;
2594           class256_bit = _ISdigit;
2595           handle_digits = 1;
2596           goto read_charclass;
2597
2598         case tok_outdigit:
2599           /* Ignore the rest of the line if we don't need the input of
2600              this line.  */
2601           if (ignore_content)
2602             {
2603               lr_ignore_rest (ldfile, 0);
2604               break;
2605             }
2606
2607           if (ctype->outdigits_act != 0)
2608             lr_error (ldfile, _("\
2609 %s: field `%s' declared more than once"),
2610                       "LC_CTYPE", "outdigit");
2611           class_bit = 0;
2612           class256_bit = 0;
2613           handle_digits = 2;
2614           goto read_charclass;
2615
2616         case tok_toupper:
2617           /* Ignore the rest of the line if we don't need the input of
2618              this line.  */
2619           if (ignore_content)
2620             {
2621               lr_ignore_rest (ldfile, 0);
2622               break;
2623             }
2624
2625           mapidx = 0;
2626           goto read_mapping;
2627
2628         case tok_tolower:
2629           /* Ignore the rest of the line if we don't need the input of
2630              this line.  */
2631           if (ignore_content)
2632             {
2633               lr_ignore_rest (ldfile, 0);
2634               break;
2635             }
2636
2637           mapidx = 1;
2638           goto read_mapping;
2639
2640         case tok_map:
2641           /* Ignore the rest of the line if we don't need the input of
2642              this line.  */
2643           if (ignore_content)
2644             {
2645               lr_ignore_rest (ldfile, 0);
2646               break;
2647             }
2648
2649           /* We simply forget the `map' keyword and use the following
2650              operand to determine the mapping.  */
2651           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2652           if (now->tok == tok_ident || now->tok == tok_string)
2653             {
2654               size_t cnt;
2655
2656               for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2657                 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2658                   break;
2659
2660               if (cnt < ctype->map_collection_nr)
2661                 free (now->val.str.startmb);
2662               else
2663                 /* OK, it's a new map.  */
2664                 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2665
2666               mapidx = cnt;
2667             }
2668           else if (now->tok < tok_toupper || now->tok > tok_tolower)
2669             goto err_label;
2670           else
2671             mapidx = now->tok - tok_toupper;
2672
2673           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2674           /* This had better be a semicolon.  */
2675           if (now->tok != tok_semicolon)
2676             goto err_label;
2677
2678         read_mapping:
2679           /* Test whether this mapping was already defined.  */
2680           if (ctype->tomap_done[mapidx])
2681             {
2682               lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2683                         ctype->mapnames[mapidx]);
2684               lr_ignore_rest (ldfile, 0);
2685               break;
2686             }
2687           ctype->tomap_done[mapidx] = 1;
2688
2689           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2690           while (now->tok != tok_eol && now->tok != tok_eof)
2691             {
2692               struct charseq *from_seq;
2693               uint32_t from_wch;
2694               struct charseq *to_seq;
2695               uint32_t to_wch;
2696
2697               /* Every pair starts with an opening brace.  */
2698               if (now->tok != tok_open_brace)
2699                 goto err_label;
2700
2701               /* Next comes the from-value.  */
2702               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2703               if (get_character (now, charmap, repertoire, &from_seq,
2704                                  &from_wch) != 0)
2705                 goto err_label;
2706
2707               /* The next is a comma.  */
2708               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2709               if (now->tok != tok_comma)
2710                 goto err_label;
2711
2712               /* And the other value.  */
2713               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2714               if (get_character (now, charmap, repertoire, &to_seq,
2715                                  &to_wch) != 0)
2716                 goto err_label;
2717
2718               /* And the last thing is the closing brace.  */
2719               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2720               if (now->tok != tok_close_brace)
2721                 goto err_label;
2722
2723               if (!ignore_content)
2724                 {
2725                   /* Check whether the mapping converts from an ASCII value
2726                      to a non-ASCII value.  */
2727                   if (from_seq != NULL && from_seq->nbytes == 1
2728                       && isascii (from_seq->bytes[0])
2729                       && to_seq != NULL && (to_seq->nbytes != 1
2730                                             || !isascii (to_seq->bytes[0])))
2731                     ctype->to_nonascii = 1;
2732
2733                   if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2734                       && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2735                     /* We can use this value.  */
2736                     ctype->map256_collection[mapidx][from_seq->bytes[0]]
2737                       = to_seq->bytes[0];
2738
2739                   if (from_wch != ILLEGAL_CHAR_VALUE
2740                       && to_wch != ILLEGAL_CHAR_VALUE)
2741                     /* Both correct values.  */
2742                     *find_idx (ctype, &ctype->map_collection[mapidx],
2743                                &ctype->map_collection_max[mapidx],
2744                                &ctype->map_collection_act[mapidx],
2745                                from_wch) = to_wch;
2746                 }
2747
2748               /* Now comes a semicolon or the end of the line/file.  */
2749               now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2750               if (now->tok == tok_semicolon)
2751                 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2752             }
2753           break;
2754
2755         case tok_translit_start:
2756           /* Ignore the entire translit section with its peculiar syntax
2757              if we don't need the input.  */
2758           if (ignore_content)
2759             {
2760               do
2761                 {
2762                   lr_ignore_rest (ldfile, 0);
2763                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2764                 }
2765               while (now->tok != tok_translit_end && now->tok != tok_eof);
2766
2767               if (now->tok == tok_eof)
2768                 lr_error (ldfile, _(\
2769 "%s: `translit_start' section does not end with `translit_end'"),
2770                           "LC_CTYPE");
2771
2772               break;
2773             }
2774
2775           /* The rest of the line had better be empty.  */
2776           lr_ignore_rest (ldfile, 1);
2777
2778           /* We count here the number of allocated entries in the `translit'
2779              array.  */
2780           cnt = 0;
2781
2782           ldfile->translate_strings = 1;
2783           ldfile->return_widestr = 1;
2784
2785           /* We proceed until we see the `translit_end' token.  */
2786           while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2787                  now->tok != tok_translit_end && now->tok != tok_eof)
2788             {
2789               if (now->tok == tok_eol)
2790                 /* Ignore empty lines.  */
2791                 continue;
2792
2793               if (now->tok == tok_include)
2794                 {
2795                   /* We have to include locale.  */
2796                   const char *locale_name;
2797                   const char *repertoire_name;
2798                   struct translit_include_t *include_stmt, **include_ptr;
2799
2800                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2801                   /* This should be a string or an identifier.  In any
2802                      case something to name a locale.  */
2803                   if (now->tok != tok_string && now->tok != tok_ident)
2804                     {
2805                     translit_syntax:
2806                       lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2807                       lr_ignore_rest (ldfile, 0);
2808                       continue;
2809                     }
2810                   locale_name = now->val.str.startmb;
2811
2812                   /* Next should be a semicolon.  */
2813                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2814                   if (now->tok != tok_semicolon)
2815                     goto translit_syntax;
2816
2817                   /* Now the repertoire name.  */
2818                   now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2819                   if ((now->tok != tok_string && now->tok != tok_ident)
2820                       || now->val.str.startmb == NULL)
2821                     goto translit_syntax;
2822                   repertoire_name = now->val.str.startmb;
2823                   if (repertoire_name[0] == '\0')
2824                     /* Ignore the empty string.  */
2825                     repertoire_name = NULL;
2826
2827                   /* Save the include statement for later processing.  */
2828                   include_stmt = (struct translit_include_t *)
2829                     xmalloc (sizeof (struct translit_include_t));
2830                   include_stmt->copy_locale = locale_name;
2831                   include_stmt->copy_repertoire = repertoire_name;
2832                   include_stmt->next = NULL;
2833
2834                   include_ptr = &ctype->translit_include;
2835                   while (*include_ptr != NULL)
2836                     include_ptr = &(*include_ptr)->next;
2837                   *include_ptr = include_stmt;
2838
2839                   /* The rest of the line must be empty.  */
2840                   lr_ignore_rest (ldfile, 1);
2841
2842                   /* Make sure the locale is read.  */
2843                   add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2844                                    1, NULL);
2845                   continue;
2846                 }
2847               else if (now->tok == tok_default_missing)
2848                 {
2849                   uint32_t *wstr;
2850
2851                   while (1)
2852                     {
2853                       /* We expect a single character or string as the
2854                          argument.  */
2855                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2856                       wstr = read_widestring (ldfile, now, charmap,
2857                                               repertoire);
2858
2859                       if (wstr != NULL)
2860                         {
2861                           if (ctype->default_missing != NULL)
2862                             {
2863                               lr_error (ldfile, _("\
2864 %s: duplicate `default_missing' definition"), "LC_CTYPE");
2865                               WITH_CUR_LOCALE (error_at_line (0, 0,
2866                                                               ctype->default_missing_file,
2867                                                               ctype->default_missing_lineno,
2868                                                               _("\
2869 previous definition was here")));
2870                             }
2871                           else
2872                             {
2873                               ctype->default_missing = wstr;
2874                               ctype->default_missing_file = ldfile->fname;
2875                               ctype->default_missing_lineno = ldfile->lineno;
2876                             }
2877                           /* We can have more entries, ignore them.  */
2878                           lr_ignore_rest (ldfile, 0);
2879                           break;
2880                         }
2881                       else if (wstr == (uint32_t *) -1l)
2882                         /* This was a syntax error.  */
2883                         break;
2884
2885                       /* Maybe there is another replacement we can use.  */
2886                       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2887                       if (now->tok == tok_eol || now->tok == tok_eof)
2888                         {
2889                           /* Nothing found.  We tell the user.  */
2890                           lr_error (ldfile, _("\
2891 %s: no representable `default_missing' definition found"), "LC_CTYPE");
2892                           break;
2893                         }
2894                       if (now->tok != tok_semicolon)
2895                         goto translit_syntax;
2896                     }
2897
2898                   continue;
2899                 }
2900               else if (now->tok == tok_translit_ignore)
2901                 {
2902                   read_translit_ignore_entry (ldfile, ctype, charmap,
2903                                               repertoire);
2904                   continue;
2905                 }
2906
2907               read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2908             }
2909           ldfile->return_widestr = 0;
2910
2911           if (now->tok == tok_eof)
2912             lr_error (ldfile, _(\
2913 "%s: `translit_start' section does not end with `translit_end'"),
2914                       "LC_CTYPE");
2915
2916           break;
2917
2918         case tok_ident:
2919           /* Ignore the rest of the line if we don't need the input of
2920              this line.  */
2921           if (ignore_content)
2922             {
2923               lr_ignore_rest (ldfile, 0);
2924               break;
2925             }
2926
2927           /* This could mean one of several things.  First test whether
2928              it's a character class name.  */
2929           for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2930             if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2931               break;
2932           if (cnt < ctype->nr_charclass)
2933             {
2934               class_bit = _ISwbit (cnt);
2935               class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2936               free (now->val.str.startmb);
2937               goto read_charclass;
2938             }
2939           for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2940             if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2941               break;
2942           if (cnt < ctype->map_collection_nr)
2943             {
2944               mapidx = cnt;
2945               free (now->val.str.startmb);
2946               goto read_mapping;
2947             }
2948 #ifdef PREDEFINED_CLASSES
2949           if (strcmp (now->val.str.startmb, "special1") == 0)
2950             {
2951               class_bit = _ISwspecial1;
2952               free (now->val.str.startmb);
2953               goto read_charclass;
2954             }
2955           if (strcmp (now->val.str.startmb, "special2") == 0)
2956             {
2957               class_bit = _ISwspecial2;
2958               free (now->val.str.startmb);
2959               goto read_charclass;
2960             }
2961           if (strcmp (now->val.str.startmb, "special3") == 0)
2962             {
2963               class_bit = _ISwspecial3;
2964               free (now->val.str.startmb);
2965               goto read_charclass;
2966             }
2967           if (strcmp (now->val.str.startmb, "tosymmetric") == 0)
2968             {
2969               mapidx = 2;
2970               goto read_mapping;
2971             }
2972 #endif
2973           break;
2974
2975         case tok_end:
2976           /* Next we assume `LC_CTYPE'.  */
2977           now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2978           if (now->tok == tok_eof)
2979             break;
2980           if (now->tok == tok_eol)
2981             lr_error (ldfile, _("%s: incomplete `END' line"),
2982                       "LC_CTYPE");
2983           else if (now->tok != tok_lc_ctype)
2984             lr_error (ldfile, _("\
2985 %1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2986           lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2987           return;
2988
2989         default:
2990         err_label:
2991           if (now->tok != tok_eof)
2992             SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2993         }
2994
2995       /* Prepare for the next round.  */
2996       now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2997       nowtok = now->tok;
2998     }
2999
3000   /* When we come here we reached the end of the file.  */
3001   lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
3002 }
3003
3004
3005 static void
3006 set_class_defaults (struct locale_ctype_t *ctype,
3007                     const struct charmap_t *charmap,
3008                     struct repertoire_t *repertoire)
3009 {
3010   size_t cnt;
3011
3012   /* This function defines the default values for the classes and conversions
3013      according to POSIX.2 2.5.2.1.
3014      It may seem that the order of these if-blocks is arbitrary but it is NOT.
3015      Don't move them unless you know what you are doing!  */
3016
3017   auto void set_default (int bitpos, int from, int to);
3018
3019   void set_default (int bitpos, int from, int to)
3020     {
           /* Nested helper of set_class_defaults.  For every character value
              CH in FROM..TO (inclusive) it sets the class bit selected by
              BITPOS: in ctype->class256_collection when CHARMAP gives CH a
              one-byte encoding, and always in the class_collection entry
              for CH.  CTYPE and CHARMAP are the parameters of the enclosing
              function.  */
3021       char tmp[2];
3022       int ch;
           /* BIT is ORed into class256_collection, BITW into
              class_collection.  */
3023       int bit = _ISbit (bitpos);
3024       int bitw = _ISwbit (bitpos);
3025       /* Define string.  TMP becomes a NUL-terminated one-character string;
              only tmp[0] is overwritten in the loop below.  */
3026       strcpy (tmp, "?");
3027
3028       for (ch = from; ch <= to; ++ch)
3029         {
3030           struct charseq *seq;
3031           tmp[0] = ch;
3032
               /* Look CH up in the charmap by the one-character key in TMP
                  first; if that fails, retry with the nine-character key
                  made of U followed by CH as eight uppercase hex digits.  */
3033           seq = charmap_find_value (charmap, tmp, 1);
3034           if (seq == NULL)
3035             {
                   /* Room for U, eight hex digits and the terminating NUL.  */
3036               char buf[10];
3037               sprintf (buf, "U%08X", ch);
3038               seq = charmap_find_value (charmap, buf, 9);
3039             }
               /* Not found under either key: report it unless be_quiet is
                  set.  The byte table is left untouched for this CH.  */
3040           if (seq == NULL)
3041             {
3042               if (!be_quiet)
3043                 WITH_CUR_LOCALE (error (0, 0, _("\
3044 %s: character `%s' not defined while needed as default value"),
3045                                         "LC_CTYPE", tmp));
3046             }
               /* Found, but the encoding is not exactly one byte long, so it
                  cannot index the 256-entry table.  This message is emitted
                  regardless of be_quiet.  */
3047           else if (seq->nbytes != 1)
3048             WITH_CUR_LOCALE (error (0, 0, _("\
3049 %s: character `%s' in charmap not representable with one byte"),
3050                                     "LC_CTYPE", tmp));
3051           else
3052             ctype->class256_collection[seq->bytes[0]] |= bit;
3053
               /* The wide-character entry is updated for every CH, whether or
                  not the charmap lookup above succeeded.
                  NOTE(review): ELEM is defined outside this excerpt;
                  presumably it yields the class_collection slot for CH --
                  confirm against its definition.  */
3054           /* No need to search here, the ASCII value is also the Unicode
3055              value.  */
3056           ELEM (ctype, class_collection, , ch) |= bitw;
3057         }
3058     }
3059
3060   /* Set default values if keyword was not present.  */
3061   if ((ctype->class_done & BITw (tok_upper)) == 0)
3062     /* "If this keyword [upper] is not specified, the uppercase letters
3063         `A' through `Z', ..., shall automatically belong to this class,
3064         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3065     set_default (BITPOS (tok_upper), 'A', 'Z');
3066
3067   if ((ctype->class_done & BITw (tok_lower)) == 0)
3068     /* "If this keyword [lower] is not specified, the lowercase letters
3069         `a' through `z', ..., shall automatically belong to this class,
3070         with implementation defined character values."  [P1003.2, 2.5.2.1]  */
3071     set_default (BITPOS (tok_lower), 'a', 'z');
3072
3073   if ((ctype->class_done & BITw (tok_alpha)) == 0)
3074     {
3075       /* Table 2-6 in P1003.2 says that characters in class `upper' or
3076          class `lower' *must* be in class `alpha'.  */
3077       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
3078       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
3079
3080       for (cnt = 0; cnt < 256; ++cnt)
3081         if ((ctype->class256_collection[cnt] & mask) != 0)
3082           ctype->class256_collection[cnt] |= BIT (tok_alpha);
3083
3084       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3085         if ((ctype->class_collection[cnt] & maskw) != 0)
3086           ctype->class_collection[cnt] |= BITw (tok_alpha);
3087     }
3088
3089   if ((ctype->class_done & BITw (tok_digit)) == 0)
3090     /* "If this keyword [digit] is not specified, the digits `0' through
3091         `9', ..., shall automatically belong to this class, with
3092         implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3093     set_default (BITPOS (tok_digit), '0', '9');
3094
3095   /* Class `alnum': "Only characters specified for the `alpha' and `digit'
3096      keyword shall be specified.  Characters specified for the keyword
3097      `alpha' and `digit' are automatically included in this class."  */
3098   {
3099     unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
3100     unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
3101
3102     for (cnt = 0; cnt < 256; ++cnt)
3103       if ((ctype->class256_collection[cnt] & mask) != 0)
3104         ctype->class256_collection[cnt] |= BIT (tok_alnum);
3105
3106     for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3107       if ((ctype->class_collection[cnt] & maskw) != 0)
3108         ctype->class_collection[cnt] |= BITw (tok_alnum);
3109   }
3110
3111   if ((ctype->class_done & BITw (tok_space)) == 0)
3112     /* "If this keyword [space] is not specified, the characters <space>,
3113         <form-feed>, <newline>, <carriage-return>, <tab>, and
3114         <vertical-tab>, ..., shall automatically belong to this class,
3115         with implementation-defined character values."  [P1003.2, 2.5.2.1]  */
3116     {
3117       struct charseq *seq;
3118
3119       seq = charmap_find_value (charmap, "space", 5);
3120       if (seq == NULL)
3121         seq = charmap_find_value (charmap, "SP", 2);
3122       if (seq == NULL)
3123         seq = charmap_find_value (charmap, "U00000020", 9);
3124       if (seq == NULL)
3125         {
3126           if (!be_quiet)
3127             WITH_CUR_LOCALE (error (0, 0, _("\
3128 %s: character `%s' not defined while needed as default value"),
3129                                     "LC_CTYPE", "<space>"));
3130         }
3131       else if (seq->nbytes != 1)
3132         WITH_CUR_LOCALE (error (0, 0, _("\
3133 %s: character `%s' in charmap not representable with one byte"),
3134                                 "LC_CTYPE", "<space>"));
3135       else
3136         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3137
3138       /* No need to search.  */
3139       ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
3140
3141       seq = charmap_find_value (charmap, "form-feed", 9);
3142       if (seq == NULL)
3143         seq = charmap_find_value (charmap, "U0000000C", 9);
3144       if (seq == NULL)
3145         {
3146           if (!be_quiet)
3147             WITH_CUR_LOCALE (error (0, 0, _("\
3148 %s: character `%s' not defined while needed as default value"),
3149                                     "LC_CTYPE", "<form-feed>"));
3150         }
3151       else if (seq->nbytes != 1)
3152         WITH_CUR_LOCALE (error (0, 0, _("\
3153 %s: character `%s' in charmap not representable with one byte"),
3154                                 "LC_CTYPE", "<form-feed>"));
3155       else
3156         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3157
3158       /* No need to search.  */
3159       ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3160
3161
3162       seq = charmap_find_value (charmap, "newline", 7);
3163       if (seq == NULL)
3164         seq = charmap_find_value (charmap, "U0000000A", 9);
3165       if (seq == NULL)
3166         {
3167           if (!be_quiet)
3168             WITH_CUR_LOCALE (error (0, 0, _("\
3169 %s: character `%s' not defined while needed as default value"),
3170                                     "LC_CTYPE", "<newline>"));
3171         }
3172       else if (seq->nbytes != 1)
3173         WITH_CUR_LOCALE (error (0, 0, _("\
3174 %s: character `%s' in charmap not representable with one byte"),
3175                                 "LC_CTYPE", "<newline>"));
3176       else
3177         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3178
3179       /* No need to search.  */
3180       ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3181
3182
3183       seq = charmap_find_value (charmap, "carriage-return", 15);
3184       if (seq == NULL)
3185         seq = charmap_find_value (charmap, "U0000000D", 9);
3186       if (seq == NULL)
3187         {
3188           if (!be_quiet)
3189             WITH_CUR_LOCALE (error (0, 0, _("\
3190 %s: character `%s' not defined while needed as default value"),
3191                                     "LC_CTYPE", "<carriage-return>"));
3192         }
3193       else if (seq->nbytes != 1)
3194         WITH_CUR_LOCALE (error (0, 0, _("\
3195 %s: character `%s' in charmap not representable with one byte"),
3196                                 "LC_CTYPE", "<carriage-return>"));
3197       else
3198         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3199
3200       /* No need to search.  */
3201       ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3202
3203
3204       seq = charmap_find_value (charmap, "tab", 3);
3205       if (seq == NULL)
3206         seq = charmap_find_value (charmap, "U00000009", 9);
3207       if (seq == NULL)
3208         {
3209           if (!be_quiet)
3210             WITH_CUR_LOCALE (error (0, 0, _("\
3211 %s: character `%s' not defined while needed as default value"),
3212                                     "LC_CTYPE", "<tab>"));
3213         }
3214       else if (seq->nbytes != 1)
3215         WITH_CUR_LOCALE (error (0, 0, _("\
3216 %s: character `%s' in charmap not representable with one byte"),
3217                                 "LC_CTYPE", "<tab>"));
3218       else
3219         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3220
3221       /* No need to search.  */
3222       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3223
3224
3225       seq = charmap_find_value (charmap, "vertical-tab", 12);
3226       if (seq == NULL)
3227         seq = charmap_find_value (charmap, "U0000000B", 9);
3228       if (seq == NULL)
3229         {
3230           if (!be_quiet)
3231             WITH_CUR_LOCALE (error (0, 0, _("\
3232 %s: character `%s' not defined while needed as default value"),
3233                                     "LC_CTYPE", "<vertical-tab>"));
3234         }
3235       else if (seq->nbytes != 1)
3236         WITH_CUR_LOCALE (error (0, 0, _("\
3237 %s: character `%s' in charmap not representable with one byte"),
3238                                 "LC_CTYPE", "<vertical-tab>"));
3239       else
3240         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3241
3242       /* No need to search.  */
3243       ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3244     }
3245
3246   if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3247     /* "If this keyword is not specified, the digits `0' to `9', the
3248         uppercase letters `A' through `F', and the lowercase letters `a'
3249         through `f', ..., shell automatically belong to this class, with
3250         implementation defined character values."  [P1003.2, 2.5.2.1]  */
3251     {
3252       set_default (BITPOS (tok_xdigit), '0', '9');
3253       set_default (BITPOS (tok_xdigit), 'A', 'F');
3254       set_default (BITPOS (tok_xdigit), 'a', 'f');
3255     }
3256
3257   if ((ctype->class_done & BITw (tok_blank)) == 0)
3258     /* "If this keyword [blank] is unspecified, the characters <space> and
3259        <tab> shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3260    {
3261       struct charseq *seq;
3262
3263       seq = charmap_find_value (charmap, "space", 5);
3264       if (seq == NULL)
3265         seq = charmap_find_value (charmap, "SP", 2);
3266       if (seq == NULL)
3267         seq = charmap_find_value (charmap, "U00000020", 9);
3268       if (seq == NULL)
3269         {
3270           if (!be_quiet)
3271             WITH_CUR_LOCALE (error (0, 0, _("\
3272 %s: character `%s' not defined while needed as default value"),
3273                                     "LC_CTYPE", "<space>"));
3274         }
3275       else if (seq->nbytes != 1)
3276         WITH_CUR_LOCALE (error (0, 0, _("\
3277 %s: character `%s' in charmap not representable with one byte"),
3278                                 "LC_CTYPE", "<space>"));
3279       else
3280         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3281
3282       /* No need to search.  */
3283       ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3284
3285
3286       seq = charmap_find_value (charmap, "tab", 3);
3287       if (seq == NULL)
3288         seq = charmap_find_value (charmap, "U00000009", 9);
3289       if (seq == NULL)
3290         {
3291           if (!be_quiet)
3292             WITH_CUR_LOCALE (error (0, 0, _("\
3293 %s: character `%s' not defined while needed as default value"),
3294                                     "LC_CTYPE", "<tab>"));
3295         }
3296       else if (seq->nbytes != 1)
3297         WITH_CUR_LOCALE (error (0, 0, _("\
3298 %s: character `%s' in charmap not representable with one byte"),
3299                                 "LC_CTYPE", "<tab>"));
3300       else
3301         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3302
3303       /* No need to search.  */
3304       ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3305     }
3306
3307   if ((ctype->class_done & BITw (tok_graph)) == 0)
3308     /* "If this keyword [graph] is not specified, characters specified for
3309         the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3310         shall belong to this character class."  [P1003.2, 2.5.2.1]  */
3311     {
3312       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3313         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3314       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3315         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3316         BITw (tok_punct);
3317       size_t cnt;
3318
3319       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3320         if ((ctype->class_collection[cnt] & maskw) != 0)
3321           ctype->class_collection[cnt] |= BITw (tok_graph);
3322
3323       for (cnt = 0; cnt < 256; ++cnt)
3324         if ((ctype->class256_collection[cnt] & mask) != 0)
3325           ctype->class256_collection[cnt] |= BIT (tok_graph);
3326     }
3327
3328   if ((ctype->class_done & BITw (tok_print)) == 0)
3329     /* "If this keyword [print] is not provided, characters specified for
3330         the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3331         and the <space> character shall belong to this character class."
3332         [P1003.2, 2.5.2.1]  */
3333     {
3334       unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3335         BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3336       unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3337         BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3338         BITw (tok_punct);
3339       size_t cnt;
3340       struct charseq *seq;
3341
3342       for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3343         if ((ctype->class_collection[cnt] & maskw) != 0)
3344           ctype->class_collection[cnt] |= BITw (tok_print);
3345
3346       for (cnt = 0; cnt < 256; ++cnt)
3347         if ((ctype->class256_collection[cnt] & mask) != 0)
3348           ctype->class256_collection[cnt] |= BIT (tok_print);
3349
3350
3351       seq = charmap_find_value (charmap, "space", 5);
3352       if (seq == NULL)
3353         seq = charmap_find_value (charmap, "SP", 2);
3354       if (seq == NULL)
3355         seq = charmap_find_value (charmap, "U00000020", 9);
3356       if (seq == NULL)
3357         {
3358           if (!be_quiet)
3359             WITH_CUR_LOCALE (error (0, 0, _("\
3360 %s: character `%s' not defined while needed as default value"),
3361                                     "LC_CTYPE", "<space>"));
3362         }
3363       else if (seq->nbytes != 1)
3364         WITH_CUR_LOCALE (error (0, 0, _("\
3365 %s: character `%s' in charmap not representable with one byte"),
3366                                 "LC_CTYPE", "<space>"));
3367       else
3368         ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3369
3370       /* No need to search.  */
3371       ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3372     }
3373
3374   if (ctype->tomap_done[0] == 0)
3375     /* "If this keyword [toupper] is not specified, the lowercase letters
3376         `a' through `z', and their corresponding uppercase letters `A' to
3377         `Z', ..., shall automatically be included, with implementation-
3378         defined character values."  [P1003.2, 2.5.2.1]  */
3379     {
3380       char tmp[4];
3381       int ch;
3382
3383       strcpy (tmp, "<?>");
3384
3385       for (ch = 'a'; ch <= 'z'; ++ch)
3386         {
3387           struct charseq *seq_from, *seq_to;
3388
3389           tmp[1] = (char) ch;
3390
3391           seq_from = charmap_find_value (charmap, &tmp[1], 1);
3392           if (seq_from == NULL)
3393             {
3394               char buf[10];
3395               sprintf (buf, "U%08X", ch);
3396               seq_from = charmap_find_value (charmap, buf, 9);
3397             }
3398           if (seq_from == NULL)
3399             {
3400               if (!be_quiet)
3401                 WITH_CUR_LOCALE (error (0, 0, _("\
3402 %s: character `%s' not defined while needed as default value"),
3403                                         "LC_CTYPE", tmp));
3404             }
3405           else if (seq_from->nbytes != 1)
3406             {
3407               if (!be_quiet)
3408                 WITH_CUR_LOCALE (error (0, 0, _("\
3409 %s: character `%s' needed as default value not representable with one byte"),
3410                                         "LC_CTYPE", tmp));
3411             }
3412           else
3413             {
3414               /* This conversion is implementation defined.  */
3415               tmp[1] = (char) (ch + ('A' - 'a'));
3416               seq_to = charmap_find_value (charmap, &tmp[1], 1);
3417               if (seq_to == NULL)
3418                 {
3419                   char buf[10];
3420                   sprintf (buf, "U%08X", ch + ('A' - 'a'));
3421                   seq_to = charmap_find_value (charmap, buf, 9);
3422                 }
3423               if (seq_to == NULL)
3424                 {
3425                   if (!be_quiet)
3426                     WITH_CUR_LOCALE (error (0, 0, _("\
3427 %s: character `%s' not defined while needed as default value"),
3428                                             "LC_CTYPE", tmp));
3429                 }
3430               else if (seq_to->nbytes != 1)
3431                 {
3432                   if (!be_quiet)
3433                     WITH_CUR_LOCALE (error (0, 0, _("\
3434 %s: character `%s' needed as default value not representable with one byte"),
3435                                             "LC_CTYPE", tmp));
3436                 }
3437               else
3438                 /* The index [0] is determined by the order of the
3439                    `ctype_map_newP' calls in `ctype_startup'.  */
3440                 ctype->map256_collection[0][seq_from->bytes[0]]
3441                   = seq_to->bytes[0];
3442             }
3443
3444           /* No need to search.  */
3445           ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3446         }
3447     }
3448
3449   if (ctype->tomap_done[1] == 0)
3450     /* "If this keyword [tolower] is not specified, the mapping shall be
3451        the reverse mapping of the one specified to `toupper'."  [P1003.2]  */
3452     {
3453       for (cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3454         if (ctype->map_collection[0][cnt] != 0)
3455           ELEM (ctype, map_collection, [1],
3456                 ctype->map_collection[0][cnt])
3457             = ctype->charnames[cnt];
3458
3459       for (cnt = 0; cnt < 256; ++cnt)
3460         if (ctype->map256_collection[0][cnt] != 0)
3461           ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3462     }
3463
3464   if (ctype->outdigits_act != 10)
3465     {
3466       if (ctype->outdigits_act != 0)
3467         WITH_CUR_LOCALE (error (0, 0, _("\
3468 %s: field `%s' does not contain exactly ten entries"),
3469                                 "LC_CTYPE", "outdigit"));
3470
3471       for (cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3472         {
3473           ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3474                                                          (char *) digits + cnt,
3475                                                          1);
3476
3477           if (ctype->mboutdigits[cnt] == NULL)
3478             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3479                                                            longnames[cnt],
3480                                                            strlen (longnames[cnt]));
3481
3482           if (ctype->mboutdigits[cnt] == NULL)
3483             ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3484                                                            uninames[cnt], 9);
3485
3486           if (ctype->mboutdigits[cnt] == NULL)
3487             {
3488               /* Provide a replacement.  */
3489               WITH_CUR_LOCALE (error (0, 0, _("\
3490 no output digits defined and none of the standard names in the charmap")));
3491
3492               ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3493                                                        sizeof (struct charseq)
3494                                                        + 1);
3495
3496               /* This is better than nothing.  */
3497               ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3498               ctype->mboutdigits[cnt]->nbytes = 1;
3499             }
3500
3501           ctype->wcoutdigits[cnt] = L'0' + cnt;
3502         }
3503
3504       ctype->outdigits_act = 10;
3505     }
3506 }
3507
3508
3509 /* Construction of sparse 3-level tables.
3510    See wchar-lookup.h for their structure and the meaning of p and q.  */
3511
/* Builder state for one sparse bit table indexed by wide character.
   Level 1 is a plain array of entries; levels 2 and 3 are arrays of
   fixed-size blocks (1 << q resp. 1 << p words per block).  */
struct wctype_table
{
  /* Tuning parameters: log2 of the level-3 block size (P) and of the
     level-2 block size (Q).  Must be set before initialization.  */
  unsigned int p;
  unsigned int q;

  /* Growable working arrays.  For every level, `alloc' is the capacity
     and `size' the part in use: counted in entries for level 1 and in
     blocks for levels 2 and 3.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;

  /* Flattened table produced by finalization: a malloc'ed buffer and
     its length in bytes.  */
  size_t result_size;
  char *result;
};
3531
3532 /* Initialize.  Assumes t->p and t->q have already been set.  */
3533 static inline void
3534 wctype_table_init (struct wctype_table *t)
3535 {
3536   t->level1 = NULL;
3537   t->level1_alloc = t->level1_size = 0;
3538   t->level2 = NULL;
3539   t->level2_alloc = t->level2_size = 0;
3540   t->level3 = NULL;
3541   t->level3_alloc = t->level3_size = 0;
3542 }
3543
3544 /* Retrieve an entry.  */
3545 static inline int
3546 wctype_table_get (struct wctype_table *t, uint32_t wc)
3547 {
3548   uint32_t index1 = wc >> (t->q + t->p + 5);
3549   if (index1 < t->level1_size)
3550     {
3551       uint32_t lookup1 = t->level1[index1];
3552       if (lookup1 != EMPTY)
3553         {
3554           uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3555                             + (lookup1 << t->q);
3556           uint32_t lookup2 = t->level2[index2];
3557           if (lookup2 != EMPTY)
3558             {
3559               uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3560                                 + (lookup2 << t->p);
3561               uint32_t lookup3 = t->level3[index3];
3562               uint32_t index4 = wc & 0x1f;
3563
3564               return (lookup3 >> index4) & 1;
3565             }
3566         }
3567     }
3568   return 0;
3569 }
3570
3571 /* Add one entry.  */
3572 static void
3573 wctype_table_add (struct wctype_table *t, uint32_t wc)
3574 {
3575   uint32_t index1 = wc >> (t->q + t->p + 5);
3576   uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3577   uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3578   uint32_t index4 = wc & 0x1f;
3579   size_t i, i1, i2;
3580
3581   if (index1 >= t->level1_size)
3582     {
3583       if (index1 >= t->level1_alloc)
3584         {
3585           size_t alloc = 2 * t->level1_alloc;
3586           if (alloc <= index1)
3587             alloc = index1 + 1;
3588           t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3589                                              alloc * sizeof (uint32_t));
3590           t->level1_alloc = alloc;
3591         }
3592       while (index1 >= t->level1_size)
3593         t->level1[t->level1_size++] = EMPTY;
3594     }
3595
3596   if (t->level1[index1] == EMPTY)
3597     {
3598       if (t->level2_size == t->level2_alloc)
3599         {
3600           size_t alloc = 2 * t->level2_alloc + 1;
3601           t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3602                                              (alloc << t->q) * sizeof (uint32_t));
3603           t->level2_alloc = alloc;
3604         }
3605       i1 = t->level2_size << t->q;
3606       i2 = (t->level2_size + 1) << t->q;
3607       for (i = i1; i < i2; i++)
3608         t->level2[i] = EMPTY;
3609       t->level1[index1] = t->level2_size++;
3610     }
3611
3612   index2 += t->level1[index1] << t->q;
3613
3614   if (t->level2[index2] == EMPTY)
3615     {
3616       if (t->level3_size == t->level3_alloc)
3617         {
3618           size_t alloc = 2 * t->level3_alloc + 1;
3619           t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3620                                              (alloc << t->p) * sizeof (uint32_t));
3621           t->level3_alloc = alloc;
3622         }
3623       i1 = t->level3_size << t->p;
3624       i2 = (t->level3_size + 1) << t->p;
3625       for (i = i1; i < i2; i++)
3626         t->level3[i] = 0;
3627       t->level2[index2] = t->level3_size++;
3628     }
3629
3630   index3 += t->level2[index2] << t->p;
3631
3632   t->level3[index3] |= (uint32_t)1 << index4;
3633 }
3634
3635 /* Finalize and shrink.  */
3636 static void
3637 wctype_table_finalize (struct wctype_table *t)
3638 {
3639   size_t i, j, k;
3640   uint32_t reorder3[t->level3_size];
3641   uint32_t reorder2[t->level2_size];
3642   uint32_t level1_offset, level2_offset, level3_offset;
3643
3644   /* Uniquify level3 blocks.  */
3645   k = 0;
3646   for (j = 0; j < t->level3_size; j++)
3647     {
3648       for (i = 0; i < k; i++)
3649         if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3650                     (1 << t->p) * sizeof (uint32_t)) == 0)
3651           break;
3652       /* Relocate block j to block i.  */
3653       reorder3[j] = i;
3654       if (i == k)
3655         {
3656           if (i != j)
3657             memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3658                     (1 << t->p) * sizeof (uint32_t));
3659           k++;
3660         }
3661     }
3662   t->level3_size = k;
3663
3664   for (i = 0; i < (t->level2_size << t->q); i++)
3665     if (t->level2[i] != EMPTY)
3666       t->level2[i] = reorder3[t->level2[i]];
3667
3668   /* Uniquify level2 blocks.  */
3669   k = 0;
3670   for (j = 0; j < t->level2_size; j++)
3671     {
3672       for (i = 0; i < k; i++)
3673         if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3674                     (1 << t->q) * sizeof (uint32_t)) == 0)
3675           break;
3676       /* Relocate block j to block i.  */
3677       reorder2[j] = i;
3678       if (i == k)
3679         {
3680           if (i != j)
3681             memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3682                     (1 << t->q) * sizeof (uint32_t));
3683           k++;
3684         }
3685     }
3686   t->level2_size = k;
3687
3688   for (i = 0; i < t->level1_size; i++)
3689     if (t->level1[i] != EMPTY)
3690       t->level1[i] = reorder2[t->level1[i]];
3691
3692   /* Create and fill the resulting compressed representation.  */
3693   t->result_size =
3694     5 * sizeof (uint32_t)
3695     + t->level1_size * sizeof (uint32_t)
3696     + (t->level2_size << t->q) * sizeof (uint32_t)
3697     + (t->level3_size << t->p) * sizeof (uint32_t);
3698   t->result = (char *) xmalloc (t->result_size);
3699
3700   level1_offset =
3701     5 * sizeof (uint32_t);
3702   level2_offset =
3703     5 * sizeof (uint32_t)
3704     + t->level1_size * sizeof (uint32_t);
3705   level3_offset =
3706     5 * sizeof (uint32_t)
3707     + t->level1_size * sizeof (uint32_t)
3708     + (t->level2_size << t->q) * sizeof (uint32_t);
3709
3710   ((uint32_t *) t->result)[0] = t->q + t->p + 5;
3711   ((uint32_t *) t->result)[1] = t->level1_size;
3712   ((uint32_t *) t->result)[2] = t->p + 5;
3713   ((uint32_t *) t->result)[3] = (1 << t->q) - 1;
3714   ((uint32_t *) t->result)[4] = (1 << t->p) - 1;
3715
3716   for (i = 0; i < t->level1_size; i++)
3717     ((uint32_t *) (t->result + level1_offset))[i] =
3718       (t->level1[i] == EMPTY
3719        ? 0
3720        : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3721
3722   for (i = 0; i < (t->level2_size << t->q); i++)
3723     ((uint32_t *) (t->result + level2_offset))[i] =
3724       (t->level2[i] == EMPTY
3725        ? 0
3726        : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3727
3728   for (i = 0; i < (t->level3_size << t->p); i++)
3729     ((uint32_t *) (t->result + level3_offset))[i] = t->level3[i];
3730
3731   if (t->level1_alloc > 0)
3732     free (t->level1);
3733   if (t->level2_alloc > 0)
3734     free (t->level2);
3735   if (t->level3_alloc > 0)
3736     free (t->level3);
3737 }
3738
/* Instantiate the generic table code from 3level.h for character
   widths: one uint8_t per character, 0xff where nothing is stored.
   The code further down uses the resulting `struct wcwidth_table'
   together with wcwidth_table_init and wcwidth_table_add.
   NOTE(review): 3level.h presumably undefines TABLE, ELEMENT and
   DEFAULT itself, since they are redefined below without an #undef --
   confirm against the header.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Second instantiation, for character mappings: one int32_t per
   character, 0 where nothing is stored.  While the header is read,
   wctrans_table_add is renamed to wctrans_table_add_internal so that
   the public name can be given to a wrapper defined afterwards.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record that WC maps to MAPPED_WC.  The table does not hold the target
   character itself but its distance from the argument, so that value is
   what gets handed to the generated insertion routine.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
3757
3758
3759 /* Flattens the included transliterations into a translit list.
3760    Inserts them in the list at `cursor', and returns the new cursor.  */
3761 static struct translit_t **
3762 translit_flatten (struct locale_ctype_t *ctype,
3763                   const struct charmap_t *charmap,
3764                   struct translit_t **cursor)
3765 {
3766   while (ctype->translit_include != NULL)
3767     {
3768       const char *copy_locale = ctype->translit_include->copy_locale;
3769       const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3770       struct localedef_t *other;
3771
3772       /* Unchain the include statement.  During the depth-first traversal
3773          we don't want to visit any locale more than once.  */
3774       ctype->translit_include = ctype->translit_include->next;
3775
3776       other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3777
3778       if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3779         {
3780           WITH_CUR_LOCALE (error (0, 0, _("\
3781 %s: transliteration data from locale `%s' not available"),
3782                                   "LC_CTYPE", copy_locale));
3783         }
3784       else
3785         {
3786           struct locale_ctype_t *other_ctype =
3787             other->categories[LC_CTYPE].ctype;
3788
3789           cursor = translit_flatten (other_ctype, charmap, cursor);
3790           assert (other_ctype->translit_include == NULL);
3791
3792           if (other_ctype->translit != NULL)
3793             {
3794               /* Insert the other_ctype->translit list at *cursor.  */
3795               struct translit_t *endp = other_ctype->translit;
3796               while (endp->next != NULL)
3797                 endp = endp->next;
3798
3799               endp->next = *cursor;
3800               *cursor = other_ctype->translit;
3801
3802               /* Avoid any risk of circular lists.  */
3803               other_ctype->translit = NULL;
3804
3805               cursor = &endp->next;
3806             }
3807
3808           if (ctype->default_missing == NULL)
3809             ctype->default_missing = other_ctype->default_missing;
3810         }
3811     }
3812
3813   return cursor;
3814 }
3815
3816 static void
3817 allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3818                  struct repertoire_t *repertoire)
3819 {
3820   size_t idx, nr;
3821   const void *key;
3822   size_t len;
3823   void *vdata;
3824   void *curs;
3825
3826   /* You wonder about this amount of memory?  This is only because some
3827      users do not manage to address the array with unsigned values or
3828      data types with range >= 256.  '\200' would result in the array
3829      index -128.  To help these poor people we duplicate the entries for
3830      128 up to 255 below the entry for \0.  */
3831   ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3832   ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3833   ctype->class_b = (uint32_t **)
3834     xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3835   ctype->class_3level = (struct iovec *)
3836     xmalloc (ctype->nr_charclass * sizeof (struct iovec));
3837
3838   /* This is the array accessed using the multibyte string elements.  */
3839   for (idx = 0; idx < 256; ++idx)
3840     ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3841
3842   /* Mirror first 127 entries.  We must take care that entry -1 is not
3843      mirrored because EOF == -1.  */
3844   for (idx = 0; idx < 127; ++idx)
3845     ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3846
3847   /* The 32 bit array contains all characters < 0x100.  */
3848   for (idx = 0; idx < ctype->class_collection_act; ++idx)
3849     if (ctype->charnames[idx] < 0x100)
3850       ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3851
3852   for (nr = 0; nr < ctype->nr_charclass; nr++)
3853     {
3854       ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3855
3856       /* We only set CLASS_B for the bits in the ISO C classes, not
3857          the user defined classes.  The number should not change but
3858          who knows.  */
3859 #define LAST_ISO_C_BIT 11
3860       if (nr <= LAST_ISO_C_BIT)
3861         for (idx = 0; idx < 256; ++idx)
3862           if (ctype->class256_collection[idx] & _ISbit (nr))
3863             ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3864     }
3865
3866   for (nr = 0; nr < ctype->nr_charclass; nr++)
3867     {
3868       struct wctype_table t;
3869
3870       t.p = 4; /* or: 5 */
3871       t.q = 7; /* or: 6 */
3872       wctype_table_init (&t);
3873
3874       for (idx = 0; idx < ctype->class_collection_act; ++idx)
3875         if (ctype->class_collection[idx] & _ISwbit (nr))
3876           wctype_table_add (&t, ctype->charnames[idx]);
3877
3878       wctype_table_finalize (&t);
3879
3880       if (verbose)
3881         WITH_CUR_LOCALE (fprintf (stderr, _("\
3882 %s: table for class \"%s\": %lu bytes\n"),
3883                                  "LC_CTYPE", ctype->classnames[nr],
3884                                  (unsigned long int) t.result_size));
3885
3886       ctype->class_3level[nr].iov_base = t.result;
3887       ctype->class_3level[nr].iov_len = t.result_size;
3888     }
3889
3890   /* Room for table of mappings.  */
3891   ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3892   ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3893                                           * sizeof (uint32_t *));
3894   ctype->map_3level = (struct iovec *)
3895     xmalloc (ctype->map_collection_nr * sizeof (struct iovec));
3896
3897   /* Fill in all mappings.  */
3898   for (idx = 0; idx < 2; ++idx)
3899     {
3900       unsigned int idx2;
3901
3902       /* Allocate table.  */
3903       ctype->map_b[idx] = (uint32_t *)
3904         xmalloc ((256 + 128) * sizeof (uint32_t));
3905
3906       /* Copy values from collection.  */
3907       for (idx2 = 0; idx2 < 256; ++idx2)
3908         ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3909
3910       /* Mirror first 127 entries.  We must take care not to map entry
3911          -1 because EOF == -1.  */
3912       for (idx2 = 0; idx2 < 127; ++idx2)
3913         ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3914
3915       /* EOF must map to EOF.  */
3916       ctype->map_b[idx][127] = EOF;
3917     }
3918
3919   for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3920     {
3921       unsigned int idx2;
3922
3923       /* Allocate table.  */
3924       ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3925
3926       /* Copy values from collection.  Default is identity mapping.  */
3927       for (idx2 = 0; idx2 < 256; ++idx2)
3928         ctype->map32_b[idx][idx2] =
3929           (ctype->map_collection[idx][idx2] != 0
3930            ? ctype->map_collection[idx][idx2]
3931            : idx2);
3932     }
3933
3934   for (nr = 0; nr < ctype->map_collection_nr; nr++)
3935     {
3936       struct wctrans_table t;
3937
3938       t.p = 7;
3939       t.q = 9;
3940       wctrans_table_init (&t);
3941
3942       for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3943         if (ctype->map_collection[nr][idx] != 0)
3944           wctrans_table_add (&t, ctype->charnames[idx],
3945                              ctype->map_collection[nr][idx]);
3946
3947       wctrans_table_finalize (&t);
3948
3949       if (verbose)
3950         WITH_CUR_LOCALE (fprintf (stderr, _("\
3951 %s: table for map \"%s\": %lu bytes\n"),
3952                                  "LC_CTYPE", ctype->mapnames[nr],
3953                                  (unsigned long int) t.result_size));
3954
3955       ctype->map_3level[nr].iov_base = t.result;
3956       ctype->map_3level[nr].iov_len = t.result_size;
3957     }
3958
3959   /* Extra array for class and map names.  */
3960   ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3961                                                 * sizeof (uint32_t));
3962   ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3963                                               * sizeof (uint32_t));
3964
3965   ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3966   ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3967
3968   /* Array for width information.  Because the expected widths are very
3969      small (never larger than 2) we use only one single byte.  This
3970      saves space.
3971      We put only printable characters in the table.  wcwidth is specified
3972      to return -1 for non-printable characters.  Doing the check here
3973      saves a run-time check.
3974      But we put L'\0' in the table.  This again saves a run-time check.  */
3975   {
3976     struct wcwidth_table t;
3977
3978     t.p = 7;
3979     t.q = 9;
3980     wcwidth_table_init (&t);
3981
3982     /* First set all the printable characters of the character set to
3983        the default width.  */
3984     curs = NULL;
3985     while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3986       {
3987         struct charseq *data = (struct charseq *) vdata;
3988
3989         if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3990           data->ucs4 = repertoire_find_value (ctype->repertoire,
3991                                               data->name, len);
3992
3993         if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3994           {
3995             uint32_t *class_bits =
3996               find_idx (ctype, &ctype->class_collection, NULL,
3997                         &ctype->class_collection_act, data->ucs4);
3998
3999             if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4000               wcwidth_table_add (&t, data->ucs4, charmap->width_default);
4001           }
4002       }
4003
4004     /* Now add the explicitly specified widths.  */
4005     if (charmap->width_rules != NULL)
4006       {
4007         size_t cnt;
4008
4009         for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
4010           {
4011             unsigned char bytes[charmap->mb_cur_max];
4012             int nbytes = charmap->width_rules[cnt].from->nbytes;
4013
4014             /* We have the range of characters for which the width is
4015                specified described using byte sequences of the multibyte
4016                charset.  We have to convert this to UCS4 now.  And we
4017                cannot simply convert the beginning and the end of the
4018                sequence, we have to iterate over the byte sequence and
4019                convert it for every single character.  */
4020             memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
4021
4022             while (nbytes < charmap->width_rules[cnt].to->nbytes
4023                    || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
4024                               nbytes) <= 0)
4025               {
4026                 /* Find the UCS value for `bytes'.  */
4027                 int inner;
4028                 uint32_t wch;
4029                 struct charseq *seq =
4030                   charmap_find_symbol (charmap, (char *) bytes, nbytes);
4031
4032                 if (seq == NULL)
4033                   wch = ILLEGAL_CHAR_VALUE;
4034                 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
4035                   wch = seq->ucs4;
4036                 else
4037                   wch = repertoire_find_value (ctype->repertoire, seq->name,
4038                                                strlen (seq->name));
4039
4040                 if (wch != ILLEGAL_CHAR_VALUE)
4041                   {
4042                     /* Store the value.  */
4043                     uint32_t *class_bits =
4044                       find_idx (ctype, &ctype->class_collection, NULL,
4045                                 &ctype->class_collection_act, wch);
4046
4047                     if (class_bits != NULL && (*class_bits & BITw (tok_print)))
4048                       wcwidth_table_add (&t, wch,
4049                                          charmap->width_rules[cnt].width);
4050                   }
4051
4052                 /* "Increment" the bytes sequence.  */
4053                 inner = nbytes - 1;
4054                 while (inner >= 0 && bytes[inner] == 0xff)
4055                   --inner;
4056
4057                 if (inner < 0)
4058                   {
4059                     /* We have to extend the byte sequence.  */
4060                     if (nbytes >= charmap->width_rules[cnt].to->nbytes)
4061                       break;
4062
4063                     bytes[0] = 1;
4064                     memset (&bytes[1], 0, nbytes);
4065                     ++nbytes;
4066                   }
4067                 else
4068                   {
4069                     ++bytes[inner];
4070                     while (++inner < nbytes)
4071                       bytes[inner] = 0;
4072                   }
4073               }
4074           }
4075       }
4076
4077     /* Set the width of L'\0' to 0.  */
4078     wcwidth_table_add (&t, 0, 0);
4079
4080     wcwidth_table_finalize (&t);
4081
4082     if (verbose)
4083       WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
4084                                "LC_CTYPE", (unsigned long int) t.result_size));
4085
4086     ctype->width.iov_base = t.result;
4087     ctype->width.iov_len = t.result_size;
4088   }
4089
4090   /* Set MB_CUR_MAX.  */
4091   ctype->mb_cur_max = charmap->mb_cur_max;
4092
4093   /* Now determine the table for the transliteration information.
4094
4095      XXX It is not yet clear to me whether it is worth implementing a
4096      complicated algorithm which uses a hash table to locate the entries.
4097      For now I'll use a simple array which can be searched using binary
4098      search.  */
4099   if (ctype->translit_include != NULL)
4100     /* Traverse the locales mentioned in the `include' statements in a
4101        depth-first way and fold in their transliteration information.  */
4102     translit_flatten (ctype, charmap, &ctype->translit);
4103
4104   if (ctype->translit != NULL)
4105     {
4106       /* First count how many entries we have.  This is the upper limit
4107          since some entries from the included files might be overwritten.  */
4108       size_t number = 0;
4109       size_t cnt;
4110       struct translit_t *runp = ctype->translit;
4111       struct translit_t **sorted;
4112       size_t from_len, to_len;
4113
4114       while (runp != NULL)
4115         {
4116           ++number;
4117           runp = runp->next;
4118         }
4119
4120       /* Next we allocate an array large enough and fill in the values.  */
4121       sorted = (struct translit_t **) alloca (number
4122                                               * sizeof (struct translit_t **));
4123       runp = ctype->translit;
4124       number = 0;
4125       do
4126         {
4127           /* Search for the place where to insert this string.
4128              XXX Better use a real sorting algorithm later.  */
4129           size_t idx = 0;
4130           int replace = 0;
4131
4132           while (idx < number)
4133             {
4134               int res = wcscmp ((const wchar_t *) sorted[idx]->from,
4135                                 (const wchar_t *) runp->from);
4136               if (res == 0)
4137                 {
4138                   replace = 1;
4139                   break;
4140                 }
4141               if (res > 0)
4142                 break;
4143               ++idx;
4144             }
4145
4146           if (replace)
4147             sorted[idx] = runp;
4148           else
4149             {
4150               memmove (&sorted[idx + 1], &sorted[idx],
4151                        (number - idx) * sizeof (struct translit_t *));
4152               sorted[idx] = runp;
4153               ++number;
4154             }
4155
4156           runp = runp->next;
4157         }
4158       while (runp != NULL);
4159
4160       /* The next step is putting all the possible transliteration
4161          strings in one memory block so that we can write it out.
4162          We need several different blocks:
4163          - index to the from-string array
4164          - from-string array
4165          - index to the to-string array
4166          - to-string array.
4167       */
4168       from_len = to_len = 0;
4169       for (cnt = 0; cnt < number; ++cnt)
4170         {
4171           struct translit_to_t *srunp;
4172           from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4173           srunp = sorted[cnt]->to;
4174           while (srunp != NULL)
4175             {
4176               to_len += wcslen ((const wchar_t *) srunp->str) + 1;
4177               srunp = srunp->next;
4178             }
4179           /* Plus one for the extra NUL character marking the end of
4180              the list for the current entry.  */
4181           ++to_len;
4182         }
4183
4184       /* We can allocate the arrays for the results.  */
4185       ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
4186       ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
4187       ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
4188       ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
4189
4190       from_len = 0;
4191       to_len = 0;
4192       for (cnt = 0; cnt < number; ++cnt)
4193         {
4194           size_t len;
4195           struct translit_to_t *srunp;
4196
4197           ctype->translit_from_idx[cnt] = from_len;
4198           ctype->translit_to_idx[cnt] = to_len;
4199
4200           len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
4201           wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4202                    (const wchar_t *) sorted[cnt]->from, len);
4203           from_len += len;
4204
4205           ctype->translit_to_idx[cnt] = to_len;
4206           srunp = sorted[cnt]->to;
4207           while (srunp != NULL)
4208             {
4209               len = wcslen ((const wchar_t *) srunp->str) + 1;
4210               wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4211                        (const wchar_t *) srunp->str, len);
4212               to_len += len;
4213               srunp = srunp->next;
4214             }
4215           ctype->translit_to_tbl[to_len++] = L'\0';
4216         }
4217
4218       /* Store the information about the length.  */
4219       ctype->translit_idx_size = number;
4220       ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4221       ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4222     }
4223   else
4224     {
4225       /* Provide some dummy pointers since we have nothing to write out.  */
4226       static uint32_t no_str = { 0 };
4227
4228       ctype->translit_from_idx = &no_str;
4229       ctype->translit_from_tbl = &no_str;
4230       ctype->translit_to_tbl = &no_str;
4231       ctype->translit_idx_size = 0;
4232       ctype->translit_from_tbl_size = 0;
4233       ctype->translit_to_tbl_size = 0;
4234     }
4235 }