locale/programs/ld-collate.c

   1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
   2 This file is part of the GNU C Library.
   3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
   4
   5 The GNU C Library is free software; you can redistribute it and/or
   6 modify it under the terms of the GNU Library General Public License as
   7 published by the Free Software Foundation; either version 2 of the
   8 License, or (at your option) any later version.
   9
  10 The GNU C Library is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 Library General Public License for more details.
  14
  15 You should have received a copy of the GNU Library General Public
  16 License along with the GNU C Library; see the file COPYING.LIB.  If
  17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18 Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <locale.h>
  28 #include <obstack.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32
  33 #include "localeinfo.h"
  34 #include "locales.h"
  35 #include "simple-hash.h"
  36 #include "stringtrans.h"
  37
  38 /* Uncomment the following line in the production version.  */
  39 /* #define NDEBUG 1 */
  40 #include <assert.h>
  41
  42
  43 #define MAX(a, b) ((a) > (b) ? (a) : (b))
  44
  45 #define SWAPU32(w) \
  46   (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
  47
  48
  49 /* What kind of symbols get defined?  */
  50 enum coll_symbol
  51 {
  52   undefined,
  53   ellipsis,
  54   character,
  55   element,
  56   symbol
  57 };
  58
  59
  60 typedef struct patch_t
  61 {
  62   const char *fname;
  63   size_t lineno;
  64   const char *token;
  65   union
  66   {
  67     unsigned int *pos;
  68     size_t idx;
  69   } where;
  70   struct patch_t *next;
  71 } patch_t;
  72
  73
  74 typedef struct element_t
  75 {
  76   const wchar_t *name;
  77   unsigned int this_weight;
  78
  79   struct element_t *next;
  80
  81   unsigned int *ordering;
  82   size_t ordering_len;
  83 } element_t;
  84
  85
  86 /* The real definition of the struct for the LC_CTYPE locale.  */
  87 struct locale_collate_t
  88 {
  89   /* Collate symbol table.  Simple mapping to number.  */
  90   hash_table symbols;
  91
  92   /* The collation elements.  */
  93   hash_table elements;
  94   struct obstack element_mem;
  95
  96   /* The result table.  */
  97   hash_table result;
  98
  99   /* Sorting rules given in order_start line.  */
 100   int nrules;
 101   int nrules_max;
 102   enum coll_sort_rule *rules;
 103
 104   /* Used while recognizing symbol composed of multiple tokens
 105      (collating-element).  */
 106   const char *combine_token;
 107   size_t combine_token_len;
 108
 109   /* How many sorting order specifications so far.  */
 110   unsigned int order_cnt;
 111
 112   /* Was lastline ellipsis?  */
 113   int was_ellipsis;
 114   /* Value of last entry if was character.  */
 115   wchar_t last_char;
 116   /* Current element.  */
 117   element_t *current_element;
 118   /* What kind of symbol is current element.  */
 119   enum coll_symbol kind;
 120
 121   /* While collecting the weigths we need some temporary space.  */
 122   unsigned int current_order;
 123   int *weight_cnt;
 124   int weight_idx;
 125   unsigned int *weight;
 126   int nweight;
 127   int nweight_max;
 128
 129   /* Patch lists.  */
 130   patch_t *current_patch;
 131   patch_t *all_patches;
 132
 133   /* Room for the UNDEFINED information.  */
 134   element_t undefined;
 135   unsigned int undefined_len;
 136 };
 137
 138
 139 /* Be verbose?  Defined in localedef.c.  */
 140 extern int verbose;
 141
 142
 143 void *xmalloc (size_t __n);
 144 void *xrealloc (void *__p, size_t __n);
 145
 146
 147 #define obstack_chunk_alloc xmalloc
 148 #define obstack_chunk_free free
 149
 150
 151 void
 152 collate_startup (struct linereader *lr, struct localedef_t *locale,
 153                  struct charset_t *charset)
 154 {
 155   struct locale_collate_t *collate;
 156
 157   /* It is important that we always use UCS4 encoding for strings now.  */
 158   encoding_method = ENC_UCS4;
 159
 160   /* Allocate the needed room.  */
 161   locale->categories[LC_COLLATE].collate = collate =
 162     (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
 163
 164   /* Allocate hash table for collating elements.  */
 165   if (init_hash (&collate->elements, 512))
 166     error (4, 0, _("memory exhausted"));
 167   collate->combine_token = NULL;
 168   obstack_init (&collate->element_mem);
 169
 170   /* Allocate hash table for collating elements.  */
 171   if (init_hash (&collate->symbols, 64))
 172     error (4, 0, _("memory exhausted"));
 173
 174   /* Allocate hash table for result.  */
 175   if (init_hash (&collate->result, 512))
 176     error (4, 0, _("memory exhausted"));
 177
 178   collate->nrules = 0;
 179   collate->nrules_max = 10;
 180   collate->rules
 181     = (enum coll_sort_rule *) xmalloc (collate->nrules_max
 182                                        * sizeof (enum coll_sort_rule));
 183
 184   collate->order_cnt = 1;       /* The smallest weight is 2.  */
 185
 186   collate->was_ellipsis = 0;
 187   collate->last_char = L'\0';   /* 0 because leading ellipsis is allowed.  */
 188
 189   collate->all_patches = NULL;
 190
 191   /* This tells us no UNDEFINED entry was found until now.  */
 192   collate->undefined.this_weight = 0;
 193
 194   lr->translate_strings = 0;
 195 }
 196
 197
 198 void
 199 collate_finish (struct localedef_t *locale, struct charset_t *charset)
 200 {
 201   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 202   patch_t *patch;
 203   size_t cnt;
 204
 205   /* Patch the constructed table so that forward references are
 206      correctly filled.  */
 207   for (patch = collate->all_patches; patch != NULL; patch = patch->next)
 208     {
 209       wchar_t wch;
 210       size_t toklen = strlen (patch->token);
 211       void *ptmp;
 212       unsigned int value = 0;
 213
 214       wch = charset_find_value (charset, patch->token, toklen);
 215       if (wch != ILLEGAL_CHAR_VALUE)
 216         {
 217           element_t *runp;
 218
 219           if (find_entry (&collate->result, &wch, sizeof (wchar_t),
 220                           (void *) &runp) < 0)
 221             runp = NULL;
 222           for (; runp != NULL; runp = runp->next)
 223             if (runp->name[0] == wch && runp->name[1] == L'\0')
 224               break;
 225
 226           value = runp == NULL ? 0 : runp->this_weight;
 227         }
 228       else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
 229                >= 0)
 230         {
 231           value = ((element_t *) ptmp)->this_weight;
 232         }
 233       else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
 234                >= 0)
 235         {
 236           value = (unsigned int) ptmp;
 237         }
 238       else
 239         value = 0;
 240
 241       if (value == 0)
 242         error_at_line (0, 0, patch->fname, patch->lineno,
 243                         _("no weight defined for symbol `%s'"), patch->token);
 244       else
 245         *patch->where.pos = value;
 246     }
 247
 248   /* If no definition for UNDEFINED is given, all characters in the
 249      given charset must be specified.  */
 250   if (collate->undefined.ordering == NULL)
 251     {
 252       /**************************************************************\
 253       |* XXX We should test whether really an unspecified character *|
 254       |* exists before giving the message.                          *|
 255       \**************************************************************/
 256       u_int32_t weight;
 257
 258       error (0, 0, _("no definition of `UNDEFINED'"));
 259
 260       collate->undefined.ordering_len = collate->nrules;
 261       weight = ++collate->order_cnt;
 262
 263       for (cnt = 0; cnt < collate->nrules; ++cnt)
 264         {
 265           u_int32_t one = 1;
 266           obstack_grow (&collate->element_mem, &one, sizeof (one));
 267         }
 268
 269       for (cnt = 0; cnt < collate->nrules; ++cnt)
 270         obstack_grow (&collate->element_mem, &weight, sizeof (weight));
 271
 272       collate->undefined.ordering = obstack_finish (&collate->element_mem);
 273     }
 274
 275   collate->undefined_len = 2;   /* For the name: 1 x wchar_t + L'\0'.  */
 276   for (cnt = 0; cnt < collate->nrules; ++cnt)
 277     collate->undefined_len += 1 + collate->undefined.ordering[cnt];
 278
 279   /* Collating symbols are not used anymore.  */
 280   (void) delete_hash (&collate->symbols);
 281 }
 282
 283
 284
 285 void
 286 collate_output (struct localedef_t *locale, const char *output_path)
 287 {
 288   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 289   u_int32_t table_size, table_best, level_best, sum_best;
 290   void *last;
 291   element_t *pelem;
 292   wchar_t *name;
 293   size_t len;
 294   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
 295   struct iovec iov[2 + nelems];
 296   struct locale_file data;
 297   u_int32_t idx[nelems];
 298   struct obstack non_simple;
 299   size_t cnt, entry_size;
 300   u_int32_t undefined_offset = UINT_MAX;
 301   u_int32_t *table, *extra, *table2, *extra2;
 302   size_t extra_len;
 303
 304   sum_best = UINT_MAX;
 305   table_best = 0xffff;
 306   level_best = 0xffff;
 307
 308   /* Compute table size.  */
 309   fputs (_("\
 310 Computing table size for collation information might take a while..."),
 311          stderr);
 312   for (table_size = 256; table_size < sum_best; ++table_size)
 313     {
 314       size_t hits[table_size];
 315       unsigned int worst = 1;
 316       size_t cnt;
 317
 318       last = NULL;
 319
 320       for (cnt = 0; cnt < 256; ++cnt)
 321         hits[cnt] = 1;
 322       memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
 323
 324       while (iterate_table (&collate->result, &last, (const void **) &name,
 325                             &len, (void **) &pelem) >= 0)
 326         if (pelem->ordering != NULL && pelem->name[0] > 0xff)
 327           if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
 328             {
 329               worst = hits[(unsigned int) pelem->name[0] % table_size];
 330               if (table_size * worst > sum_best)
 331                 break;
 332             }
 333
 334       if (table_size * worst < sum_best)
 335         {
 336           sum_best = table_size * worst;
 337           table_best = table_size;
 338           level_best = worst;
 339         }
 340     }
 341   assert (table_best != 0xffff || level_best != 0xffff);
 342   fputs (_(" done\n"), stderr);
 343
 344   obstack_init (&non_simple);
 345
 346   data.magic = LIMAGIC (LC_COLLATE);
 347   data.n = nelems;
 348   iov[0].iov_base = (void *) &data;
 349   iov[0].iov_len = sizeof (data);
 350
 351   iov[1].iov_base = (void *) idx;
 352   iov[1].iov_len = sizeof (idx);
 353
 354   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
 355   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t);
 356
 357   table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t));
 358   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
 359   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
 360     = collate->nrules * sizeof (u_int32_t);
 361   /* Another trick here.  Describing the collation method needs only a
 362      few bits (3, to be exact).  But the binary file should be
 363      accessible by maschines with both endianesses and so we store both
 364      information in the same word.  */
 365   for (cnt = 0; cnt < collate->nrules; ++cnt)
 366     table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
 367
 368   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
 369   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t);
 370
 371   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
 372   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
 373     = sizeof (u_int32_t);
 374
 375   entry_size = 1 + MAX (collate->nrules, 2);
 376
 377   table = (u_int32_t *) alloca (table_best * level_best * entry_size
 378                                 * sizeof (table[0]));
 379   memset (table, '\0', table_best * level_best * entry_size
 380           * sizeof (table[0]));
 381
 382
 383   /* Macros for inserting in output table.  */
 384 #define ADD_VALUE(expr)                                                       \
 385   do {                                                                        \
 386     u_int32_t to_write = (u_int32_t) expr;                                    \
 387     obstack_grow (&non_simple, &to_write, sizeof (to_write));                 \
 388   } while (0)
 389
 390 #define ADD_ELEMENT(pelem, len)                                               \
 391   do {                                                                        \
 392     size_t cnt, idx;                                                          \
 393                                                                               \
 394     ADD_VALUE (len);                                                          \
 395                                                                               \
 396     wlen = wcslen (pelem->name);                                              \
 397     obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
 398                                                                               \
 399     idx = collate->nrules;                                                    \
 400     for (cnt = 0; cnt < collate->nrules; ++cnt)                               \
 401       {                                                                       \
 402         size_t disp;                                                          \
 403                                                                               \
 404         ADD_VALUE (pelem->ordering[cnt]);                                     \
 405         for (disp = 0; disp < pelem->ordering[cnt]; ++disp)                   \
 406           ADD_VALUE (pelem->ordering[idx++]);                                 \
 407       }                                                                       \
 408   } while (0)
 409
 410 #define ADD_FORWARD(pelem)                                                    \
 411   do {                                                                        \
 412     /* We leave a reference in the main table and put all                     \
 413        information in the table for the extended entries.  */                 \
 414     element_t *runp;                                                          \
 415     element_t *has_simple = NULL;                                             \
 416     size_t wlen;                                                              \
 417                                                                               \
 418     table[(level * table_best + slot) * entry_size + 1]                       \
 419       = FORWARD_CHAR;                                                         \
 420     table[(level * table_best + slot) * entry_size + 2]                       \
 421       = obstack_object_size (&non_simple) / sizeof (u_int32_t);               \
 422                                                                               \
 423     /* Here we have to construct the non-simple table entry.  First           \
 424        compute the total length of this entry.  */                            \
 425     for (runp = (pelem); runp != NULL; runp = runp->next)                     \
 426       if (runp->ordering != NULL)                                             \
 427         {                                                                     \
 428           u_int32_t value;                                                    \
 429           size_t cnt;                                                         \
 430                                                                               \
 431           value = 1 + wcslen (runp->name) + 1;                                \
 432                                                                               \
 433           for (cnt = 0; cnt < collate->nrules; ++cnt)                         \
 434             /* We have to take care for entries without ordering              \
 435                information.  While reading them they get inserted in the      \
 436                table and later not removed when something goes wrong with     \
 437                reading its weights.  */                                       \
 438             {                                                                 \
 439               value += 1 + runp->ordering[cnt];                               \
 440                                                                               \
 441               if (runp->name[1] == L'\0')                                     \
 442                 has_simple = runp;                                            \
 443             }                                                                 \
 444                                                                               \
 445           ADD_ELEMENT (runp, value);                                          \
 446         }                                                                     \
 447                                                                               \
 448     if (has_simple == NULL)                                                   \
 449       {                                                                       \
 450         size_t idx, cnt;                                                      \
 451                                                                               \
 452         ADD_VALUE (collate->undefined_len + 1);                               \
 453                                                                               \
 454         /* Add the name.  */                                                  \
 455         ADD_VALUE ((pelem)->name[0]);                                         \
 456         ADD_VALUE (0);                                                        \
 457                                                                               \
 458         idx = collate->nrules;                                                \
 459         for (cnt = 0; cnt < collate->nrules; ++cnt)                           \
 460           {                                                                   \
 461             size_t disp;                                                      \
 462                                                                               \
 463             ADD_VALUE (collate->undefined.ordering[cnt]);                     \
 464             for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)   \
 465               {                                                               \
 466                 if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR)        \
 467                   ADD_VALUE ((pelem)->name[0]);                               \
 468                 else                                                          \
 469                   ADD_VALUE (collate->undefined.ordering[idx++]);             \
 470                 ++idx;                                                        \
 471               }                                                               \
 472           }                                                                   \
 473       }                                                                       \
 474   } while (0)
 475
 476
 477
 478   /* Fill the table now.  First we look for all the characters which
 479      fit into one single byte.  This speeds up the 8-bit string
 480      functions.  */
 481   last = NULL;
 482   while (iterate_table (&collate->result, &last, (const void **) &name,
 483                         &len, (void **) &pelem) >= 0)
 484     if (pelem->name[0] <= 0xff)
 485       {
 486         /* We have a single byte name.  Now we must distinguish
 487            between entries in simple form (i.e., only one value per
 488            weight and no collation element starting with the same
 489            character) and those which are not.  */
 490         size_t slot = ((size_t) pelem->name[0]);
 491         const size_t level = 0;
 492
 493         table[slot * entry_size] = pelem->name[0];
 494
 495         if (pelem->name[1] == L'\0' && pelem->next == NULL
 496             && pelem->ordering_len == collate->nrules)
 497           {
 498             /* Yes, we have a simple one.  Lucky us.  */
 499             size_t cnt;
 500
 501             for (cnt = 0; cnt < collate->nrules; ++cnt)
 502               table[slot * entry_size + 1 + cnt]
 503                 = pelem->ordering[collate->nrules + cnt];
 504           }
 505         else
 506           ADD_FORWARD (pelem);
 507       }
 508
 509   /* Now check for missing single byte entries.  If one exist we fill
 510      with the UNDEFINED entry.  */
 511   for (cnt = 0; cnt < 256; ++cnt)
 512     /* The first weight is never 0 for existing entries.  */
 513     if (table[cnt * entry_size + 1] == 0)
 514       {
 515         /* We have to fill in the information from the UNDEFINED
 516            entry.  */
 517         table[cnt * entry_size] = (u_int32_t) cnt;
 518
 519         if (collate->undefined.ordering_len == collate->nrules)
 520           {
 521             size_t inner;
 522
 523             for (inner = 0; inner < collate->nrules; ++inner)
 524               if (collate->undefined.ordering[collate->nrules + inner]
 525                   == ELLIPSIS_CHAR)
 526                 table[cnt * entry_size + 1 + inner] = cnt;
 527               else
 528                 table[cnt * entry_size + 1 + inner]
 529                   = collate->undefined.ordering[collate->nrules + inner];
 530           }
 531         else
 532           {
 533             if (undefined_offset != UINT_MAX)
 534               {
 535                 table[cnt * entry_size + 1] = FORWARD_CHAR;
 536                 table[cnt * entry_size + 2] = undefined_offset;
 537               }
 538             else
 539               {
 540                 const size_t slot = cnt;
 541                 const size_t level = 0;
 542
 543                 ADD_FORWARD (&collate->undefined);
 544                 undefined_offset = table[cnt * entry_size + 2];
 545               }
 546           }
 547       }
 548
 549   /* Now we are ready for inserting the whole rest.   */
 550   last = NULL;
 551   while (iterate_table (&collate->result, &last, (const void **) &name,
 552                         &len, (void **) &pelem) >= 0)
 553     if (pelem->name[0] > 0xff)
 554       {
 555         /* Find the position.  */
 556         size_t slot = ((size_t) pelem->name[0]) % table_best;
 557         size_t level = 0;
 558
 559         while (table[(level * table_best + slot) * entry_size + 1] != 0)
 560           ++level;
 561         assert (level < level_best);
 562
 563         if (pelem->name[1] == L'\0' && pelem->next == NULL
 564             && pelem->ordering_len == collate->nrules)
 565           {
 566             /* Again a simple entry.  */
 567             size_t inner;
 568
 569             for (inner = 0; inner < collate->nrules; ++inner)
 570               table[(level * table_best + slot) * entry_size + 1 + inner]
 571                 = pelem->ordering[collate->nrules + inner];
 572           }
 573         else
 574           ADD_FORWARD (pelem);
 575       }
 576
 577   /* Add the UNDEFINED entry.  */
 578   {
 579     /* Here we have to construct the non-simple table entry.  */
 580     size_t idx, cnt;
 581
 582     undefined_offset = obstack_object_size (&non_simple);
 583
 584     idx = collate->nrules;
 585     for (cnt = 0; cnt < collate->nrules; ++cnt)
 586       {
 587         size_t disp;
 588
 589         ADD_VALUE (collate->undefined.ordering[cnt]);
 590         for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
 591           ADD_VALUE (collate->undefined.ordering[idx++]);
 592       }
 593   }
 594
 595   /* Finish the extra block.  */
 596   extra_len = obstack_object_size (&non_simple);
 597   extra = (u_int32_t *) obstack_finish (&non_simple);
 598   assert ((extra_len % sizeof (u_int32_t)) == 0);
 599
 600   /* Now we have to build the two array for the other byte ordering.  */
 601   table2 = (u_int32_t *) alloca (table_best * level_best * entry_size
 602                                  * sizeof (table[0]));
 603   extra2 = (u_int32_t *) alloca (extra_len);
 604
 605   for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
 606     table2[cnt] = SWAPU32 (table[cnt]);
 607
 608   for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt)
 609     extra2[cnt] = SWAPU32 (extra2[cnt]);
 610
 611   /* Store table adresses and lengths.   */
 612 #if __BYTE_ORDER == __BIG_ENDIAN
 613   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
 614   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
 615     = table_best * level_best * entry_size * sizeof (table[0]);
 616
 617   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
 618   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
 619     = table_best * level_best * entry_size * sizeof (table[0]);
 620
 621   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
 622   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
 623
 624   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
 625   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
 626 #else
 627   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
 628   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
 629     = table_best * level_best * entry_size * sizeof (table[0]);
 630
 631   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
 632   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
 633     = table_best * level_best * entry_size * sizeof (table[0]);
 634
 635   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
 636   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
 637
 638   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
 639   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
 640 #endif
 641
 642   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
 643   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t);
 644
 645   /* Update idx array.  */
 646   idx[0] = iov[0].iov_len + iov[1].iov_len;
 647   for (cnt = 1; cnt < nelems; ++cnt)
 648     idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
 649
 650   write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
 651 }
 652
 653
 654 void
 655 collate_element_to (struct linereader *lr, struct localedef_t *locale,
 656                     struct token *code, struct charset_t *charset)
 657 {
 658   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 659   unsigned int value;
 660   void *not_used;
 661
 662   if (collate->combine_token != NULL)
 663     {
 664       free ((void *) collate->combine_token);
 665       collate->combine_token = NULL;
 666     }
 667
 668   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 669   if (value != ILLEGAL_CHAR_VALUE)
 670     {
 671       lr_error (lr, _("symbol for multicharacter collating element "
 672                       "`%.*s' duplicates symbolic name in charset"),
 673                 code->val.str.len, code->val.str.start);
 674       return;
 675     }
 676
 677   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
 678                   &not_used) >= 0)
 679     {
 680       lr_error (lr, _("symbol for multicharacter collating element "
 681                       "`%.*s' duplicates other element definition"),
 682                 code->val.str.len, code->val.str.start);
 683       return;
 684     }
 685
 686   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
 687                   &not_used) >= 0)
 688     {
 689       lr_error (lr, _("symbol for multicharacter collating element "
 690                       "`%.*s' duplicates symbol definition"),
 691                 code->val.str.len, code->val.str.start);
 692       return;
 693     }
 694
 695   collate->combine_token = code->val.str.start;
 696   collate->combine_token_len = code->val.str.len;
 697 }
 698
 699
 700 void
 701 collate_element_from (struct linereader *lr, struct localedef_t *locale,
 702                       struct token *code, struct charset_t *charset)
 703 {
 704   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 705   element_t *elemp, *runp;
 706
 707   /* CODE is a string.  */
 708   elemp = (element_t *) obstack_alloc (&collate->element_mem,
 709                                        sizeof (element_t));
 710
 711   /* We have to translate the string.  It may contain <...> character
 712      names.  */
 713   elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
 714   elemp->this_weight = 0;
 715   elemp->ordering = NULL;
 716   elemp->ordering_len = 0;
 717
 718   free (code->val.str.start);
 719
 720   if (elemp->name == NULL)
 721     {
 722       /* At least one character in the string is not defined.  We simply
 723          do nothing.  */
 724       if (verbose)
 725         lr_error (lr, _("\
 726 `from' string in collation element declaration contains unknown character"));
 727       return;
 728     }
 729
 730   if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
 731     {
 732       lr_error (lr, _("illegal colltion element"));
 733       return;
 734     }
 735
 736   /* The entries in the linked lists of RESULT are sorting in
 737      descending order.  The order is important for the `strcoll' and
 738      `wcscoll' functions.  */
 739   if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
 740                   (void *) &runp) >= 0)
 741     {
 742       /* We already have an entry with this key.  Check whether it is
 743          identical.  */
 744       element_t *prevp = NULL;
 745       int cmpres;
 746
 747       do
 748         {
 749           cmpres = wcscmp (elemp->name, runp->name);
 750           if (cmpres <= 0)
 751             break;
 752           prevp = runp;
 753         }
 754       while ((runp = runp->next) != NULL);
 755
 756       if (cmpres == 0)
 757         lr_error (lr, _("duplicate collating element definition"));
 758       else
 759         {
 760           elemp->next = runp;
 761           if (prevp == NULL)
 762             {
 763               if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
 764                              elemp) < 0)
 765                 error (EXIT_FAILURE, 0,
 766                        _("\
 767 error while inserting collation element into hash table"));
 768             }
 769           else
 770             prevp->next = elemp;
 771         }
 772     }
 773   else
 774     {
 775       elemp->next = NULL;
 776       if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
 777           < 0)
 778         error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
 779     }
 780
 781   if (insert_entry (&collate->elements, collate->combine_token,
 782                     collate->combine_token_len, (void *) elemp) < 0)
 783     lr_error (lr, _("cannot insert new collating symbol definition: %s"),
 784               strerror (errno));
 785 }
 786
 787
 788 void
 789 collate_symbol (struct linereader *lr, struct localedef_t *locale,
 790                 struct token *code, struct charset_t *charset)
 791 {
 792   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 793   wchar_t value;
 794   void *not_used;
 795
 796   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
 797   if (value != ILLEGAL_CHAR_VALUE)
 798     {
 799       lr_error (lr, _("symbol for multicharacter collating element "
 800                       "`%.*s' duplicates symbolic name in charset"),
 801                 code->val.str.len, code->val.str.start);
 802       return;
 803     }
 804
 805   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
 806                   &not_used) >= 0)
 807     {
 808       lr_error (lr, _("symbol for multicharacter collating element "
 809                       "`%.*s' duplicates element definition"),
 810                 code->val.str.len, code->val.str.start);
 811       return;
 812     }
 813
 814   if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
 815                   &not_used) >= 0)
 816     {
 817       lr_error (lr, _("symbol for multicharacter collating element "
 818                       "`%.*s' duplicates other symbol definition"),
 819                 code->val.str.len, code->val.str.start);
 820       return;
 821     }
 822
 823   if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
 824                     (void *) 0) < 0)
 825     lr_error (lr, _("cannot insert new collating symbol definition: %s"),
 826               strerror (errno));
 827 }
 828
 829
 830 void
 831 collate_new_order (struct linereader *lr, struct localedef_t *locale,
 832                    enum coll_sort_rule sort_rule)
 833 {
 834   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 835
 836   if (collate->nrules >= collate->nrules_max)
 837     {
 838       collate->nrules_max *= 2;
 839       collate->rules
 840         = (enum coll_sort_rule *) xrealloc (collate->rules,
 841                                             collate->nrules_max
 842                                             * sizeof (enum coll_sort_rule));
 843     }
 844
 845   collate->rules[collate->nrules++] = sort_rule;
 846 }
 847
 848
 849 void
 850 collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
 851 {
 852   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 853
 854   collate->rules
 855     = (enum coll_sort_rule *) xrealloc (collate->rules,
 856                                         collate->nrules
 857                                         * sizeof (enum coll_sort_rule));
 858
 859   /* Allocate arrays for temporary weights.  */
 860   collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
 861
 862   /* Choose arbitrary start value for table size.  */
 863   collate->nweight_max = 5 * collate->nrules;
 864   collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
 865 }
 866
 867
 868 int
 869 collate_order_elem (struct linereader *lr, struct localedef_t *locale,
 870                     struct token *code, struct charset_t *charset)
 871 {
 872   const wchar_t zero = L'\0';
 873   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 874   int result = 0;
 875   wchar_t value;
 876   void *tmp;
 877   int i;
 878
 879   switch (code->tok)
 880     {
 881     case tok_bsymbol:
 882       /* We have a string to find in one of the three hashing tables.  */
 883       value = charset_find_value (charset, code->val.str.start,
 884                                   code->val.str.len);
 885       if (value != ILLEGAL_CHAR_VALUE)
 886         {
 887           element_t *lastp, *firstp;
 888
 889           collate->kind = character;
 890
 891           if (find_entry (&collate->result, &value, sizeof (wchar_t),
 892                           (void *) &firstp) < 0)
 893             firstp = lastp = NULL;
 894           else
 895             {
 896               /* The entry for the simple character is always found at
 897                  the end.  */
 898               lastp = firstp;
 899               while (lastp->next != NULL)
 900                 lastp = lastp->next;
 901
 902               if (lastp->name[0] == value && lastp->name[1] == L'\0')
 903                 {
 904                   lr_error (lr, _("duplicate definition for character `%.*s'"),
 905                             code->val.str.len, code->val.str.start);
 906                   lr_ignore_rest (lr, 0);
 907                   result = -1;
 908                   break;
 909                 }
 910             }
 911
 912           collate->current_element
 913             = (element_t *) obstack_alloc (&collate->element_mem,
 914                                            sizeof (element_t));
 915
 916           obstack_grow (&collate->element_mem, &value, sizeof (value));
 917           obstack_grow (&collate->element_mem, &zero, sizeof (zero));
 918
 919           collate->current_element->name =
 920             (const wchar_t *) obstack_finish (&collate->element_mem);
 921
 922           collate->current_element->this_weight = ++collate->order_cnt;
 923
 924           collate->current_element->next = NULL;
 925
 926           if (firstp == NULL)
 927             {
 928               if (insert_entry (&collate->result, &value, sizeof (wchar_t),
 929                                 (void *) collate->current_element) < 0)
 930                 {
 931                   lr_error (lr, _("cannot insert collation element `%.*s'"),
 932                             code->val.str.len, code->val.str.start);
 933                   exit (4);
 934                 }
 935             }
 936           else
 937             lastp->next = collate->current_element;
 938         }
 939       else if (find_entry (&collate->elements, code->val.str.start,
 940                            code->val.str.len, &tmp) >= 0)
 941         {
 942           collate->current_element = (element_t *) tmp;
 943
 944           if (collate->current_element->this_weight != 0)
 945             {
 946               lr_error (lr, _("\
 947 collation element `%.*s' appears more than once: ignore line"),
 948                         code->val.str.len, code->val.str.start);
 949               lr_ignore_rest (lr, 0);
 950               result = -1;
 951               break;
 952             }
 953
 954           collate->kind = element;
 955           collate->current_element->this_weight = ++collate->order_cnt;
 956         }
 957       else if (find_entry (&collate->symbols, code->val.str.start,
 958                            code->val.str.len, &tmp) >= 0)
 959         {
 960           unsigned int order = ++collate->order_cnt;
 961
 962           if ((unsigned int) tmp != 0)
 963             {
 964               lr_error (lr, _("\
 965 collation symbol `.*s' appears more than once: ignore line"),
 966                         code->val.str.len, code->val.str.start);
 967               lr_ignore_rest (lr, 0);
 968               result = -1;
 969               break;
 970             }
 971
 972           collate->kind = symbol;
 973
 974           if (set_entry (&collate->symbols, code->val.str.start,
 975                          code->val.str.len, (void *) order) < 0)
 976             {
 977               lr_error (lr, _("cannot process order specification"));
 978               exit (4);
 979             }
 980         }
 981       else
 982         {
 983           if (verbose)
 984             lr_error (lr, _("unknown symbol `%.*s': line ignored"),
 985                       code->val.str.len, code->val.str.start);
 986           lr_ignore_rest (lr, 0);
 987
 988           result = -1;
 989         }
 990       break;
 991
 992     case tok_undefined:
 993       collate->kind = undefined;
 994       collate->current_element = &collate->undefined;
 995       break;
 996
 997     case tok_ellipsis:
 998       if (collate->was_ellipsis)
 999         {
1000           lr_error (lr, _("\
1001 two lines in a row containing `...' are not allowed"));
1002           result = -1;
1003         }
1004       else if (collate->kind != character)
1005         {
1006           /* An ellipsis requires the previous line to be an
1007              character definition.  */
1008           lr_error (lr, _("\
1009 line before ellipsis does not contain definition for character constant"));
1010           lr_ignore_rest (lr, 0);
1011           result = -1;
1012         }
1013       else
1014         collate->kind = ellipsis;
1015       break;
1016
1017     default:
1018       assert (! "illegal token in `collate_order_elem'");
1019     }
1020
1021   /* Now it's time to handle the ellipsis in the previous line.  We do
1022      this only when the last line contained an definition for an
1023      character, the current line also defines an character, the
1024      character code for the later is bigger than the former.  */
1025   if (collate->was_ellipsis)
1026     {
1027       if (collate->kind != character)
1028         {
1029           lr_error (lr, _("\
1030 line after ellipsis must contain character definition"));
1031           lr_ignore_rest (lr, 0);
1032           result = -1;
1033         }
1034       else if (collate->last_char > value)
1035         {
1036           lr_error (lr, _("end point of ellipsis range is bigger then start"));
1037           lr_ignore_rest (lr, 0);
1038           result = -1;
1039         }
1040       else
1041         {
1042           /* We can fill the arrays with the information we need.  */
1043           wchar_t name[2];
1044           unsigned int *data;
1045           size_t *ptr;
1046           size_t cnt;
1047
1048           name[0] = collate->last_char + 1;
1049           name[1] = L'\0';
1050
1051           data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
1052                                           * sizeof (unsigned int));
1053           ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
1054
1055           if (data == NULL || ptr == NULL)
1056             error (4, 0, _("memory exhausted"));
1057
1058           /* Prepare data.  Because the characters covered by an
1059              ellipsis all have equal values we prepare the data once
1060              and only change the variable number (if there are any).
1061              PTR[...] will point to the entries which will have to be
1062              fixed during the output loop.  */
1063           for (cnt = 0; cnt < collate->nrules; ++cnt)
1064             {
1065               data[cnt] = collate->weight_cnt[cnt];
1066               ptr[cnt] = (cnt == 0
1067                           ? collate->nweight
1068                           : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
1069             }
1070
1071           for (cnt = 0; cnt < collate->nweight; ++cnt)
1072             data[collate->nrules + cnt] = collate->weight[cnt];
1073
1074           for (cnt = 0; cnt < collate->nrules; ++cnt)
1075             if (data[ptr[cnt]] != ELLIPSIS_CHAR)
1076               ptr[cnt] = 0;
1077
1078           while (name[0] <= value)
1079             {
1080               element_t *pelem;
1081
1082               pelem = (element_t *) obstack_alloc (&collate->element_mem,
1083                                                    sizeof (element_t));
1084               if (pelem == NULL)
1085                 error (4, 0, _("memory exhausted"));
1086
1087               pelem->name
1088                 = (const wchar_t *) obstack_copy (&collate->element_mem,
1089                                                   name, 2 * sizeof (wchar_t));
1090               pelem->this_weight = ++collate->order_cnt;
1091
1092               pelem->ordering_len = collate->nweight;
1093               pelem->ordering
1094                 = (unsigned int *) obstack_copy (&collate->element_mem, data,
1095                                                  (collate->nrules
1096                                                   * pelem->ordering_len)
1097                                                  * sizeof (unsigned int));
1098
1099               /* `...' weights need to be adjusted.  */
1100               for (cnt = 0; cnt < collate->nrules; ++cnt)
1101                 if (ptr[cnt] != 0)
1102                   pelem->ordering[ptr[cnt]] = pelem->this_weight;
1103
1104               /* Insert new entry into result table.  */
1105               if (find_entry (&collate->result, name, sizeof (wchar_t),
1106                               (void *) &pelem->next) >= 0)
1107                 {
1108                   if (set_entry (&collate->result, name, sizeof (wchar_t),
1109                                  (void *) pelem->next) < 0)
1110                     error (4, 0, _("cannot insert into result table"));
1111                 }
1112               else
1113                 if (insert_entry (&collate->result, name, sizeof (wchar_t),
1114                                   (void *) pelem->next) < 0)
1115                   error (4, 0, _("cannot insert into result table"));
1116
1117               /* Increment counter.  */
1118               ++name[0];
1119             }
1120         }
1121     }
1122
1123   /* Reset counters for weights.  */
1124   collate->weight_idx = 0;
1125   collate->nweight = 0;
1126   for (i = 0; i < collate->nrules; ++i)
1127     collate->weight_cnt[i] = 0;
1128   collate->current_patch = NULL;
1129
1130   return result;
1131 }
1132
1133
1134 int
1135 collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
1136                         struct token *code, struct charset_t *charset)
1137 {
1138   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1139   unsigned int here_weight;
1140   wchar_t value;
1141   void *tmp;
1142
1143   assert (code->tok == tok_bsymbol);
1144
1145   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
1146   if (value != ILLEGAL_CHAR_VALUE)
1147     {
1148       element_t *runp;
1149
1150       if (find_entry (&collate->result, &value, sizeof (wchar_t),
1151                       (void *)&runp) < 0)
1152         runp = NULL;
1153
1154       while (runp != NULL
1155              && (runp->name[0] != value || runp->name[1] != L'\0'))
1156         runp = runp->next;
1157
1158       here_weight = runp == NULL ? 0 : runp->this_weight;
1159     }
1160   else if (find_entry (&collate->elements, code->val.str.start,
1161                        code->val.str.len, &tmp) >= 0)
1162     {
1163       element_t *runp = (element_t *) tmp;
1164
1165       here_weight = runp->this_weight;
1166     }
1167   else if (find_entry (&collate->symbols, code->val.str.start,
1168                        code->val.str.len, &tmp) >= 0)
1169     {
1170       here_weight = (unsigned int) tmp;
1171     }
1172   else
1173     {
1174       if (verbose)
1175         lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1176                   code->val.str.len, code->val.str.start);
1177       lr_ignore_rest (lr, 0);
1178       return -1;
1179     }
1180
1181   /* When we currently work on a collation symbol we do not expect any
1182      weight.  */
1183   if (collate->kind == symbol)
1184     {
1185       lr_error (lr, _("\
1186 specification of sorting weight for collation symbol does not make sense"));
1187       lr_ignore_rest (lr, 0);
1188       return -1;
1189     }
1190
1191   /* Add to the current collection of weights.  */
1192   if (collate->nweight >= collate->nweight_max)
1193     {
1194       collate->nweight_max *= 2;
1195       collate->weight = (unsigned int *) xrealloc (collate->weight,
1196                                                    collate->nweight_max);
1197     }
1198
1199   /* If the weight is currently not known, we remember to patch the
1200      resulting tables.  */
1201   if (here_weight == 0)
1202     {
1203       patch_t *newp;
1204
1205       newp = (patch_t *) obstack_alloc (&collate->element_mem,
1206                                         sizeof (patch_t));
1207       newp->fname = lr->fname;
1208       newp->lineno = lr->lineno;
1209       newp->token = (const char *) obstack_copy0 (&collate->element_mem,
1210                                                   code->val.str.start,
1211                                                   code->val.str.len);
1212       newp->where.idx = collate->nweight++;
1213       newp->next = collate->current_patch;
1214       collate->current_patch = newp;
1215     }
1216   else
1217     collate->weight[collate->nweight++] = here_weight;
1218   ++collate->weight_cnt[collate->weight_idx];
1219
1220   return 0;
1221 }
1222
1223
1224 int
1225 collate_next_weight (struct linereader *lr, struct localedef_t *locale)
1226 {
1227   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1228
1229   if (collate->kind == symbol)
1230     {
1231       lr_error (lr, _("\
1232 specification of sorting weight for collation symbol does not make sense"));
1233       lr_ignore_rest (lr, 0);
1234       return -1;
1235     }
1236
1237   ++collate->weight_idx;
1238   if (collate->weight_idx >= collate->nrules)
1239     {
1240       lr_error (lr, _("too many weights"));
1241       lr_ignore_rest (lr, 0);
1242       return -1;
1243     }
1244
1245   return 0;
1246 }
1247
1248
1249 int
1250 collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
1251                        struct token *code, struct charset_t *charset)
1252 {
1253   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1254   unsigned int value = 0;
1255
1256   /* There current tokens can be `IGNORE', `...', or a string.  */
1257   switch (code->tok)
1258     {
1259     case tok_ignore:
1260       /* This token is allowed in all situations.  */
1261       value = IGNORE_CHAR;
1262       break;
1263
1264     case tok_ellipsis:
1265       /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1266          entry.  */
1267       if (collate->kind != ellipsis && collate->kind != undefined)
1268         {
1269           lr_error (lr, _("\
1270 `...' must only be used in `...' and `UNDEFINED' entries"));
1271           lr_ignore_rest (lr, 0);
1272           return -1;
1273         }
1274       value = ELLIPSIS_CHAR;
1275       break;
1276
1277     case tok_string:
1278       /* This can become difficult.  We have to get the weights which
1279          correspind the the single wide chars in the string.  But some
1280          of the `chars' might not be real characters, but collation
1281          elements or symbols.  And so the string decoder might have
1282          signaled errors.  The string at this point is not translated.
1283          I.e., all <...> sequences are still there.  */
1284       {
1285         char *runp = code->val.str.start;
1286         void *tmp;
1287
1288         while (*runp != '\0')
1289           {
1290             char *startp = (char *) runp;
1291             char *putp = (char *) runp;
1292             wchar_t wch;
1293
1294             /* Lookup weight for char and store it.  */
1295             if (*runp == '<')
1296               {
1297                 while (*++runp != '\0' && *runp != '>')
1298                   {
1299                     if (*runp == lr->escape_char)
1300                       if (*++runp == '\0')
1301                         {
1302                           lr_error (lr, _("unterminated weight name"));
1303                           lr_ignore_rest (lr, 0);
1304                           return -1;
1305                         }
1306                     *putp++ = *runp;
1307                   }
1308                 if (*runp == '>')
1309                   ++runp;
1310
1311                 if (putp == startp)
1312                   {
1313                     lr_error (lr, _("empty weight name: line ignored"));
1314                     lr_ignore_rest (lr, 0);
1315                     return -1;
1316                   }
1317
1318                 wch = charset_find_value (charset, startp, putp - startp);
1319                 if (wch != ILLEGAL_CHAR_VALUE)
1320                   {
1321                     element_t *pelem;
1322
1323                     if (find_entry (&collate->result, &wch, sizeof (wchar_t),
1324                                     (void *)&pelem) < 0)
1325                       pelem = NULL;
1326
1327                     while (pelem != NULL
1328                            && (pelem->name[0] != wch
1329                                || pelem->name[1] != L'\0'))
1330                       pelem = pelem->next;
1331
1332                     value = pelem == NULL ? 0 : pelem->this_weight;
1333                   }
1334                 else if (find_entry (&collate->elements, startp, putp - startp,
1335                                      &tmp) >= 0)
1336                   {
1337                     element_t *pelem = (element_t *) tmp;
1338
1339                     value = pelem->this_weight;
1340                   }
1341                 else if (find_entry (&collate->symbols, startp, putp - startp,
1342                                      &tmp) >= 0)
1343                   {
1344                     value = (unsigned int) tmp;
1345                   }
1346                 else
1347                   {
1348                     if (verbose)
1349                       lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1350                                 putp - startp, startp);
1351                     lr_ignore_rest (lr, 0);
1352                     return -1;
1353                   }
1354               }
1355             else
1356               {
1357                 element_t *wp;
1358                 wchar_t wch;
1359
1360                 if (*runp == lr->escape_char)
1361                   {
1362                     static char digits[] = "0123456789abcdef";
1363                     char *dp;
1364                     int base;
1365
1366                     ++runp;
1367                     if (tolower (*runp) == 'x')
1368                       {
1369                         ++runp;
1370                         base = 16;
1371                       }
1372                     else if (tolower (*runp) == 'd')
1373                       {
1374                         ++runp;
1375                         base = 10;
1376                       }
1377                     else
1378                       base = 8;
1379
1380                     dp = strchr (digits, tolower (*runp));
1381                     if (dp == NULL || (dp - digits) >= base)
1382                       {
1383                       illegal_char:
1384                         lr_error (lr, _("\
1385 illegal character constant in string"));
1386                         lr_ignore_rest (lr, 0);
1387                         return -1;
1388                       }
1389                     wch = dp - digits;
1390                     ++runp;
1391
1392                     dp = strchr (digits, tolower (*runp));
1393                     if (dp == NULL || (dp - digits) >= base)
1394                       goto illegal_char;
1395                     wch *= base;
1396                     wch += dp - digits;
1397                     ++runp;
1398
1399                     if (base != 16)
1400                       {
1401                         dp = strchr (digits, tolower (*runp));
1402                         if (dp != NULL && (dp - digits < base))
1403                           {
1404                             wch *= base;
1405                             wch += dp - digits;
1406                             ++runp;
1407                           }
1408                       }
1409                   }
1410                 else
1411                   wch = (wchar_t) *runp++;
1412
1413                 /* Lookup the weight for WCH.  */
1414                 if (find_entry (&collate->result, &wch, sizeof (wch),
1415                                 (void *)&wp) < 0)
1416                   wp = NULL;
1417
1418                 while (wp != NULL
1419                        && (wp->name[0] != wch || wp->name[1] != L'\0'))
1420                   wp = wp->next;
1421
1422                 value = wp == NULL ? 0 : wp->this_weight;
1423
1424                 /* To get the correct name for the error message.  */
1425                 putp = runp;
1426
1427                 /**************************************************\
1428                 |* I know here is something wrong.  Characters in *|
1429                 |* the string which are not in the <...> form     *|
1430                 |* cannot be declared forward for now!!!          *|
1431                 \**************************************************/
1432               }
1433
1434             /* Store in weight array.  */
1435             if (collate->nweight >= collate->nweight_max)
1436               {
1437                 collate->nweight_max *= 2;
1438                 collate->weight
1439                   = (unsigned int *) xrealloc (collate->weight,
1440                                                collate->nweight_max);
1441               }
1442
1443             if (value == 0)
1444               {
1445                 patch_t *newp;
1446
1447                 newp = (patch_t *) obstack_alloc (&collate->element_mem,
1448                                                   sizeof (patch_t));
1449                 newp->fname = lr->fname;
1450                 newp->lineno = lr->lineno;
1451                 newp->token
1452                   = (const char *) obstack_copy0 (&collate->element_mem,
1453                                                   startp, putp - startp);
1454                 newp->where.idx = collate->nweight++;
1455                 newp->next = collate->current_patch;
1456                 collate->current_patch = newp;
1457               }
1458             else
1459               collate->weight[collate->nweight++] = value;
1460             ++collate->weight_cnt[collate->weight_idx];
1461           }
1462       }
1463       return 0;
1464
1465     default:
1466       assert (! "should not happen");
1467     }
1468
1469
1470   if (collate->nweight >= collate->nweight_max)
1471     {
1472       collate->nweight_max *= 2;
1473       collate->weight = (unsigned int *) xrealloc (collate->weight,
1474                                                    collate->nweight_max);
1475     }
1476
1477   collate->weight[collate->nweight++] = value;
1478   ++collate->weight_cnt[collate->weight_idx];
1479
1480   return 0;
1481 }
1482
1483
1484 void
1485 collate_end_weight (struct linereader *lr, struct localedef_t *locale)
1486 {
1487   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1488   element_t *pelem = collate->current_element;
1489
1490   if (collate->kind == symbol)
1491     {
1492       /* We don't have to do anything.  */
1493       collate->was_ellipsis = 0;
1494       return;
1495     }
1496
1497   if (collate->kind == ellipsis)
1498     {
1499       /* Before the next line is processed the ellipsis is handled.  */
1500       collate->was_ellipsis = 1;
1501       return;
1502     }
1503
1504   assert (collate->kind == character || collate->kind == element
1505           || collate->kind == undefined);
1506
1507   /* Fill in the missing weights.  */
1508   while (++collate->weight_idx < collate->nrules)
1509     {
1510       collate->weight[collate->nweight++] = pelem->this_weight;
1511       ++collate->weight_cnt[collate->weight_idx];
1512     }
1513
1514   /* Now we know how many ordering weights the current
1515      character/element has.  Allocate room in the element structure
1516      and copy information.  */
1517   pelem->ordering_len = collate->nweight;
1518
1519   /* First we write an array with the number of values for each
1520      weight.  */
1521   obstack_grow (&collate->element_mem, collate->weight_cnt,
1522                 collate->nrules * sizeof (unsigned int));
1523
1524   /* Now the weights itselves.  */
1525   obstack_grow (&collate->element_mem, collate->weight,
1526                 collate->nweight * sizeof (unsigned int));
1527
1528   /* Get result.  */
1529   pelem->ordering = obstack_finish (&collate->element_mem);
1530
1531   /* Now we handle the "patches".  */
1532   while (collate->current_patch != NULL)
1533     {
1534       patch_t *this_patch;
1535
1536       this_patch = collate->current_patch;
1537
1538       this_patch->where.pos = &pelem->ordering[collate->nrules
1539                                               + this_patch->where.idx];
1540
1541       collate->current_patch = this_patch->next;
1542       this_patch->next = collate->all_patches;
1543       collate->all_patches = this_patch;
1544     }
1545
1546   /* Set information for next round.  */
1547   collate->was_ellipsis = 0;
1548   if (collate->kind != undefined)
1549     collate->last_char = pelem->name[0];
1550 }