locale/programs/ld-collate.c

   1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Library General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Library General Public License for more details.
  14
  15    You should have received a copy of the GNU Library General Public
  16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18    Boston, MA 02111-1307, USA.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include <config.h>
  22 #endif
  23
  24 #include <endian.h>
  25 #include <errno.h>
  26 #include <limits.h>
  27 #include <locale.h>
  28 #include <obstack.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <wchar.h>
  32
  33 #include "localeinfo.h"
  34 #include "locales.h"
  35 #include "simple-hash.h"
  36 #include "stringtrans.h"
  37 #include "strlen-hash.h"
  38
  39 /* Uncomment the following line in the production version.  */
  40 /* #define NDEBUG 1 */
  41 #include <assert.h>
  42
  43
  44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
  45
  46 #define SWAPU32(w) \
  47   (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
  48
  49
  50 /* What kind of symbols get defined?  */
  51 enum coll_symbol
  52 {
  53   undefined,
  54   ellipsis,
  55   character,
  56   element,
  57   symbol
  58 };
  59
  60
  61 typedef struct patch_t
  62 {
  63   const char *fname;
  64   size_t lineno;
  65   const char *token;
  66   union
  67   {
  68     unsigned int *pos;
  69     size_t idx;
  70   } where;
  71   struct patch_t *next;
  72 } patch_t;
  73
  74
  75 typedef struct element_t
  76 {
  77   const wchar_t *name;
  78   unsigned int this_weight;
  79
  80   struct element_t *next;
  81
  82   unsigned int *ordering;
  83   size_t ordering_len;
  84 } element_t;
  85
  86
  87 /* The real definition of the struct for the LC_COLLATE locale.  */
  88 struct locale_collate_t
  89 {
  90   /* Collate symbol table.  Simple mapping to number.  */
  91   hash_table symbols;
  92
  93   /* The collation elements.  */
  94   hash_table elements;
  95   struct obstack element_mem;
  96
  97   /* The result table.  */
  98   hash_table result;
  99
 100   /* Sorting rules given in order_start line.  */
 101   u_int32_t nrules;
 102   u_int32_t nrules_max;
 103   enum coll_sort_rule *rules;
 104
 105   /* Used while recognizing symbol composed of multiple tokens
 106      (collating-element).  */
 107   const char *combine_token;
 108   size_t combine_token_len;
 109
 110   /* How many sorting order specifications so far.  */
 111   unsigned int order_cnt;
 112
 113   /* Was lastline ellipsis?  */
 114   int was_ellipsis;
 115   /* Value of last entry if was character.  */
 116   wchar_t last_char;
 117   /* Current element.  */
 118   element_t *current_element;
 119   /* What kind of symbol is current element.  */
 120   enum coll_symbol kind;
 121
 122   /* While collecting the weights we need some temporary space.  */
 123   unsigned int current_order;
 124   int *weight_cnt;
 125   unsigned int weight_idx;
 126   unsigned int *weight;
 127   size_t nweight;
 128   size_t nweight_max;
 129
 130   /* Patch lists.  */
 131   patch_t *current_patch;
 132   patch_t *all_patches;
 133
 134   /* Room for the UNDEFINED information.  */
 135   element_t undefined;
 136   unsigned int undefined_len;
 137 };
 138
 139
 140 /* Be verbose?  Defined in localedef.c.  */
 141 extern int verbose;
 142
 143
 144 void *xmalloc (size_t __n);
 145 void *xrealloc (void *__p, size_t __n);
 146
 147
 148 #define obstack_chunk_alloc malloc
 149 #define obstack_chunk_free free
 150
 151
 152 void
 153 collate_startup (struct linereader *lr, struct localedef_t *locale,
 154                  struct charset_t *charset)
 155 {
 156   struct locale_collate_t *collate;
 157
 158   /* It is important that we always use UCS4 encoding for strings now.  */
 159   encoding_method = ENC_UCS4;
 160
 161   /* Allocate the needed room.  */
 162   locale->categories[LC_COLLATE].collate = collate =
 163     (struct locale_collate_t *) xmalloc (sizeof (struct locale_collate_t));
 164
 165   /* Allocate hash table for collating elements.  */
 166   if (init_hash (&collate->elements, 512))
 167     error (4, 0, _("memory exhausted"));
 168   collate->combine_token = NULL;
 169   obstack_init (&collate->element_mem);
 170
 171   /* Allocate hash table for collating elements.  */
 172   if (init_hash (&collate->symbols, 64))
 173     error (4, 0, _("memory exhausted"));
 174
 175   /* Allocate hash table for result.  */
 176   if (init_hash (&collate->result, 512))
 177     error (4, 0, _("memory exhausted"));
 178
 179   collate->nrules = 0;
 180   collate->nrules_max = 10;
 181   collate->rules
 182     = (enum coll_sort_rule *) xmalloc (collate->nrules_max
 183                                        * sizeof (enum coll_sort_rule));
 184
 185   collate->order_cnt = 1;       /* The smallest weight is 2.  */
 186
 187   collate->was_ellipsis = 0;
 188   collate->last_char = L'\0';   /* 0 because leading ellipsis is allowed.  */
 189
 190   collate->all_patches = NULL;
 191
 192   /* This tells us no UNDEFINED entry was found until now.  */
 193   collate->undefined.this_weight = 0;
 194
 195   lr->translate_strings = 0;
 196 }
 197
 198
 199 void
 200 collate_finish (struct localedef_t *locale, struct charset_t *charset)
 201 {
 202   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 203   patch_t *patch;
 204   size_t cnt;
 205
 206   /* Patch the constructed table so that forward references are
 207      correctly filled.  */
 208   for (patch = collate->all_patches; patch != NULL; patch = patch->next)
 209     {
 210       wchar_t wch;
 211       size_t toklen = strlen (patch->token);
 212       void *ptmp;
 213       unsigned int value = 0;
 214
 215       wch = charset_find_value (charset, patch->token, toklen);
 216       if (wch != ILLEGAL_CHAR_VALUE)
 217         {
 218           element_t *runp;
 219
 220           if (find_entry (&collate->result, &wch, sizeof (wchar_t),
 221                           (void *) &runp) < 0)
 222             runp = NULL;
 223           for (; runp != NULL; runp = runp->next)
 224             if (runp->name[0] == wch && runp->name[1] == L'\0')
 225               break;
 226
 227           value = runp == NULL ? 0 : runp->this_weight;
 228         }
 229       else if (find_entry (&collate->elements, patch->token, toklen, &ptmp)
 230                >= 0)
 231         {
 232           value = ((element_t *) ptmp)->this_weight;
 233         }
 234       else if (find_entry (&collate->symbols, patch->token, toklen, &ptmp)
 235                >= 0)
 236         {
 237           value = (unsigned long int) ptmp;
 238         }
 239       else
 240         value = 0;
 241
 242       if (value == 0 && !be_quiet)
 243         error_at_line (0, 0, patch->fname, patch->lineno,
 244                        _("no weight defined for symbol `%s'"), patch->token);
 245       else
 246         *patch->where.pos = value;
 247     }
 248
 249   /* If no definition for UNDEFINED is given, all characters in the
 250      given charset must be specified.  */
 251   if (collate->undefined.ordering == NULL)
 252     {
 253       /**************************************************************\
 254       |* XXX We should test whether really an unspecified character *|
 255       |* exists before giving the message.                          *|
 256       \**************************************************************/
 257       u_int32_t weight;
 258
 259       if (!be_quiet)
 260         error (0, 0, _("no definition of `UNDEFINED'"));
 261
 262       collate->undefined.ordering_len = collate->nrules;
 263       weight = ++collate->order_cnt;
 264
 265       for (cnt = 0; cnt < collate->nrules; ++cnt)
 266         {
 267           u_int32_t one = 1;
 268           obstack_grow (&collate->element_mem, &one, sizeof (one));
 269         }
 270
 271       for (cnt = 0; cnt < collate->nrules; ++cnt)
 272         obstack_grow (&collate->element_mem, &weight, sizeof (weight));
 273
 274       collate->undefined.ordering = obstack_finish (&collate->element_mem);
 275     }
 276
 277   collate->undefined_len = 2;   /* For the name: 1 x wchar_t + L'\0'.  */
 278   for (cnt = 0; cnt < collate->nrules; ++cnt)
 279     collate->undefined_len += 1 + collate->undefined.ordering[cnt];
 280 }
 281
 282
 283
 284 void
 285 collate_output (struct localedef_t *locale, struct charset_t *charset,
 286                 const char *output_path)
 287 {
 288   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
 289   u_int32_t table_size, table_best, level_best, sum_best;
 290   void *last;
 291   element_t *pelem;
 292   wchar_t *name;
 293   size_t len;
 294   const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
 295   struct iovec iov[2 + nelems];
 296   struct locale_file data;
 297   u_int32_t idx[nelems];
 298   struct obstack non_simple;
 299   struct obstack string_pool;
 300   size_t cnt, entry_size;
 301   u_int32_t undefined_offset = UINT_MAX;
 302   u_int32_t *table, *extra, *table2, *extra2;
 303   size_t extra_len;
 304   u_int32_t element_hash_tab_size;
 305   u_int32_t *element_hash_tab;
 306   u_int32_t *element_hash_tab_ob;
 307   u_int32_t element_string_pool_size;
 308   char *element_string_pool;
 309   u_int32_t element_value_size;
 310   wchar_t *element_value;
 311   wchar_t *element_value_ob;
 312   u_int32_t symbols_hash_tab_size;
 313   u_int32_t *symbols_hash_tab;
 314   u_int32_t *symbols_hash_tab_ob;
 315   u_int32_t symbols_string_pool_size;
 316   char *symbols_string_pool;
 317   u_int32_t symbols_class_size;
 318   u_int32_t *symbols_class;
 319   u_int32_t *symbols_class_ob;
 320   hash_table *hash_tab;
 321   unsigned int dummy_weights[collate->nrules + 1];
 322
 323   sum_best = UINT_MAX;
 324   table_best = 0xffff;
 325   level_best = 0xffff;
 326
 327   /* Compute table size.  */
 328   if (!be_quiet)
 329     fputs (_("\
 330 Computing table size for collation information might take a while..."),
 331            stderr);
 332   for (table_size = 256; table_size < sum_best; ++table_size)
 333     {
 334       size_t hits[table_size];
 335       unsigned int worst = 1;
 336       size_t cnt;
 337
 338       last = NULL;
 339
 340       for (cnt = 0; cnt < 256; ++cnt)
 341         hits[cnt] = 1;
 342       memset (&hits[256], '\0', sizeof (hits) - 256 * sizeof (size_t));
 343
 344       while (iterate_table (&collate->result, &last, (const void **) &name,
 345                             &len, (void **) &pelem) >= 0)
 346         if (pelem->ordering != NULL && pelem->name[0] > 0xff)
 347           if (++hits[(unsigned int) pelem->name[0] % table_size] > worst)
 348             {
 349               worst = hits[(unsigned int) pelem->name[0] % table_size];
 350               if (table_size * worst > sum_best)
 351                 break;
 352             }
 353
 354       if (table_size * worst < sum_best)
 355         {
 356           sum_best = table_size * worst;
 357           table_best = table_size;
 358           level_best = worst;
 359         }
 360     }
 361   assert (table_best != 0xffff || level_best != 0xffff);
 362   if (!be_quiet)
 363     fputs (_(" done\n"), stderr);
 364
 365   obstack_init (&non_simple);
 366   obstack_init (&string_pool);
 367
 368   data.magic = LIMAGIC (LC_COLLATE);
 369   data.n = nelems;
 370   iov[0].iov_base = (void *) &data;
 371   iov[0].iov_len = sizeof (data);
 372
 373   iov[1].iov_base = (void *) idx;
 374   iov[1].iov_len = sizeof (idx);
 375
 376   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_base = &collate->nrules;
 377   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES)].iov_len = sizeof (u_int32_t);
 378
 379   table = (u_int32_t *) alloca (collate->nrules * sizeof (u_int32_t));
 380   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_base = table;
 381   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES)].iov_len
 382     = collate->nrules * sizeof (u_int32_t);
 383   /* Another trick here.  Describing the collation method needs only a
 384      few bits (3, to be exact).  But the binary file should be
 385      accessible by machines with both endianesses and so we store both
 386      forms in the same word.  */
 387   for (cnt = 0; cnt < collate->nrules; ++cnt)
 388     table[cnt] = collate->rules[cnt] | SWAPU32 (collate->rules[cnt]);
 389
 390   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_base = &table_best;
 391   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].iov_len = sizeof (u_int32_t);
 392
 393   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_base = &level_best;
 394   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].iov_len
 395     = sizeof (u_int32_t);
 396
 397   entry_size = 1 + MAX (collate->nrules, 2);
 398
 399   table = (u_int32_t *) alloca (table_best * level_best * entry_size
 400                                 * sizeof (table[0]));
 401   memset (table, '\0', table_best * level_best * entry_size
 402           * sizeof (table[0]));
 403
 404
 405   /* Macros for inserting in output table.  */
 406 #define ADD_VALUE(expr)                                                       \
 407   do {                                                                        \
 408     u_int32_t to_write = (u_int32_t) expr;                                    \
 409     obstack_grow (&non_simple, &to_write, sizeof (to_write));                 \
 410   } while (0)
 411
 412 #define ADD_ELEMENT(pelem, len)                                               \
 413   do {                                                                        \
 414     size_t cnt, idx;                                                          \
 415                                                                               \
 416     ADD_VALUE (len);                                                          \
 417                                                                               \
 418     wlen = wcslen (pelem->name);                                              \
 419     obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
 420                                                                               \
 421     idx = collate->nrules;                                                    \
 422     for (cnt = 0; cnt < collate->nrules; ++cnt)                               \
 423       {                                                                       \
 424         size_t disp;                                                          \
 425                                                                               \
 426         ADD_VALUE (pelem->ordering[cnt]);                                     \
 427         for (disp = 0; disp < pelem->ordering[cnt]; ++disp)                   \
 428           ADD_VALUE (pelem->ordering[idx++]);                                 \
 429       }                                                                       \
 430   } while (0)
 431
 432 #define ADD_FORWARD(pelem)                                                    \
 433   do {                                                                        \
 434     /* We leave a reference in the main table and put all                     \
 435        information in the table for the extended entries.  */                 \
 436     element_t *runp;                                                          \
 437     element_t *has_simple = NULL;                                             \
 438     size_t wlen;                                                              \
 439                                                                               \
 440     table[(level * table_best + slot) * entry_size + 1]                       \
 441       = FORWARD_CHAR;                                                         \
 442     table[(level * table_best + slot) * entry_size + 2]                       \
 443       = obstack_object_size (&non_simple) / sizeof (u_int32_t);               \
 444                                                                               \
 445     /* Here we have to construct the non-simple table entry.  First           \
 446        compute the total length of this entry.  */                            \
 447     for (runp = (pelem); runp != NULL; runp = runp->next)                     \
 448       if (runp->ordering != NULL)                                             \
 449         {                                                                     \
 450           u_int32_t value;                                                    \
 451           size_t cnt;                                                         \
 452                                                                               \
 453           value = 1 + wcslen (runp->name) + 1;                                \
 454                                                                               \
 455           for (cnt = 0; cnt < collate->nrules; ++cnt)                         \
 456             /* We have to take care for entries without ordering              \
 457                information.  While reading them they get inserted in the      \
 458                table and later not removed when something goes wrong with     \
 459                reading its weights.  */                                       \
 460             {                                                                 \
 461               value += 1 + runp->ordering[cnt];                               \
 462                                                                               \
 463               if (runp->name[1] == L'\0')                                     \
 464                 has_simple = runp;                                            \
 465             }                                                                 \
 466                                                                               \
 467           ADD_ELEMENT (runp, value);                                          \
 468         }                                                                     \
 469                                                                               \
 470     if (has_simple == NULL)                                                   \
 471       {                                                                       \
 472         size_t idx, cnt;                                                      \
 473                                                                               \
 474         ADD_VALUE (collate->undefined_len + 1);                               \
 475                                                                               \
 476         /* Add the name.  */                                                  \
 477         ADD_VALUE ((pelem)->name[0]);                                         \
 478         ADD_VALUE (0);                                                        \
 479                                                                               \
 480         idx = collate->nrules;                                                \
 481         for (cnt = 0; cnt < collate->nrules; ++cnt)                           \
 482           {                                                                   \
 483             size_t disp;                                                      \
 484                                                                               \
 485             ADD_VALUE (collate->undefined.ordering[cnt]);                     \
 486             for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)   \
 487               {                                                               \
 488                 if ((wchar_t) collate->undefined.ordering[idx]                \
 489                     == ELLIPSIS_CHAR)                                         \
 490                   ADD_VALUE ((pelem)->name[0]);                               \
 491                 else                                                          \
 492                   ADD_VALUE (collate->undefined.ordering[idx++]);             \
 493                 ++idx;                                                        \
 494               }                                                               \
 495           }                                                                   \
 496       }                                                                       \
 497   } while (0)
 498
 499
 500
 501   /* Fill the table now.  First we look for all the characters which
 502      fit into one single byte.  This speeds up the 8-bit string
 503      functions.  */
 504   last = NULL;
 505   while (iterate_table (&collate->result, &last, (const void **) &name,
 506                         &len, (void **) &pelem) >= 0)
 507     if (pelem->name[0] <= 0xff)
 508       {
 509         /* We have a single byte name.  Now we must distinguish
 510            between entries in simple form (i.e., only one value per
 511            weight and no collation element starting with the same
 512            character) and those which are not.  */
 513         size_t slot = ((size_t) pelem->name[0]);
 514         const size_t level = 0;
 515
 516         table[slot * entry_size] = pelem->name[0];
 517
 518         if (pelem->name[1] == L'\0' && pelem->next == NULL
 519             && pelem->ordering_len == collate->nrules)
 520           {
 521             /* Yes, we have a simple one.  Lucky us.  */
 522             size_t cnt;
 523
 524             for (cnt = 0; cnt < collate->nrules; ++cnt)
 525               table[slot * entry_size + 1 + cnt]
 526                 = pelem->ordering[collate->nrules + cnt];
 527           }
 528         else
 529           ADD_FORWARD (pelem);
 530       }
 531
 532   /* Now check for missing single byte entries.  If one exist we fill
 533      with the UNDEFINED entry.  */
 534   for (cnt = 0; cnt < 256; ++cnt)
 535     /* The first weight is never 0 for existing entries.  */
 536     if (table[cnt * entry_size + 1] == 0)
 537       {
 538         /* We have to fill in the information from the UNDEFINED
 539            entry.  */
 540         table[cnt * entry_size] = (u_int32_t) cnt;
 541
 542         if (collate->undefined.ordering_len == collate->nrules)
 543           {
 544             size_t inner;
 545
 546             for (inner = 0; inner < collate->nrules; ++inner)
 547               if ((wchar_t)collate->undefined.ordering[collate->nrules + inner]
 548                   == ELLIPSIS_CHAR)
 549                 table[cnt * entry_size + 1 + inner] = cnt;
 550               else
 551                 table[cnt * entry_size + 1 + inner]
 552                   = collate->undefined.ordering[collate->nrules + inner];
 553           }
 554         else
 555           {
 556             if (undefined_offset != UINT_MAX)
 557               {
 558                 table[cnt * entry_size + 1] = FORWARD_CHAR;
 559                 table[cnt * entry_size + 2] = undefined_offset;
 560               }
 561             else
 562               {
 563                 const size_t slot = cnt;
 564                 const size_t level = 0;
 565
 566                 ADD_FORWARD (&collate->undefined);
 567                 undefined_offset = table[cnt * entry_size + 2];
 568               }
 569           }
 570       }
 571
 572   /* Now we are ready for inserting the whole rest.   */
 573   last = NULL;
 574   while (iterate_table (&collate->result, &last, (const void **) &name,
 575                         &len, (void **) &pelem) >= 0)
 576     if (pelem->name[0] > 0xff)
 577       {
 578         /* Find the position.  */
 579         size_t slot = ((size_t) pelem->name[0]) % table_best;
 580         size_t level = 0;
 581
 582         while (table[(level * table_best + slot) * entry_size + 1] != 0)
 583           ++level;
 584         assert (level < level_best);
 585
 586         if (pelem->name[1] == L'\0' && pelem->next == NULL
 587             && pelem->ordering_len == collate->nrules)
 588           {
 589             /* Again a simple entry.  */
 590             size_t inner;
 591
 592             for (inner = 0; inner < collate->nrules; ++inner)
 593               table[(level * table_best + slot) * entry_size + 1 + inner]
 594                 = pelem->ordering[collate->nrules + inner];
 595           }
 596         else
 597           ADD_FORWARD (pelem);
 598       }
 599
 600   /* Add the UNDEFINED entry.  */
 601   {
 602     /* Here we have to construct the non-simple table entry.  */
 603     size_t idx, cnt;
 604
 605     undefined_offset = obstack_object_size (&non_simple);
 606
 607     idx = collate->nrules;
 608     for (cnt = 0; cnt < collate->nrules; ++cnt)
 609       {
 610         size_t disp;
 611
 612         ADD_VALUE (collate->undefined.ordering[cnt]);
 613         for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp)
 614           ADD_VALUE (collate->undefined.ordering[idx++]);
 615       }
 616   }
 617
 618   /* Finish the extra block.  */
 619   extra_len = obstack_object_size (&non_simple);
 620   extra = (u_int32_t *) obstack_finish (&non_simple);
 621   assert ((extra_len % sizeof (u_int32_t)) == 0);
 622
 623   /* Now we have to build the two array for the other byte ordering.  */
 624   table2 = (u_int32_t *) alloca (table_best * level_best * entry_size
 625                                  * sizeof (table[0]));
 626   extra2 = (u_int32_t *) alloca (extra_len);
 627
 628   for (cnt = 0; cnt < table_best * level_best * entry_size; ++cnt)
 629     table2[cnt] = SWAPU32 (table[cnt]);
 630
 631   for (cnt = 0; cnt < extra_len / sizeof (u_int32_t); ++cnt)
 632     extra2[cnt] = SWAPU32 (extra2[cnt]);
 633
 634   /* We need a simple hashing table to get a collation-element->chars
 635      mapping.  We again use internal hasing using a secondary hashing
 636      function.
 637
 638      Each string has an associate hashing value V, computed by a
 639      fixed function.  To locate the string we use open addressing with
 640      double hashing.  The first index will be V % M, where M is the
 641      size of the hashing table.  If no entry is found, iterating with
 642      a second, independent hashing function takes place.  This second
 643      value will be 1 + V % (M - 2).  The approximate number of probes
 644      will be
 645
 646           for unsuccessful search: (1 - N / M) ^ -1
 647           for successful search:   - (N / M) ^ -1 * ln (1 - N / M)
 648
 649      where N is the number of keys.
 650
 651      If we now choose M to be the next prime bigger than 4 / 3 * N,
 652      we get the values 4 and 1.85 resp.  Because unsuccessful searches
 653      are unlikely this is a good value.  Formulas: [Knuth, The Art of
 654      Computer Programming, Volume 3, Sorting and Searching, 1973,
 655      Addison Wesley]  */
 656   if (collate->elements.filled == 0)
 657     {
 658       /* We don't need any element table since there are no collating
 659          elements.  */
 660       element_hash_tab_size = 0;
 661       element_hash_tab = NULL;
 662       element_hash_tab_ob = NULL;
 663       element_string_pool_size = 0;
 664       element_string_pool = NULL;
 665       element_value_size = 0;
 666       element_value = NULL;
 667       element_value_ob = NULL;
 668     }
 669   else
 670     {
 671       void *ptr;                /* Running pointer.  */
 672       const char *key;          /* Key for current bucket.  */
 673       size_t keylen;            /* Length of key data.  */
 674       const element_t *data;    /* Data, i.e., the character sequence.  */
 675
 676       element_hash_tab_size = next_prime ((collate->elements.filled * 4) / 3);
 677       if (element_hash_tab_size < 7)
 678         /* We need a minimum to make the following code work.  */
 679         element_hash_tab_size = 7;
 680
 681       element_hash_tab = obstack_alloc (&non_simple, (2 * element_hash_tab_size
 682                                                       * sizeof (u_int32_t)));
 683       memset (element_hash_tab, '\377', (2 * element_hash_tab_size
 684                                          * sizeof (u_int32_t)));
 685
 686       ptr = NULL;
 687       while (iterate_table (&collate->elements, &ptr, (const void **) &key,
 688                             &keylen, (void **) &data) == 0)
 689         {
 690           size_t hash_val = hash_string (key, keylen);
 691           size_t idx = hash_val % element_hash_tab_size;
 692
 693           if (element_hash_tab[2 * idx] != (~((u_int32_t) 0)))
 694             {
 695               /* We need the second hashing function.  */
 696               size_t c = 1 + (hash_val % (element_hash_tab_size - 2));
 697
 698               do
 699                 if (idx >= element_hash_tab_size - c)
 700                   idx -= element_hash_tab_size - c;
 701                 else
 702                   idx += c;
 703               while (element_hash_tab[2 * idx] != (~((u_int32_t) 0)));
 704             }
 705
 706           element_hash_tab[2 * idx] = obstack_object_size (&non_simple);
 707           element_hash_tab[2 * idx + 1] = (obstack_object_size (&string_pool)
 708                                            / sizeof (wchar_t));
 709
 710           obstack_grow0 (&non_simple, key, keylen);
 711           obstack_grow (&string_pool, data->name,
 712                         (wcslen (data->name) + 1) * sizeof (wchar_t));
 713         }
 714
 715       if (obstack_object_size (&non_simple) % 4 != 0)
 716         obstack_blank (&non_simple,
 717                        4 - (obstack_object_size (&non_simple) % 4));
 718       element_string_pool_size = obstack_object_size (&non_simple);
 719       element_string_pool = obstack_finish (&non_simple);
 720
 721       element_value_size = obstack_object_size (&string_pool);
 722       element_value = obstack_finish (&string_pool);
 723
 724       /* Create the tables for the other byte order.  */
 725       element_hash_tab_ob = obstack_alloc (&non_simple,
 726                                            (2 * element_hash_tab_size
 727                                             * sizeof (u_int32_t)));
 728       for (cnt = 0; cnt < 2 * element_hash_tab_size; ++cnt)
 729         element_hash_tab_ob[cnt] = SWAPU32 (element_hash_tab[cnt]);
 730
 731       element_value_ob = obstack_alloc (&string_pool, element_value_size);
 732       if (sizeof (wchar_t) != 4)
 733         {
 734           fputs ("sizeof (wchar_t) != 4 currently not handled", stderr);
 735           abort ();
 736         }
 737       for (cnt = 0; cnt < element_value_size / 4; ++cnt)
 738         element_value_ob[cnt] = SWAPU32 (element_value[cnt]);
 739     }
 740
 741   /* Store collation elements as map to collation class.  There are
 742      three kinds of symbols:
 743        - simple characters
 744        - collation elements
 745        - collation symbols
 746      We need to make a table which lets the user to access the primary
 747      weight based on the symbol string.  */
 748   symbols_hash_tab_size = next_prime ((4 * (charset->char_table.filled
 749                                             + collate->elements.filled
 750                                             + collate->symbols.filled)) / 3);
 751   symbols_hash_tab = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
 752                                                   * sizeof (u_int32_t)));
 753   memset (symbols_hash_tab, '\377', (2 * symbols_hash_tab_size
 754                                      * sizeof (u_int32_t)));
 755
 756   /* Now fill the array.  First the symbols from the character set,
 757      then the collation elements and last the collation symbols.  */
 758   hash_tab = &charset->char_table;
 759   while (1)
 760     {
 761       void *ptr;        /* Running pointer.  */
 762       const char *key;  /* Key for current bucket.  */
 763       size_t keylen;    /* Length of key data.  */
 764       void *data;       /* Data.  */
 765
 766       ptr = NULL;
 767       while (iterate_table (hash_tab, &ptr, (const void **) &key,
 768                             &keylen, (void **) &data) == 0)
 769         {
 770           size_t hash_val;
 771           size_t idx;
 772           u_int32_t word;
 773           unsigned int *weights;
 774
 775           if (hash_tab == &charset->char_table
 776               || hash_tab == &collate->elements)
 777             {
 778               element_t *lastp, *firstp;
 779               wchar_t dummy_name[2];
 780               const wchar_t *name;
 781               size_t name_len;
 782
 783               if (hash_tab == &charset->char_table)
 784                 {
 785                   dummy_name[0] = (wchar_t) ((unsigned long int) data);
 786                   dummy_name[1] = L'\0';
 787                   name = dummy_name;
 788                   name_len = sizeof (wchar_t);
 789                 }
 790               else
 791                 {
 792                   element_t *elemp = (element_t *) data;
 793                   name = elemp->name;
 794                   name_len = wcslen (name) * sizeof (wchar_t);
 795                 }
 796
 797               /* First check whether this character is used at all.  */
 798               if (find_entry (&collate->result, name, name_len,
 799                               (void *) &firstp) < 0)
 800                 /* The symbol is not directly mentioned in the collation.
 801                    I.e., we use the value for UNDEFINED.  */
 802                 lastp = &collate->undefined;
 803               else
 804                 {
 805                   /* The entry for the simple character is always found at
 806                      the end.  */
 807                   lastp = firstp;
 808                   while (lastp->next != NULL && wcscmp (name, lastp->name))
 809                     lastp = lastp->next;
 810                 }
 811
 812               weights = lastp->ordering;
 813             }
 814           else
 815             {
 816               dummy_weights[0] = 1;
 817               dummy_weights[collate->nrules]
 818                 = (unsigned int) ((unsigned long int) data);
 819
 820               weights = dummy_weights;
 821             }
 822
 823           /* In LASTP->ordering we now have the collation class.
 824              Determine the place in the hashing table next.  */
 825           hash_val = hash_string (key, keylen);
 826           idx = hash_val % symbols_hash_tab_size;
 827
 828           if (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0)))
 829             {
 830               /* We need the second hashing function.  */
 831               size_t c = 1 + (hash_val % (symbols_hash_tab_size - 2));
 832
 833               do
 834                 if (idx >= symbols_hash_tab_size - c)
 835                   idx -= symbols_hash_tab_size - c;
 836                 else
 837                   idx += c;
 838               while (symbols_hash_tab[2 * idx] != (~((u_int32_t) 0)));
 839             }
 840
 841           symbols_hash_tab[2 * idx] = obstack_object_size (&string_pool);
 842           symbols_hash_tab[2 * idx + 1] = (obstack_object_size (&non_simple)
 843                                            / sizeof (u_int32_t));
 844
 845           obstack_grow0 (&string_pool, key, keylen);
 846           /* Adding the first weight looks complicated.  We have to deal
 847              with the kind it is stored and with the fact that original
 848              form uses `unsigned int's while we need `u_int32_t' here.  */
 849           word = weights[0];
 850           obstack_grow (&non_simple, &word, sizeof (u_int32_t));
 851           for (cnt = 0; cnt < weights[0]; ++cnt)
 852             {
 853               word = weights[collate->nrules + cnt];
 854               obstack_grow (&non_simple, &word, sizeof (u_int32_t));
 855             }
 856         }
 857
 858       if (hash_tab == &charset->char_table)
 859         hash_tab = &collate->elements;
 860       else if (hash_tab == &collate->elements)
 861         hash_tab = &collate->symbols;
 862       else
 863         break;
 864     }
 865
 866   /* Now we have the complete tables.  */
 867   if (obstack_object_size (&string_pool) % 4 != 0)
 868     obstack_blank (&non_simple, 4 - (obstack_object_size (&string_pool) % 4));
 869   symbols_string_pool_size = obstack_object_size (&string_pool);
 870   symbols_string_pool = obstack_finish (&string_pool);
 871
 872   symbols_class_size = obstack_object_size (&non_simple);
 873   symbols_class = obstack_finish (&non_simple);
 874
 875   /* Generate tables with other byte order.  */
 876   symbols_hash_tab_ob = obstack_alloc (&non_simple, (2 * symbols_hash_tab_size
 877                                                      * sizeof (u_int32_t)));
 878   for (cnt = 0; cnt < 2 * symbols_hash_tab_size; ++cnt)
 879     symbols_hash_tab_ob[cnt] = SWAPU32 (symbols_hash_tab[cnt]);
 880
 881   symbols_class_ob = obstack_alloc (&non_simple, symbols_class_size);
 882   for (cnt = 0; cnt < symbols_class_size / 4; ++cnt)
 883     symbols_class_ob[cnt] = SWAPU32 (symbols_class[cnt]);
 884
 885
 886   /* Store table addresses and lengths.   */
 887 #if __BYTE_ORDER == __BIG_ENDIAN
 888   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table;
 889   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
 890     = table_best * level_best * entry_size * sizeof (table[0]);
 891
 892   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table2;
 893   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
 894     = table_best * level_best * entry_size * sizeof (table[0]);
 895
 896   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra;
 897   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
 898
 899   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra2;
 900   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
 901 #else
 902   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_base = table2;
 903   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB)].iov_len
 904     = table_best * level_best * entry_size * sizeof (table[0]);
 905
 906   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_base = table;
 907   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL)].iov_len
 908     = table_best * level_best * entry_size * sizeof (table[0]);
 909
 910   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_base = extra2;
 911   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB)].iov_len = extra_len;
 912
 913   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_base = extra;
 914   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL)].iov_len = extra_len;
 915 #endif
 916
 917   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_base = &undefined_offset;
 918   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED)].iov_len = sizeof (u_int32_t);
 919
 920
 921   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_base
 922     = &element_hash_tab_size;
 923   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE)].iov_len
 924     = sizeof (u_int32_t);
 925
 926 #if __BYTE_ORDER == __BIG_ENDIAN
 927   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
 928     = element_hash_tab;
 929   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
 930     = 2 * element_hash_tab_size * sizeof (u_int32_t);
 931
 932   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
 933     = element_hash_tab_ob;
 934   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
 935     = 2 * element_hash_tab_size * sizeof (u_int32_t);
 936 #else
 937   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_base
 938     = element_hash_tab;
 939   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL)].iov_len
 940     = 2 * element_hash_tab_size * sizeof (u_int32_t);
 941
 942   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_base
 943     = element_hash_tab_ob;
 944   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB)].iov_len
 945     = 2 * element_hash_tab_size * sizeof (u_int32_t);
 946 #endif
 947
 948   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_base
 949     = element_string_pool;
 950   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL)].iov_len
 951     = element_string_pool_size;
 952
 953 #if __BYTE_ORDER == __BIG_ENDIAN
 954   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
 955     = element_value;
 956   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
 957     = element_value_size;
 958
 959   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
 960     = element_value_ob;
 961   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
 962     = element_value_size;
 963 #else
 964   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_base
 965     = element_value;
 966   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL)].iov_len
 967     = element_value_size;
 968
 969   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_base
 970     = element_value_ob;
 971   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB)].iov_len
 972     = element_value_size;
 973 #endif
 974
 975   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_base
 976     = &symbols_hash_tab_size;
 977   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE)].iov_len
 978     = sizeof (u_int32_t);
 979
 980 #if __BYTE_ORDER == __BIG_ENDIAN
 981   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
 982     = symbols_hash_tab;
 983   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
 984     = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
 985
 986   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
 987     = symbols_hash_tab_ob;
 988   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
 989     = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
 990 #else
 991   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_base
 992     = symbols_hash_tab;
 993   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL)].iov_len
 994     = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
 995
 996   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_base
 997     = symbols_hash_tab_ob;
 998   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB)].iov_len
 999     = 2 * symbols_hash_tab_size * sizeof (u_int32_t);
1000 #endif
1001
1002   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_base
1003     = symbols_string_pool;
1004   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL)].iov_len
1005     = symbols_string_pool_size;
1006
1007 #if __BYTE_ORDER == __BIG_ENDIAN
1008   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
1009     = symbols_class;
1010   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
1011     = symbols_class_size;
1012
1013   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
1014     = symbols_class_ob;
1015   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
1016     = symbols_class_size;
1017 #else
1018   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_base
1019     = symbols_class;
1020   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL)].iov_len
1021     = symbols_class_size;
1022
1023   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_base
1024     = symbols_class_ob;
1025   iov[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB)].iov_len
1026     = symbols_class_size;
1027 #endif
1028
1029   /* Update idx array.  */
1030   idx[0] = iov[0].iov_len + iov[1].iov_len;
1031   for (cnt = 1; cnt < nelems; ++cnt)
1032     idx[cnt] = idx[cnt - 1] + iov[1 + cnt].iov_len;
1033
1034   write_locale_data (output_path, "LC_COLLATE", 2 + nelems, iov);
1035
1036   obstack_free (&non_simple, NULL);
1037   obstack_free (&string_pool, NULL);
1038 }
1039
1040
1041 void
1042 collate_element_to (struct linereader *lr, struct localedef_t *locale,
1043                     struct token *code, struct charset_t *charset)
1044 {
1045   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1046   unsigned int value;
1047   void *not_used;
1048
1049   if (collate->combine_token != NULL)
1050     {
1051       free ((void *) collate->combine_token);
1052       collate->combine_token = NULL;
1053     }
1054
1055   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
1056   if ((wchar_t) value != ILLEGAL_CHAR_VALUE)
1057     {
1058       lr_error (lr, _("symbol for multicharacter collating element "
1059                       "`%.*s' duplicates symbolic name in charset"),
1060                 (int) code->val.str.len, code->val.str.start);
1061       return;
1062     }
1063
1064   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
1065                   &not_used) >= 0)
1066     {
1067       lr_error (lr, _("symbol for multicharacter collating element "
1068                       "`%.*s' duplicates other element definition"),
1069                 (int) code->val.str.len, code->val.str.start);
1070       return;
1071     }
1072
1073   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
1074                   &not_used) >= 0)
1075     {
1076       lr_error (lr, _("symbol for multicharacter collating element "
1077                       "`%.*s' duplicates symbol definition"),
1078                 (int) code->val.str.len, code->val.str.start);
1079       return;
1080     }
1081
1082   collate->combine_token = code->val.str.start;
1083   collate->combine_token_len = code->val.str.len;
1084 }
1085
1086
1087 void
1088 collate_element_from (struct linereader *lr, struct localedef_t *locale,
1089                       struct token *code, struct charset_t *charset)
1090 {
1091   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1092   element_t *elemp, *runp;
1093
1094   /* CODE is a string.  */
1095   elemp = (element_t *) obstack_alloc (&collate->element_mem,
1096                                        sizeof (element_t));
1097
1098   /* We have to translate the string.  It may contain <...> character
1099      names.  */
1100   elemp->name = (wchar_t *) translate_string (code->val.str.start, charset);
1101   elemp->this_weight = 0;
1102   elemp->ordering = NULL;
1103   elemp->ordering_len = 0;
1104
1105   free (code->val.str.start);
1106
1107   if (elemp->name == NULL)
1108     {
1109       /* At least one character in the string is not defined.  We simply
1110          do nothing.  */
1111       if (verbose)
1112         lr_error (lr, _("\
1113 `from' string in collation element declaration contains unknown character"));
1114       return;
1115     }
1116
1117   if (elemp->name[0] == L'\0' || elemp->name[1] == L'\0')
1118     {
1119       lr_error (lr, _("illegal collation element"));
1120       return;
1121     }
1122
1123   /* The entries in the linked lists of RESULT are sorting in
1124      descending order.  The order is important for the `strcoll' and
1125      `wcscoll' functions.  */
1126   if (find_entry (&collate->result, elemp->name, sizeof (wchar_t),
1127                   (void *) &runp) >= 0)
1128     {
1129       /* We already have an entry with this key.  Check whether it is
1130          identical.  */
1131       element_t *prevp = NULL;
1132       int cmpres;
1133
1134       do
1135         {
1136           cmpres = wcscmp (elemp->name, runp->name);
1137           if (cmpres <= 0)
1138             break;
1139           prevp = runp;
1140         }
1141       while ((runp = runp->next) != NULL);
1142
1143       if (cmpres == 0)
1144         lr_error (lr, _("duplicate collating element definition"));
1145       else
1146         {
1147           elemp->next = runp;
1148           if (prevp == NULL)
1149             {
1150               if (set_entry (&collate->result, elemp->name, sizeof (wchar_t),
1151                              elemp) < 0)
1152                 error (EXIT_FAILURE, 0, _("\
1153 error while inserting collation element into hash table"));
1154             }
1155           else
1156             prevp->next = elemp;
1157         }
1158     }
1159   else
1160     {
1161       elemp->next = NULL;
1162       if (insert_entry (&collate->result, elemp->name, sizeof (wchar_t), elemp)
1163           < 0)
1164         error (EXIT_FAILURE, errno, _("error while inserting to hash table"));
1165     }
1166
1167   if (insert_entry (&collate->elements, collate->combine_token,
1168                     collate->combine_token_len, (void *) elemp) < 0)
1169     lr_error (lr, _("cannot insert new collating symbol definition: %s"),
1170               strerror (errno));
1171 }
1172
1173
1174 void
1175 collate_symbol (struct linereader *lr, struct localedef_t *locale,
1176                 struct token *code, struct charset_t *charset)
1177 {
1178   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1179   wchar_t value;
1180   void *not_used;
1181
1182   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
1183   if (value != ILLEGAL_CHAR_VALUE)
1184     {
1185       lr_error (lr, _("symbol for multicharacter collating element "
1186                       "`%.*s' duplicates symbolic name in charset"),
1187                 (int) code->val.str.len, code->val.str.start);
1188       return;
1189     }
1190
1191   if (find_entry (&collate->elements, code->val.str.start, code->val.str.len,
1192                   &not_used) >= 0)
1193     {
1194       lr_error (lr, _("symbol for multicharacter collating element "
1195                       "`%.*s' duplicates element definition"),
1196                 (int) code->val.str.len, code->val.str.start);
1197       return;
1198     }
1199
1200   if (find_entry (&collate->symbols, code->val.str.start, code->val.str.len,
1201                   &not_used) >= 0)
1202     {
1203       lr_error (lr, _("symbol for multicharacter collating element "
1204                       "`%.*s' duplicates other symbol definition"),
1205                 (int) code->val.str.len, code->val.str.start);
1206       return;
1207     }
1208
1209   if (insert_entry (&collate->symbols, code->val.str.start, code->val.str.len,
1210                     (void *) 0) < 0)
1211     lr_error (lr, _("cannot insert new collating symbol definition: %s"),
1212               strerror (errno));
1213 }
1214
1215
1216 void
1217 collate_new_order (struct linereader *lr, struct localedef_t *locale,
1218                    enum coll_sort_rule sort_rule)
1219 {
1220   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1221
1222   if (collate->nrules >= collate->nrules_max)
1223     {
1224       collate->nrules_max *= 2;
1225       collate->rules
1226         = (enum coll_sort_rule *) xrealloc (collate->rules,
1227                                             collate->nrules_max
1228                                             * sizeof (enum coll_sort_rule));
1229     }
1230
1231   collate->rules[collate->nrules++] = sort_rule;
1232 }
1233
1234
1235 void
1236 collate_build_arrays (struct linereader *lr, struct localedef_t *locale)
1237 {
1238   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1239
1240   collate->rules
1241     = (enum coll_sort_rule *) xrealloc (collate->rules,
1242                                         collate->nrules
1243                                         * sizeof (enum coll_sort_rule));
1244
1245   /* Allocate arrays for temporary weights.  */
1246   collate->weight_cnt = (int *) xmalloc (collate->nrules * sizeof (int));
1247
1248   /* Choose arbitrary start value for table size.  */
1249   collate->nweight_max = 5 * collate->nrules;
1250   collate->weight = (int *) xmalloc (collate->nweight_max * sizeof (int));
1251 }
1252
1253
1254 int
1255 collate_order_elem (struct linereader *lr, struct localedef_t *locale,
1256                     struct token *code, struct charset_t *charset)
1257 {
1258   const wchar_t zero = L'\0';
1259   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1260   int result = 0;
1261   wchar_t value;
1262   void *tmp;
1263   unsigned int i;
1264
1265   switch (code->tok)
1266     {
1267     case tok_bsymbol:
1268       /* We have a string to find in one of the three hashing tables.  */
1269       value = charset_find_value (charset, code->val.str.start,
1270                                   code->val.str.len);
1271       if (value != ILLEGAL_CHAR_VALUE)
1272         {
1273           element_t *lastp, *firstp;
1274
1275           collate->kind = character;
1276
1277           if (find_entry (&collate->result, &value, sizeof (wchar_t),
1278                           (void *) &firstp) < 0)
1279             firstp = lastp = NULL;
1280           else
1281             {
1282               /* The entry for the simple character is always found at
1283                  the end.  */
1284               lastp = firstp;
1285               while (lastp->next != NULL)
1286                 lastp = lastp->next;
1287
1288               if (lastp->name[0] == value && lastp->name[1] == L'\0')
1289                 {
1290                   lr_error (lr, _("duplicate definition for character `%.*s'"),
1291                             (int) code->val.str.len, code->val.str.start);
1292                   lr_ignore_rest (lr, 0);
1293                   result = -1;
1294                   break;
1295                 }
1296             }
1297
1298           collate->current_element
1299             = (element_t *) obstack_alloc (&collate->element_mem,
1300                                            sizeof (element_t));
1301
1302           obstack_grow (&collate->element_mem, &value, sizeof (value));
1303           obstack_grow (&collate->element_mem, &zero, sizeof (zero));
1304
1305           collate->current_element->name =
1306             (const wchar_t *) obstack_finish (&collate->element_mem);
1307
1308           collate->current_element->this_weight = ++collate->order_cnt;
1309
1310           collate->current_element->next = NULL;
1311
1312           if (firstp == NULL)
1313             {
1314               if (insert_entry (&collate->result, &value, sizeof (wchar_t),
1315                                 (void *) collate->current_element) < 0)
1316                 {
1317                   lr_error (lr, _("cannot insert collation element `%.*s'"),
1318                             (int) code->val.str.len, code->val.str.start);
1319                   exit (4);
1320                 }
1321             }
1322           else
1323             lastp->next = collate->current_element;
1324         }
1325       else if (find_entry (&collate->elements, code->val.str.start,
1326                            code->val.str.len, &tmp) >= 0)
1327         {
1328           collate->current_element = (element_t *) tmp;
1329
1330           if (collate->current_element->this_weight != 0)
1331             {
1332               lr_error (lr, _("\
1333 collation element `%.*s' appears more than once: ignore line"),
1334                         (int) code->val.str.len, code->val.str.start);
1335               lr_ignore_rest (lr, 0);
1336               result = -1;
1337               break;
1338             }
1339
1340           collate->kind = element;
1341           collate->current_element->this_weight = ++collate->order_cnt;
1342         }
1343       else if (find_entry (&collate->symbols, code->val.str.start,
1344                            code->val.str.len, &tmp) >= 0)
1345         {
1346           unsigned int order = ++collate->order_cnt;
1347
1348           if ((unsigned long int) tmp != 0ul)
1349             {
1350               lr_error (lr, _("\
1351 collation symbol `%.*s' appears more than once: ignore line"),
1352                         (int) code->val.str.len, code->val.str.start);
1353               lr_ignore_rest (lr, 0);
1354               result = -1;
1355               break;
1356             }
1357
1358           collate->kind = symbol;
1359
1360           if (set_entry (&collate->symbols, code->val.str.start,
1361                          code->val.str.len, (void *) order) < 0)
1362             {
1363               lr_error (lr, _("cannot process order specification"));
1364               exit (4);
1365             }
1366         }
1367       else
1368         {
1369           if (verbose)
1370             lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1371                       (int) code->val.str.len, code->val.str.start);
1372           lr_ignore_rest (lr, 0);
1373
1374           result = -1;
1375         }
1376       break;
1377
1378     case tok_undefined:
1379       collate->kind = undefined;
1380       collate->current_element = &collate->undefined;
1381       break;
1382
1383     case tok_ellipsis:
1384       if (collate->was_ellipsis)
1385         {
1386           lr_error (lr, _("\
1387 two lines in a row containing `...' are not allowed"));
1388           result = -1;
1389         }
1390       else if (collate->kind != character)
1391         {
1392           /* An ellipsis requires the previous line to be an
1393              character definition.  */
1394           lr_error (lr, _("\
1395 line before ellipsis does not contain definition for character constant"));
1396           lr_ignore_rest (lr, 0);
1397           result = -1;
1398         }
1399       else
1400         collate->kind = ellipsis;
1401       break;
1402
1403     default:
1404       assert (! "illegal token in `collate_order_elem'");
1405     }
1406
1407   /* Now it's time to handle the ellipsis in the previous line.  We do
1408      this only when the last line contained an definition for a
1409      character, the current line also defines an character, the
1410      character code for the later is bigger than the former.  */
1411   if (collate->was_ellipsis)
1412     {
1413       if (collate->kind != character)
1414         {
1415           lr_error (lr, _("\
1416 line after ellipsis must contain character definition"));
1417           lr_ignore_rest (lr, 0);
1418           result = -1;
1419         }
1420       else if (collate->last_char > value)
1421         {
1422           lr_error (lr, _("end point of ellipsis range is bigger then start"));
1423           lr_ignore_rest (lr, 0);
1424           result = -1;
1425         }
1426       else
1427         {
1428           /* We can fill the arrays with the information we need.  */
1429           wchar_t name[2];
1430           unsigned int *data;
1431           size_t *ptr;
1432           size_t cnt;
1433
1434           name[0] = collate->last_char + 1;
1435           name[1] = L'\0';
1436
1437           data = (unsigned int *) alloca ((collate->nrules + collate->nweight)
1438                                           * sizeof (unsigned int));
1439           ptr = (size_t *) alloca (collate->nrules * sizeof (size_t));
1440
1441           if (data == NULL || ptr == NULL)
1442             error (4, 0, _("memory exhausted"));
1443
1444           /* Prepare data.  Because the characters covered by an
1445              ellipsis all have equal values we prepare the data once
1446              and only change the variable number (if there are any).
1447              PTR[...] will point to the entries which will have to be
1448              fixed during the output loop.  */
1449           for (cnt = 0; cnt < collate->nrules; ++cnt)
1450             {
1451               data[cnt] = collate->weight_cnt[cnt];
1452               ptr[cnt] = (cnt == 0
1453                           ? collate->nweight
1454                           : ptr[cnt - 1] + collate->weight_cnt[cnt - 1]);
1455             }
1456
1457           for (cnt = 0; cnt < collate->nweight; ++cnt)
1458             data[collate->nrules + cnt] = collate->weight[cnt];
1459
1460           for (cnt = 0; cnt < collate->nrules; ++cnt)
1461             if ((wchar_t) data[ptr[cnt]] != ELLIPSIS_CHAR)
1462               ptr[cnt] = 0;
1463
1464           while (name[0] <= value)
1465             {
1466               element_t *pelem;
1467
1468               pelem = (element_t *) obstack_alloc (&collate->element_mem,
1469                                                    sizeof (element_t));
1470               if (pelem == NULL)
1471                 error (4, 0, _("memory exhausted"));
1472
1473               pelem->name
1474                 = (const wchar_t *) obstack_copy (&collate->element_mem,
1475                                                   name, 2 * sizeof (wchar_t));
1476               pelem->this_weight = ++collate->order_cnt;
1477
1478               pelem->ordering_len = collate->nweight;
1479               pelem->ordering
1480                 = (unsigned int *) obstack_copy (&collate->element_mem, data,
1481                                                  (collate->nrules
1482                                                   * pelem->ordering_len)
1483                                                  * sizeof (unsigned int));
1484
1485               /* `...' weights need to be adjusted.  */
1486               for (cnt = 0; cnt < collate->nrules; ++cnt)
1487                 if (ptr[cnt] != 0)
1488                   pelem->ordering[ptr[cnt]] = pelem->this_weight;
1489
1490               /* Insert new entry into result table.  */
1491               if (find_entry (&collate->result, name, sizeof (wchar_t),
1492                               (void *) &pelem->next) >= 0)
1493                 {
1494                   if (set_entry (&collate->result, name, sizeof (wchar_t),
1495                                  (void *) pelem->next) < 0)
1496                     error (4, 0, _("cannot insert into result table"));
1497                 }
1498               else
1499                 if (insert_entry (&collate->result, name, sizeof (wchar_t),
1500                                   (void *) pelem->next) < 0)
1501                   error (4, 0, _("cannot insert into result table"));
1502
1503               /* Increment counter.  */
1504               ++name[0];
1505             }
1506         }
1507     }
1508
1509   /* Reset counters for weights.  */
1510   collate->weight_idx = 0;
1511   collate->nweight = 0;
1512   for (i = 0; i < collate->nrules; ++i)
1513     collate->weight_cnt[i] = 0;
1514   collate->current_patch = NULL;
1515
1516   return result;
1517 }
1518
1519
1520 int
1521 collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
1522                         struct token *code, struct charset_t *charset)
1523 {
1524   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1525   unsigned int here_weight;
1526   wchar_t value;
1527   void *tmp;
1528
1529   assert (code->tok == tok_bsymbol);
1530
1531   value = charset_find_value (charset, code->val.str.start, code->val.str.len);
1532   if (value != ILLEGAL_CHAR_VALUE)
1533     {
1534       element_t *runp;
1535
1536       if (find_entry (&collate->result, &value, sizeof (wchar_t),
1537                       (void *)&runp) < 0)
1538         runp = NULL;
1539
1540       while (runp != NULL
1541              && (runp->name[0] != value || runp->name[1] != L'\0'))
1542         runp = runp->next;
1543
1544       here_weight = runp == NULL ? 0 : runp->this_weight;
1545     }
1546   else if (find_entry (&collate->elements, code->val.str.start,
1547                        code->val.str.len, &tmp) >= 0)
1548     {
1549       element_t *runp = (element_t *) tmp;
1550
1551       here_weight = runp->this_weight;
1552     }
1553   else if (find_entry (&collate->symbols, code->val.str.start,
1554                        code->val.str.len, &tmp) >= 0)
1555     {
1556       here_weight = (unsigned int) tmp;
1557     }
1558   else
1559     {
1560       if (verbose)
1561         lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1562                   (int) code->val.str.len, code->val.str.start);
1563       lr_ignore_rest (lr, 0);
1564       return -1;
1565     }
1566
1567   /* When we currently work on a collation symbol we do not expect any
1568      weight.  */
1569   if (collate->kind == symbol)
1570     {
1571       lr_error (lr, _("\
1572 specification of sorting weight for collation symbol does not make sense"));
1573       lr_ignore_rest (lr, 0);
1574       return -1;
1575     }
1576
1577   /* Add to the current collection of weights.  */
1578   if (collate->nweight >= collate->nweight_max)
1579     {
1580       collate->nweight_max *= 2;
1581       collate->weight = (unsigned int *) xrealloc (collate->weight,
1582                                                    collate->nweight_max);
1583     }
1584
1585   /* If the weight is currently not known, we remember to patch the
1586      resulting tables.  */
1587   if (here_weight == 0)
1588     {
1589       patch_t *newp;
1590
1591       newp = (patch_t *) obstack_alloc (&collate->element_mem,
1592                                         sizeof (patch_t));
1593       newp->fname = lr->fname;
1594       newp->lineno = lr->lineno;
1595       newp->token = (const char *) obstack_copy0 (&collate->element_mem,
1596                                                   code->val.str.start,
1597                                                   code->val.str.len);
1598       newp->where.idx = collate->nweight++;
1599       newp->next = collate->current_patch;
1600       collate->current_patch = newp;
1601     }
1602   else
1603     collate->weight[collate->nweight++] = here_weight;
1604   ++collate->weight_cnt[collate->weight_idx];
1605
1606   return 0;
1607 }
1608
1609
1610 int
1611 collate_next_weight (struct linereader *lr, struct localedef_t *locale)
1612 {
1613   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1614
1615   if (collate->kind == symbol)
1616     {
1617       lr_error (lr, _("\
1618 specification of sorting weight for collation symbol does not make sense"));
1619       lr_ignore_rest (lr, 0);
1620       return -1;
1621     }
1622
1623   ++collate->weight_idx;
1624   if (collate->weight_idx >= collate->nrules)
1625     {
1626       lr_error (lr, _("too many weights"));
1627       lr_ignore_rest (lr, 0);
1628       return -1;
1629     }
1630
1631   return 0;
1632 }
1633
1634
1635 int
1636 collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
1637                        struct token *code, struct charset_t *charset)
1638 {
1639   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1640   unsigned int value = 0;
1641
1642   /* There current tokens can be `IGNORE', `...', or a string.  */
1643   switch (code->tok)
1644     {
1645     case tok_ignore:
1646       /* This token is allowed in all situations.  */
1647       value = IGNORE_CHAR;
1648       break;
1649
1650     case tok_ellipsis:
1651       /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1652          entry.  */
1653       if (collate->kind != ellipsis && collate->kind != undefined)
1654         {
1655           lr_error (lr, _("\
1656 `...' must only be used in `...' and `UNDEFINED' entries"));
1657           lr_ignore_rest (lr, 0);
1658           return -1;
1659         }
1660       value = ELLIPSIS_CHAR;
1661       break;
1662
1663     case tok_string:
1664       /* This can become difficult.  We have to get the weights which
1665          correspond to the single wide chars in the string.  But some
1666          of the `chars' might not be real characters, but collation
1667          elements or symbols.  And so the string decoder might have
1668          signaled errors.  The string at this point is not translated.
1669          I.e., all <...> sequences are still there.  */
1670       {
1671         char *runp = code->val.str.start;
1672         void *tmp;
1673
1674         while (*runp != '\0')
1675           {
1676             char *startp = (char *) runp;
1677             char *putp = (char *) runp;
1678             wchar_t wch;
1679
1680             /* Lookup weight for char and store it.  */
1681             if (*runp == '<')
1682               {
1683                 while (*++runp != '\0' && *runp != '>')
1684                   {
1685                     if (*runp == lr->escape_char)
1686                       if (*++runp == '\0')
1687                         {
1688                           lr_error (lr, _("unterminated weight name"));
1689                           lr_ignore_rest (lr, 0);
1690                           return -1;
1691                         }
1692                     *putp++ = *runp;
1693                   }
1694                 if (*runp == '>')
1695                   ++runp;
1696
1697                 if (putp == startp)
1698                   {
1699                     lr_error (lr, _("empty weight name: line ignored"));
1700                     lr_ignore_rest (lr, 0);
1701                     return -1;
1702                   }
1703
1704                 wch = charset_find_value (charset, startp, putp - startp);
1705                 if (wch != ILLEGAL_CHAR_VALUE)
1706                   {
1707                     element_t *pelem;
1708
1709                     if (find_entry (&collate->result, &wch, sizeof (wchar_t),
1710                                     (void *)&pelem) < 0)
1711                       pelem = NULL;
1712
1713                     while (pelem != NULL
1714                            && (pelem->name[0] != wch
1715                                || pelem->name[1] != L'\0'))
1716                       pelem = pelem->next;
1717
1718                     value = pelem == NULL ? 0 : pelem->this_weight;
1719                   }
1720                 else if (find_entry (&collate->elements, startp, putp - startp,
1721                                      &tmp) >= 0)
1722                   {
1723                     element_t *pelem = (element_t *) tmp;
1724
1725                     value = pelem->this_weight;
1726                   }
1727                 else if (find_entry (&collate->symbols, startp, putp - startp,
1728                                      &tmp) >= 0)
1729                   {
1730                     value = (unsigned int) tmp;
1731                   }
1732                 else
1733                   {
1734                     if (verbose)
1735                       lr_error (lr, _("unknown symbol `%.*s': line ignored"),
1736                                 (int) (putp - startp), startp);
1737                     lr_ignore_rest (lr, 0);
1738                     return -1;
1739                   }
1740               }
1741             else
1742               {
1743                 element_t *wp;
1744                 wchar_t wch;
1745
1746                 if (*runp == lr->escape_char)
1747                   {
1748                     static const char digits[] = "0123456789abcdef";
1749                     const char *dp;
1750                     int base;
1751
1752                     ++runp;
1753                     if (tolower (*runp) == 'x')
1754                       {
1755                         ++runp;
1756                         base = 16;
1757                       }
1758                     else if (tolower (*runp) == 'd')
1759                       {
1760                         ++runp;
1761                         base = 10;
1762                       }
1763                     else
1764                       base = 8;
1765
1766                     dp = strchr (digits, tolower (*runp));
1767                     if (dp == NULL || (dp - digits) >= base)
1768                       {
1769                       illegal_char:
1770                         lr_error (lr, _("\
1771 illegal character constant in string"));
1772                         lr_ignore_rest (lr, 0);
1773                         return -1;
1774                       }
1775                     wch = dp - digits;
1776                     ++runp;
1777
1778                     dp = strchr (digits, tolower (*runp));
1779                     if (dp == NULL || (dp - digits) >= base)
1780                       goto illegal_char;
1781                     wch *= base;
1782                     wch += dp - digits;
1783                     ++runp;
1784
1785                     if (base != 16)
1786                       {
1787                         dp = strchr (digits, tolower (*runp));
1788                         if (dp != NULL && (dp - digits < base))
1789                           {
1790                             wch *= base;
1791                             wch += dp - digits;
1792                             ++runp;
1793                           }
1794                       }
1795                   }
1796                 else
1797                   wch = (wchar_t) *runp++;
1798
1799                 /* Lookup the weight for WCH.  */
1800                 if (find_entry (&collate->result, &wch, sizeof (wch),
1801                                 (void *)&wp) < 0)
1802                   wp = NULL;
1803
1804                 while (wp != NULL
1805                        && (wp->name[0] != wch || wp->name[1] != L'\0'))
1806                   wp = wp->next;
1807
1808                 value = wp == NULL ? 0 : wp->this_weight;
1809
1810                 /* To get the correct name for the error message.  */
1811                 putp = runp;
1812
1813                 /**************************************************\
1814                 |* I know here is something wrong.  Characters in *|
1815                 |* the string which are not in the <...> form     *|
1816                 |* cannot be declared forward for now!!!          *|
1817                 \**************************************************/
1818               }
1819
1820             /* Store in weight array.  */
1821             if (collate->nweight >= collate->nweight_max)
1822               {
1823                 collate->nweight_max *= 2;
1824                 collate->weight
1825                   = (unsigned int *) xrealloc (collate->weight,
1826                                                collate->nweight_max);
1827               }
1828
1829             if (value == 0)
1830               {
1831                 patch_t *newp;
1832
1833                 newp = (patch_t *) obstack_alloc (&collate->element_mem,
1834                                                   sizeof (patch_t));
1835                 newp->fname = lr->fname;
1836                 newp->lineno = lr->lineno;
1837                 newp->token
1838                   = (const char *) obstack_copy0 (&collate->element_mem,
1839                                                   startp, putp - startp);
1840                 newp->where.idx = collate->nweight++;
1841                 newp->next = collate->current_patch;
1842                 collate->current_patch = newp;
1843               }
1844             else
1845               collate->weight[collate->nweight++] = value;
1846             ++collate->weight_cnt[collate->weight_idx];
1847           }
1848       }
1849       return 0;
1850
1851     default:
1852       assert (! "should not happen");
1853     }
1854
1855
1856   if (collate->nweight >= collate->nweight_max)
1857     {
1858       collate->nweight_max *= 2;
1859       collate->weight = (unsigned int *) xrealloc (collate->weight,
1860                                                    collate->nweight_max);
1861     }
1862
1863   collate->weight[collate->nweight++] = value;
1864   ++collate->weight_cnt[collate->weight_idx];
1865
1866   return 0;
1867 }
1868
1869
1870 void
1871 collate_end_weight (struct linereader *lr, struct localedef_t *locale)
1872 {
1873   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1874   element_t *pelem = collate->current_element;
1875
1876   if (collate->kind == symbol)
1877     {
1878       /* We don't have to do anything.  */
1879       collate->was_ellipsis = 0;
1880       return;
1881     }
1882
1883   if (collate->kind == ellipsis)
1884     {
1885       /* Before the next line is processed the ellipsis is handled.  */
1886       collate->was_ellipsis = 1;
1887       return;
1888     }
1889
1890   assert (collate->kind == character || collate->kind == element
1891           || collate->kind == undefined);
1892
1893   /* Fill in the missing weights.  */
1894   while (++collate->weight_idx < collate->nrules)
1895     {
1896       collate->weight[collate->nweight++] = pelem->this_weight;
1897       ++collate->weight_cnt[collate->weight_idx];
1898     }
1899
1900   /* Now we know how many ordering weights the current
1901      character/element has.  Allocate room in the element structure
1902      and copy information.  */
1903   pelem->ordering_len = collate->nweight;
1904
1905   /* First we write an array with the number of values for each
1906      weight.  */
1907   obstack_grow (&collate->element_mem, collate->weight_cnt,
1908                 collate->nrules * sizeof (unsigned int));
1909
1910   /* Now the weights itselves.  */
1911   obstack_grow (&collate->element_mem, collate->weight,
1912                 collate->nweight * sizeof (unsigned int));
1913
1914   /* Get result.  */
1915   pelem->ordering = obstack_finish (&collate->element_mem);
1916
1917   /* Now we handle the "patches".  */
1918   while (collate->current_patch != NULL)
1919     {
1920       patch_t *this_patch;
1921
1922       this_patch = collate->current_patch;
1923
1924       this_patch->where.pos = &pelem->ordering[collate->nrules
1925                                               + this_patch->where.idx];
1926
1927       collate->current_patch = this_patch->next;
1928       this_patch->next = collate->all_patches;
1929       collate->all_patches = this_patch;
1930     }
1931
1932   /* Set information for next round.  */
1933   collate->was_ellipsis = 0;
1934   if (collate->kind != undefined)
1935     collate->last_char = pelem->name[0];
1936 }