1 /* Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
37 #include "strlen-hash.h"
39 /* Uncomment the following line in the production version. */
40 /* #define NDEBUG 1 */
44 #define MAX(a, b) ((a) > (b) ? (a) : (b))
47 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
50 /* What kind of symbols get defined? */
61 typedef struct patch_t
75 typedef struct element_t
78 unsigned int this_weight
;
80 struct element_t
*next
;
82 unsigned int *ordering
;
87 /* The real definition of the struct for the LC_COLLATE locale. */
88 struct locale_collate_t
90 /* Collate symbol table. Simple mapping to number. */
93 /* The collation elements. */
95 struct obstack element_mem
;
97 /* The result table. */
100 /* Sorting rules given in order_start line. */
102 u_int32_t nrules_max
;
103 enum coll_sort_rule
*rules
;
105 /* Used while recognizing symbol composed of multiple tokens
106 (collating-element). */
107 const char *combine_token
;
108 size_t combine_token_len
;
110 /* How many sorting order specifications so far. */
111 unsigned int order_cnt
;
113 /* Was lastline ellipsis? */
115 /* Value of last entry if was character. */
117 /* Current element. */
118 element_t
*current_element
;
119 /* What kind of symbol is current element. */
120 enum coll_symbol kind
;
122 /* While collecting the weights we need some temporary space. */
123 unsigned int current_order
;
125 unsigned int weight_idx
;
126 unsigned int *weight
;
131 patch_t
*current_patch
;
132 patch_t
*all_patches
;
134 /* Room for the UNDEFINED information. */
136 unsigned int undefined_len
;
140 /* Be verbose? Defined in localedef.c. */
144 void *xmalloc (size_t __n
);
145 void *xrealloc (void *__p
, size_t __n
);
148 #define obstack_chunk_alloc malloc
149 #define obstack_chunk_free free
153 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
154 struct charset_t
*charset
)
156 struct locale_collate_t
*collate
;
158 /* It is important that we always use UCS4 encoding for strings now. */
159 encoding_method
= ENC_UCS4
;
161 /* Allocate the needed room. */
162 locale
->categories
[LC_COLLATE
].collate
= collate
=
163 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
165 /* Allocate hash table for collating elements. */
166 if (init_hash (&collate
->elements
, 512))
167 error (4, 0, _("memory exhausted"));
168 collate
->combine_token
= NULL
;
169 obstack_init (&collate
->element_mem
);
171 /* Allocate hash table for collating elements. */
172 if (init_hash (&collate
->symbols
, 64))
173 error (4, 0, _("memory exhausted"));
175 /* Allocate hash table for result. */
176 if (init_hash (&collate
->result
, 512))
177 error (4, 0, _("memory exhausted"));
180 collate
->nrules_max
= 10;
182 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
183 * sizeof (enum coll_sort_rule
));
185 collate
->order_cnt
= 1; /* The smallest weight is 2. */
187 collate
->was_ellipsis
= 0;
188 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
190 collate
->all_patches
= NULL
;
192 /* This tells us no UNDEFINED entry was found until now. */
193 collate
->undefined
.this_weight
= 0;
195 lr
->translate_strings
= 0;
200 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
202 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
206 /* Patch the constructed table so that forward references are
208 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
211 size_t toklen
= strlen (patch
->token
);
213 unsigned int value
= 0;
215 wch
= charset_find_value (charset
, patch
->token
, toklen
);
216 if (wch
!= ILLEGAL_CHAR_VALUE
)
220 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
223 for (; runp
!= NULL
; runp
= runp
->next
)
224 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
227 value
= runp
== NULL
? 0 : runp
->this_weight
;
229 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
232 value
= ((element_t
*) ptmp
)->this_weight
;
234 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
237 value
= (unsigned long int) ptmp
;
242 if (value
== 0 && !be_quiet
)
243 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
244 _("no weight defined for symbol `%s'"), patch
->token
);
246 *patch
->where
.pos
= value
;
249 /* If no definition for UNDEFINED is given, all characters in the
250 given charset must be specified. */
251 if (collate
->undefined
.ordering
== NULL
)
253 /**************************************************************\
254 |* XXX We should test whether really an unspecified character *|
255 |* exists before giving the message. *|
256 \**************************************************************/
260 error (0, 0, _("no definition of `UNDEFINED'"));
262 collate
->undefined
.ordering_len
= collate
->nrules
;
263 weight
= ++collate
->order_cnt
;
265 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
268 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
271 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
272 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
274 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
277 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
278 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
279 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
285 collate_output (struct localedef_t
*locale
, struct charset_t
*charset
,
286 const char *output_path
)
288 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
289 u_int32_t table_size
, table_best
, level_best
, sum_best
;
294 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
295 struct iovec iov
[2 + nelems
];
296 struct locale_file data
;
297 u_int32_t idx
[nelems
];
298 struct obstack non_simple
;
299 struct obstack string_pool
;
300 size_t cnt
, entry_size
;
301 u_int32_t undefined_offset
= UINT_MAX
;
302 u_int32_t
*table
, *extra
, *table2
, *extra2
;
304 u_int32_t element_hash_tab_size
;
305 u_int32_t
*element_hash_tab
;
306 u_int32_t
*element_hash_tab_ob
;
307 u_int32_t element_string_pool_size
;
308 char *element_string_pool
;
309 u_int32_t element_value_size
;
310 wchar_t *element_value
;
311 wchar_t *element_value_ob
;
312 u_int32_t symbols_hash_tab_size
;
313 u_int32_t
*symbols_hash_tab
;
314 u_int32_t
*symbols_hash_tab_ob
;
315 u_int32_t symbols_string_pool_size
;
316 char *symbols_string_pool
;
317 u_int32_t symbols_class_size
;
318 u_int32_t
*symbols_class
;
319 u_int32_t
*symbols_class_ob
;
320 hash_table
*hash_tab
;
321 unsigned int dummy_weights
[collate
->nrules
+ 1];
327 /* Compute table size. */
330 Computing table size for collation information might take a while..."),
332 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
334 size_t hits
[table_size
];
335 unsigned int worst
= 1;
340 for (cnt
= 0; cnt
< 256; ++cnt
)
342 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
344 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
345 &len
, (void **) &pelem
) >= 0)
346 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
347 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
349 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
350 if (table_size
* worst
> sum_best
)
354 if (table_size
* worst
< sum_best
)
356 sum_best
= table_size
* worst
;
357 table_best
= table_size
;
361 assert (table_best
!= 0xffff || level_best
!= 0xffff);
363 fputs (_(" done\n"), stderr
);
365 obstack_init (&non_simple
);
366 obstack_init (&string_pool
);
368 data
.magic
= LIMAGIC (LC_COLLATE
);
370 iov
[0].iov_base
= (void *) &data
;
371 iov
[0].iov_len
= sizeof (data
);
373 iov
[1].iov_base
= (void *) idx
;
374 iov
[1].iov_len
= sizeof (idx
);
376 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
377 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
379 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
380 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
381 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
382 = collate
->nrules
* sizeof (u_int32_t
);
383 /* Another trick here. Describing the collation method needs only a
384 few bits (3, to be exact). But the binary file should be
385 accessible by machines with both endianesses and so we store both
386 forms in the same word. */
387 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
388 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
390 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
391 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
393 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
394 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
395 = sizeof (u_int32_t
);
397 entry_size
= 1 + MAX (collate
->nrules
, 2);
399 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
400 * sizeof (table
[0]));
401 memset (table
, '\0', table_best
* level_best
* entry_size
402 * sizeof (table
[0]));
405 /* Macros for inserting in output table. */
406 #define ADD_VALUE(expr) \
408 u_int32_t to_write = (u_int32_t) expr; \
409 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
412 #define ADD_ELEMENT(pelem, len) \
418 wlen = wcslen (pelem->name); \
419 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
421 idx = collate->nrules; \
422 for (cnt = 0; cnt < collate->nrules; ++cnt) \
426 ADD_VALUE (pelem->ordering[cnt]); \
427 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
428 ADD_VALUE (pelem->ordering[idx++]); \
432 #define ADD_FORWARD(pelem) \
434 /* We leave a reference in the main table and put all \
435 information in the table for the extended entries. */ \
437 element_t *has_simple = NULL; \
440 table[(level * table_best + slot) * entry_size + 1] \
442 table[(level * table_best + slot) * entry_size + 2] \
443 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
445 /* Here we have to construct the non-simple table entry. First \
446 compute the total length of this entry. */ \
447 for (runp = (pelem); runp != NULL; runp = runp->next) \
448 if (runp->ordering != NULL) \
453 value = 1 + wcslen (runp->name) + 1; \
455 for (cnt = 0; cnt < collate->nrules; ++cnt) \
456 /* We have to take care for entries without ordering \
457 information. While reading them they get inserted in the \
458 table and later not removed when something goes wrong with \
459 reading its weights. */ \
461 value += 1 + runp->ordering[cnt]; \
463 if (runp->name[1] == L'\0') \
467 ADD_ELEMENT (runp, value); \
470 if (has_simple == NULL) \
474 ADD_VALUE (collate->undefined_len + 1); \
476 /* Add the name. */ \
477 ADD_VALUE ((pelem)->name[0]); \
480 idx = collate->nrules; \
481 for (cnt = 0; cnt < collate->nrules; ++cnt) \
485 ADD_VALUE (collate->undefined.ordering[cnt]); \
486 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
488 if ((wchar_t) collate->undefined.ordering[idx] \
490 ADD_VALUE ((pelem)->name[0]); \
492 ADD_VALUE (collate->undefined.ordering[idx++]); \
501 /* Fill the table now. First we look for all the characters which
502 fit into one single byte. This speeds up the 8-bit string
505 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
506 &len
, (void **) &pelem
) >= 0)
507 if (pelem
->name
[0] <= 0xff)
509 /* We have a single byte name. Now we must distinguish
510 between entries in simple form (i.e., only one value per
511 weight and no collation element starting with the same
512 character) and those which are not. */
513 size_t slot
= ((size_t) pelem
->name
[0]);
514 const size_t level
= 0;
516 table
[slot
* entry_size
] = pelem
->name
[0];
518 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
519 && pelem
->ordering_len
== collate
->nrules
)
521 /* Yes, we have a simple one. Lucky us. */
524 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
525 table
[slot
* entry_size
+ 1 + cnt
]
526 = pelem
->ordering
[collate
->nrules
+ cnt
];
532 /* Now check for missing single byte entries. If one exist we fill
533 with the UNDEFINED entry. */
534 for (cnt
= 0; cnt
< 256; ++cnt
)
535 /* The first weight is never 0 for existing entries. */
536 if (table
[cnt
* entry_size
+ 1] == 0)
538 /* We have to fill in the information from the UNDEFINED
540 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
542 if (collate
->undefined
.ordering_len
== collate
->nrules
)
546 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
547 if ((wchar_t)collate
->undefined
.ordering
[collate
->nrules
+ inner
]
549 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
551 table
[cnt
* entry_size
+ 1 + inner
]
552 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
556 if (undefined_offset
!= UINT_MAX
)
558 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
559 table
[cnt
* entry_size
+ 2] = undefined_offset
;
563 const size_t slot
= cnt
;
564 const size_t level
= 0;
566 ADD_FORWARD (&collate
->undefined
);
567 undefined_offset
= table
[cnt
* entry_size
+ 2];
572 /* Now we are ready for inserting the whole rest. */
574 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
575 &len
, (void **) &pelem
) >= 0)
576 if (pelem
->name
[0] > 0xff)
578 /* Find the position. */
579 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
582 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
584 assert (level
< level_best
);
586 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
587 && pelem
->ordering_len
== collate
->nrules
)
589 /* Again a simple entry. */
592 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
593 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
594 = pelem
->ordering
[collate
->nrules
+ inner
];
600 /* Add the UNDEFINED entry. */
602 /* Here we have to construct the non-simple table entry. */
605 undefined_offset
= obstack_object_size (&non_simple
);
607 idx
= collate
->nrules
;
608 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
612 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
613 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
614 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
618 /* Finish the extra block. */
619 extra_len
= obstack_object_size (&non_simple
);
620 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
621 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
623 /* Now we have to build the two array for the other byte ordering. */
624 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
625 * sizeof (table
[0]));
626 extra2
= (u_int32_t
*) alloca (extra_len
);
628 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
629 table2
[cnt
] = SWAPU32 (table
[cnt
]);
631 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
632 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
634 /* We need a simple hashing table to get a collation-element->chars
635 mapping. We again use internal hasing using a secondary hashing
638 Each string has an associate hashing value V, computed by a
639 fixed function. To locate the string we use open addressing with
640 double hashing. The first index will be V % M, where M is the
641 size of the hashing table. If no entry is found, iterating with
642 a second, independent hashing function takes place. This second
643 value will be 1 + V % (M - 2). The approximate number of probes
646 for unsuccessful search: (1 - N / M) ^ -1
647 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
649 where N is the number of keys.
651 If we now choose M to be the next prime bigger than 4 / 3 * N,
652 we get the values 4 and 1.85 resp. Because unsuccessful searches
653 are unlikely this is a good value. Formulas: [Knuth, The Art of
654 Computer Programming, Volume 3, Sorting and Searching, 1973,
656 if (collate
->elements
.filled
== 0)
658 /* We don't need any element table since there are no collating
660 element_hash_tab_size
= 0;
661 element_hash_tab
= NULL
;
662 element_hash_tab_ob
= NULL
;
663 element_string_pool_size
= 0;
664 element_string_pool
= NULL
;
665 element_value_size
= 0;
666 element_value
= NULL
;
667 element_value_ob
= NULL
;
671 void *ptr
; /* Running pointer. */
672 const char *key
; /* Key for current bucket. */
673 size_t keylen
; /* Length of key data. */
674 const element_t
*data
; /* Data, i.e., the character sequence. */
676 element_hash_tab_size
= next_prime ((collate
->elements
.filled
* 4) / 3);
677 if (element_hash_tab_size
< 7)
678 /* We need a minimum to make the following code work. */
679 element_hash_tab_size
= 7;
681 element_hash_tab
= obstack_alloc (&non_simple
, (2 * element_hash_tab_size
682 * sizeof (u_int32_t
)));
683 memset (element_hash_tab
, '\377', (2 * element_hash_tab_size
684 * sizeof (u_int32_t
)));
687 while (iterate_table (&collate
->elements
, &ptr
, (const void **) &key
,
688 &keylen
, (void **) &data
) == 0)
690 size_t hash_val
= hash_string (key
, keylen
);
691 size_t idx
= hash_val
% element_hash_tab_size
;
693 if (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
695 /* We need the second hashing function. */
696 size_t c
= 1 + (hash_val
% (element_hash_tab_size
- 2));
699 if (idx
>= element_hash_tab_size
- c
)
700 idx
-= element_hash_tab_size
- c
;
703 while (element_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
706 element_hash_tab
[2 * idx
] = obstack_object_size (&non_simple
);
707 element_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&string_pool
)
710 obstack_grow0 (&non_simple
, key
, keylen
);
711 obstack_grow (&string_pool
, data
->name
,
712 (wcslen (data
->name
) + 1) * sizeof (wchar_t));
715 if (obstack_object_size (&non_simple
) % 4 != 0)
716 obstack_blank (&non_simple
,
717 4 - (obstack_object_size (&non_simple
) % 4));
718 element_string_pool_size
= obstack_object_size (&non_simple
);
719 element_string_pool
= obstack_finish (&non_simple
);
721 element_value_size
= obstack_object_size (&string_pool
);
722 element_value
= obstack_finish (&string_pool
);
724 /* Create the tables for the other byte order. */
725 element_hash_tab_ob
= obstack_alloc (&non_simple
,
726 (2 * element_hash_tab_size
727 * sizeof (u_int32_t
)));
728 for (cnt
= 0; cnt
< 2 * element_hash_tab_size
; ++cnt
)
729 element_hash_tab_ob
[cnt
] = SWAPU32 (element_hash_tab
[cnt
]);
731 element_value_ob
= obstack_alloc (&string_pool
, element_value_size
);
732 if (sizeof (wchar_t) != 4)
734 fputs ("sizeof (wchar_t) != 4 currently not handled", stderr
);
737 for (cnt
= 0; cnt
< element_value_size
/ 4; ++cnt
)
738 element_value_ob
[cnt
] = SWAPU32 (element_value
[cnt
]);
741 /* Store collation elements as map to collation class. There are
742 three kinds of symbols:
746 We need to make a table which lets the user to access the primary
747 weight based on the symbol string. */
748 symbols_hash_tab_size
= next_prime ((4 * (charset
->char_table
.filled
749 + collate
->elements
.filled
750 + collate
->symbols
.filled
)) / 3);
751 symbols_hash_tab
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
752 * sizeof (u_int32_t
)));
753 memset (symbols_hash_tab
, '\377', (2 * symbols_hash_tab_size
754 * sizeof (u_int32_t
)));
756 /* Now fill the array. First the symbols from the character set,
757 then the collation elements and last the collation symbols. */
758 hash_tab
= &charset
->char_table
;
761 void *ptr
; /* Running pointer. */
762 const char *key
; /* Key for current bucket. */
763 size_t keylen
; /* Length of key data. */
764 void *data
; /* Data. */
767 while (iterate_table (hash_tab
, &ptr
, (const void **) &key
,
768 &keylen
, (void **) &data
) == 0)
773 unsigned int *weights
;
775 if (hash_tab
== &charset
->char_table
776 || hash_tab
== &collate
->elements
)
778 element_t
*lastp
, *firstp
;
779 wchar_t dummy_name
[2];
783 if (hash_tab
== &charset
->char_table
)
785 dummy_name
[0] = (wchar_t) ((unsigned long int) data
);
786 dummy_name
[1] = L
'\0';
788 name_len
= sizeof (wchar_t);
792 element_t
*elemp
= (element_t
*) data
;
794 name_len
= wcslen (name
) * sizeof (wchar_t);
797 /* First check whether this character is used at all. */
798 if (find_entry (&collate
->result
, name
, name_len
,
799 (void *) &firstp
) < 0)
800 /* The symbol is not directly mentioned in the collation.
801 I.e., we use the value for UNDEFINED. */
802 lastp
= &collate
->undefined
;
805 /* The entry for the simple character is always found at
808 while (lastp
->next
!= NULL
&& wcscmp (name
, lastp
->name
))
812 weights
= lastp
->ordering
;
816 dummy_weights
[0] = 1;
817 dummy_weights
[collate
->nrules
]
818 = (unsigned int) ((unsigned long int) data
);
820 weights
= dummy_weights
;
823 /* In LASTP->ordering we now have the collation class.
824 Determine the place in the hashing table next. */
825 hash_val
= hash_string (key
, keylen
);
826 idx
= hash_val
% symbols_hash_tab_size
;
828 if (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)))
830 /* We need the second hashing function. */
831 size_t c
= 1 + (hash_val
% (symbols_hash_tab_size
- 2));
834 if (idx
>= symbols_hash_tab_size
- c
)
835 idx
-= symbols_hash_tab_size
- c
;
838 while (symbols_hash_tab
[2 * idx
] != (~((u_int32_t
) 0)));
841 symbols_hash_tab
[2 * idx
] = obstack_object_size (&string_pool
);
842 symbols_hash_tab
[2 * idx
+ 1] = (obstack_object_size (&non_simple
)
843 / sizeof (u_int32_t
));
845 obstack_grow0 (&string_pool
, key
, keylen
);
846 /* Adding the first weight looks complicated. We have to deal
847 with the kind it is stored and with the fact that original
848 form uses `unsigned int's while we need `u_int32_t' here. */
850 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
851 for (cnt
= 0; cnt
< weights
[0]; ++cnt
)
853 word
= weights
[collate
->nrules
+ cnt
];
854 obstack_grow (&non_simple
, &word
, sizeof (u_int32_t
));
858 if (hash_tab
== &charset
->char_table
)
859 hash_tab
= &collate
->elements
;
860 else if (hash_tab
== &collate
->elements
)
861 hash_tab
= &collate
->symbols
;
866 /* Now we have the complete tables. */
867 if (obstack_object_size (&string_pool
) % 4 != 0)
868 obstack_blank (&non_simple
, 4 - (obstack_object_size (&string_pool
) % 4));
869 symbols_string_pool_size
= obstack_object_size (&string_pool
);
870 symbols_string_pool
= obstack_finish (&string_pool
);
872 symbols_class_size
= obstack_object_size (&non_simple
);
873 symbols_class
= obstack_finish (&non_simple
);
875 /* Generate tables with other byte order. */
876 symbols_hash_tab_ob
= obstack_alloc (&non_simple
, (2 * symbols_hash_tab_size
877 * sizeof (u_int32_t
)));
878 for (cnt
= 0; cnt
< 2 * symbols_hash_tab_size
; ++cnt
)
879 symbols_hash_tab_ob
[cnt
] = SWAPU32 (symbols_hash_tab
[cnt
]);
881 symbols_class_ob
= obstack_alloc (&non_simple
, symbols_class_size
);
882 for (cnt
= 0; cnt
< symbols_class_size
/ 4; ++cnt
)
883 symbols_class_ob
[cnt
] = SWAPU32 (symbols_class
[cnt
]);
886 /* Store table addresses and lengths. */
887 #if __BYTE_ORDER == __BIG_ENDIAN
888 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
889 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
890 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
892 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
893 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
894 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
896 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
897 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
899 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
900 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
902 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
903 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
904 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
906 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
907 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
908 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
910 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
911 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
913 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
914 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
917 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
918 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
921 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_base
922 = &element_hash_tab_size
;
923 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_SIZE
)].iov_len
924 = sizeof (u_int32_t
);
926 #if __BYTE_ORDER == __BIG_ENDIAN
927 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
929 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
930 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
932 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
933 = element_hash_tab_ob
;
934 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
935 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
937 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_base
939 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EL
)].iov_len
940 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
942 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_base
943 = element_hash_tab_ob
;
944 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_HASH_EB
)].iov_len
945 = 2 * element_hash_tab_size
* sizeof (u_int32_t
);
948 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_base
949 = element_string_pool
;
950 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_STR_POOL
)].iov_len
951 = element_string_pool_size
;
953 #if __BYTE_ORDER == __BIG_ENDIAN
954 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
956 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
957 = element_value_size
;
959 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
961 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
962 = element_value_size
;
964 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_base
966 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EL
)].iov_len
967 = element_value_size
;
969 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_base
971 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_ELEM_VAL_EB
)].iov_len
972 = element_value_size
;
975 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_base
976 = &symbols_hash_tab_size
;
977 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZE
)].iov_len
978 = sizeof (u_int32_t
);
980 #if __BYTE_ORDER == __BIG_ENDIAN
981 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
983 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
984 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
986 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
987 = symbols_hash_tab_ob
;
988 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
989 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
991 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_base
993 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EL
)].iov_len
994 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
996 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_base
997 = symbols_hash_tab_ob
;
998 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_EB
)].iov_len
999 = 2 * symbols_hash_tab_size
* sizeof (u_int32_t
);
1002 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_base
1003 = symbols_string_pool
;
1004 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_STR_POOL
)].iov_len
1005 = symbols_string_pool_size
;
1007 #if __BYTE_ORDER == __BIG_ENDIAN
1008 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1010 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1011 = symbols_class_size
;
1013 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1015 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1016 = symbols_class_size
;
1018 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_base
1020 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EL
)].iov_len
1021 = symbols_class_size
;
1023 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_base
1025 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_SYMB_CLASS_EB
)].iov_len
1026 = symbols_class_size
;
1029 /* Update idx array. */
1030 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1031 for (cnt
= 1; cnt
< nelems
; ++cnt
)
1032 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
1034 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
1036 obstack_free (&non_simple
, NULL
);
1037 obstack_free (&string_pool
, NULL
);
1042 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
1043 struct token
*code
, struct charset_t
*charset
)
1045 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1049 if (collate
->combine_token
!= NULL
)
1051 free ((void *) collate
->combine_token
);
1052 collate
->combine_token
= NULL
;
1055 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1056 if ((wchar_t) value
!= ILLEGAL_CHAR_VALUE
)
1058 lr_error (lr
, _("symbol for multicharacter collating element "
1059 "`%.*s' duplicates symbolic name in charset"),
1060 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1064 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1067 lr_error (lr
, _("symbol for multicharacter collating element "
1068 "`%.*s' duplicates other element definition"),
1069 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1073 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1076 lr_error (lr
, _("symbol for multicharacter collating element "
1077 "`%.*s' duplicates symbol definition"),
1078 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1082 collate
->combine_token
= code
->val
.str
.start
;
1083 collate
->combine_token_len
= code
->val
.str
.len
;
1088 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
1089 struct token
*code
, struct charset_t
*charset
)
1091 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1092 element_t
*elemp
, *runp
;
1094 /* CODE is a string. */
1095 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1096 sizeof (element_t
));
1098 /* We have to translate the string. It may contain <...> character
1100 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
1101 elemp
->this_weight
= 0;
1102 elemp
->ordering
= NULL
;
1103 elemp
->ordering_len
= 0;
1105 free (code
->val
.str
.start
);
1107 if (elemp
->name
== NULL
)
1109 /* At least one character in the string is not defined. We simply
1113 `from' string in collation element declaration contains unknown character"));
1117 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
1119 lr_error (lr
, _("illegal collation element"));
1123 /* The entries in the linked lists of RESULT are sorting in
1124 descending order. The order is important for the `strcoll' and
1125 `wcscoll' functions. */
1126 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1127 (void *) &runp
) >= 0)
1129 /* We already have an entry with this key. Check whether it is
1131 element_t
*prevp
= NULL
;
1136 cmpres
= wcscmp (elemp
->name
, runp
->name
);
1141 while ((runp
= runp
->next
) != NULL
);
1144 lr_error (lr
, _("duplicate collating element definition"));
1150 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
1152 error (EXIT_FAILURE
, 0, _("\
1153 error while inserting collation element into hash table"));
1156 prevp
->next
= elemp
;
1162 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
1164 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
1167 if (insert_entry (&collate
->elements
, collate
->combine_token
,
1168 collate
->combine_token_len
, (void *) elemp
) < 0)
1169 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1175 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
1176 struct token
*code
, struct charset_t
*charset
)
1178 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1182 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1183 if (value
!= ILLEGAL_CHAR_VALUE
)
1185 lr_error (lr
, _("symbol for multicharacter collating element "
1186 "`%.*s' duplicates symbolic name in charset"),
1187 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1191 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
1194 lr_error (lr
, _("symbol for multicharacter collating element "
1195 "`%.*s' duplicates element definition"),
1196 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1200 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1203 lr_error (lr
, _("symbol for multicharacter collating element "
1204 "`%.*s' duplicates other symbol definition"),
1205 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1209 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
1211 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
1217 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
1218 enum coll_sort_rule sort_rule
)
1220 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1222 if (collate
->nrules
>= collate
->nrules_max
)
1224 collate
->nrules_max
*= 2;
1226 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1228 * sizeof (enum coll_sort_rule
));
1231 collate
->rules
[collate
->nrules
++] = sort_rule
;
1236 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
1238 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1241 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
1243 * sizeof (enum coll_sort_rule
));
1245 /* Allocate arrays for temporary weights. */
1246 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
1248 /* Choose arbitrary start value for table size. */
1249 collate
->nweight_max
= 5 * collate
->nrules
;
1250 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
1255 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
1256 struct token
*code
, struct charset_t
*charset
)
1258 const wchar_t zero
= L
'\0';
1259 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1268 /* We have a string to find in one of the three hashing tables. */
1269 value
= charset_find_value (charset
, code
->val
.str
.start
,
1271 if (value
!= ILLEGAL_CHAR_VALUE
)
1273 element_t
*lastp
, *firstp
;
1275 collate
->kind
= character
;
1277 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1278 (void *) &firstp
) < 0)
1279 firstp
= lastp
= NULL
;
1282 /* The entry for the simple character is always found at
1285 while (lastp
->next
!= NULL
)
1286 lastp
= lastp
->next
;
1288 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
1290 lr_error (lr
, _("duplicate definition for character `%.*s'"),
1291 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1292 lr_ignore_rest (lr
, 0);
1298 collate
->current_element
1299 = (element_t
*) obstack_alloc (&collate
->element_mem
,
1300 sizeof (element_t
));
1302 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
1303 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
1305 collate
->current_element
->name
=
1306 (const wchar_t *) obstack_finish (&collate
->element_mem
);
1308 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1310 collate
->current_element
->next
= NULL
;
1314 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
1315 (void *) collate
->current_element
) < 0)
1317 lr_error (lr
, _("cannot insert collation element `%.*s'"),
1318 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1323 lastp
->next
= collate
->current_element
;
1325 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1326 code
->val
.str
.len
, &tmp
) >= 0)
1328 collate
->current_element
= (element_t
*) tmp
;
1330 if (collate
->current_element
->this_weight
!= 0)
1333 collation element `%.*s' appears more than once: ignore line"),
1334 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1335 lr_ignore_rest (lr
, 0);
1340 collate
->kind
= element
;
1341 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
1343 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1344 code
->val
.str
.len
, &tmp
) >= 0)
1346 unsigned int order
= ++collate
->order_cnt
;
1348 if ((unsigned long int) tmp
!= 0ul)
1351 collation symbol `%.*s' appears more than once: ignore line"),
1352 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1353 lr_ignore_rest (lr
, 0);
1358 collate
->kind
= symbol
;
1360 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
1361 code
->val
.str
.len
, (void *) order
) < 0)
1363 lr_error (lr
, _("cannot process order specification"));
1370 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1371 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1372 lr_ignore_rest (lr
, 0);
1379 collate
->kind
= undefined
;
1380 collate
->current_element
= &collate
->undefined
;
1384 if (collate
->was_ellipsis
)
1387 two lines in a row containing `...' are not allowed"));
1390 else if (collate
->kind
!= character
)
1392 /* An ellipsis requires the previous line to be an
1393 character definition. */
1395 line before ellipsis does not contain definition for character constant"));
1396 lr_ignore_rest (lr
, 0);
1400 collate
->kind
= ellipsis
;
1404 assert (! "illegal token in `collate_order_elem'");
1407 /* Now it's time to handle the ellipsis in the previous line. We do
1408 this only when the last line contained an definition for a
1409 character, the current line also defines an character, the
1410 character code for the later is bigger than the former. */
1411 if (collate
->was_ellipsis
)
1413 if (collate
->kind
!= character
)
1416 line after ellipsis must contain character definition"));
1417 lr_ignore_rest (lr
, 0);
1420 else if (collate
->last_char
> value
)
1422 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1423 lr_ignore_rest (lr
, 0);
1428 /* We can fill the arrays with the information we need. */
1434 name
[0] = collate
->last_char
+ 1;
1437 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1438 * sizeof (unsigned int));
1439 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1441 if (data
== NULL
|| ptr
== NULL
)
1442 error (4, 0, _("memory exhausted"));
1444 /* Prepare data. Because the characters covered by an
1445 ellipsis all have equal values we prepare the data once
1446 and only change the variable number (if there are any).
1447 PTR[...] will point to the entries which will have to be
1448 fixed during the output loop. */
1449 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1451 data
[cnt
] = collate
->weight_cnt
[cnt
];
1452 ptr
[cnt
] = (cnt
== 0
1454 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1457 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1458 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1460 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1461 if ((wchar_t) data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1464 while (name
[0] <= value
)
1468 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1469 sizeof (element_t
));
1471 error (4, 0, _("memory exhausted"));
1474 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1475 name
, 2 * sizeof (wchar_t));
1476 pelem
->this_weight
= ++collate
->order_cnt
;
1478 pelem
->ordering_len
= collate
->nweight
;
1480 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1482 * pelem
->ordering_len
)
1483 * sizeof (unsigned int));
1485 /* `...' weights need to be adjusted. */
1486 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1488 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1490 /* Insert new entry into result table. */
1491 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1492 (void *) &pelem
->next
) >= 0)
1494 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1495 (void *) pelem
->next
) < 0)
1496 error (4, 0, _("cannot insert into result table"));
1499 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1500 (void *) pelem
->next
) < 0)
1501 error (4, 0, _("cannot insert into result table"));
1503 /* Increment counter. */
1509 /* Reset counters for weights. */
1510 collate
->weight_idx
= 0;
1511 collate
->nweight
= 0;
1512 for (i
= 0; i
< collate
->nrules
; ++i
)
1513 collate
->weight_cnt
[i
] = 0;
1514 collate
->current_patch
= NULL
;
1521 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1522 struct token
*code
, struct charset_t
*charset
)
1524 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1525 unsigned int here_weight
;
1529 assert (code
->tok
== tok_bsymbol
);
1531 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1532 if (value
!= ILLEGAL_CHAR_VALUE
)
1536 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1541 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1544 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1546 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1547 code
->val
.str
.len
, &tmp
) >= 0)
1549 element_t
*runp
= (element_t
*) tmp
;
1551 here_weight
= runp
->this_weight
;
1553 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1554 code
->val
.str
.len
, &tmp
) >= 0)
1556 here_weight
= (unsigned int) tmp
;
1561 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1562 (int) code
->val
.str
.len
, code
->val
.str
.start
);
1563 lr_ignore_rest (lr
, 0);
1567 /* When we currently work on a collation symbol we do not expect any
1569 if (collate
->kind
== symbol
)
1572 specification of sorting weight for collation symbol does not make sense"));
1573 lr_ignore_rest (lr
, 0);
1577 /* Add to the current collection of weights. */
1578 if (collate
->nweight
>= collate
->nweight_max
)
1580 collate
->nweight_max
*= 2;
1581 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1582 collate
->nweight_max
);
1585 /* If the weight is currently not known, we remember to patch the
1586 resulting tables. */
1587 if (here_weight
== 0)
1591 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1593 newp
->fname
= lr
->fname
;
1594 newp
->lineno
= lr
->lineno
;
1595 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1596 code
->val
.str
.start
,
1598 newp
->where
.idx
= collate
->nweight
++;
1599 newp
->next
= collate
->current_patch
;
1600 collate
->current_patch
= newp
;
1603 collate
->weight
[collate
->nweight
++] = here_weight
;
1604 ++collate
->weight_cnt
[collate
->weight_idx
];
1611 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1613 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1615 if (collate
->kind
== symbol
)
1618 specification of sorting weight for collation symbol does not make sense"));
1619 lr_ignore_rest (lr
, 0);
1623 ++collate
->weight_idx
;
1624 if (collate
->weight_idx
>= collate
->nrules
)
1626 lr_error (lr
, _("too many weights"));
1627 lr_ignore_rest (lr
, 0);
1636 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1637 struct token
*code
, struct charset_t
*charset
)
1639 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1640 unsigned int value
= 0;
1642 /* There current tokens can be `IGNORE', `...', or a string. */
1646 /* This token is allowed in all situations. */
1647 value
= IGNORE_CHAR
;
1651 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1653 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1656 `...' must only be used in `...' and `UNDEFINED' entries"));
1657 lr_ignore_rest (lr
, 0);
1660 value
= ELLIPSIS_CHAR
;
1664 /* This can become difficult. We have to get the weights which
1665 correspond to the single wide chars in the string. But some
1666 of the `chars' might not be real characters, but collation
1667 elements or symbols. And so the string decoder might have
1668 signaled errors. The string at this point is not translated.
1669 I.e., all <...> sequences are still there. */
1671 char *runp
= code
->val
.str
.start
;
1674 while (*runp
!= '\0')
1676 char *startp
= (char *) runp
;
1677 char *putp
= (char *) runp
;
1680 /* Lookup weight for char and store it. */
1683 while (*++runp
!= '\0' && *runp
!= '>')
1685 if (*runp
== lr
->escape_char
)
1686 if (*++runp
== '\0')
1688 lr_error (lr
, _("unterminated weight name"));
1689 lr_ignore_rest (lr
, 0);
1699 lr_error (lr
, _("empty weight name: line ignored"));
1700 lr_ignore_rest (lr
, 0);
1704 wch
= charset_find_value (charset
, startp
, putp
- startp
);
1705 if (wch
!= ILLEGAL_CHAR_VALUE
)
1709 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1710 (void *)&pelem
) < 0)
1713 while (pelem
!= NULL
1714 && (pelem
->name
[0] != wch
1715 || pelem
->name
[1] != L
'\0'))
1716 pelem
= pelem
->next
;
1718 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1720 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1723 element_t
*pelem
= (element_t
*) tmp
;
1725 value
= pelem
->this_weight
;
1727 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1730 value
= (unsigned int) tmp
;
1735 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1736 (int) (putp
- startp
), startp
);
1737 lr_ignore_rest (lr
, 0);
1746 if (*runp
== lr
->escape_char
)
1748 static const char digits
[] = "0123456789abcdef";
1753 if (tolower (*runp
) == 'x')
1758 else if (tolower (*runp
) == 'd')
1766 dp
= strchr (digits
, tolower (*runp
));
1767 if (dp
== NULL
|| (dp
- digits
) >= base
)
1771 illegal character constant in string"));
1772 lr_ignore_rest (lr
, 0);
1778 dp
= strchr (digits
, tolower (*runp
));
1779 if (dp
== NULL
|| (dp
- digits
) >= base
)
1787 dp
= strchr (digits
, tolower (*runp
));
1788 if (dp
!= NULL
&& (dp
- digits
< base
))
1797 wch
= (wchar_t) *runp
++;
1799 /* Lookup the weight for WCH. */
1800 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1805 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1808 value
= wp
== NULL
? 0 : wp
->this_weight
;
1810 /* To get the correct name for the error message. */
1813 /**************************************************\
1814 |* I know here is something wrong. Characters in *|
1815 |* the string which are not in the <...> form *|
1816 |* cannot be declared forward for now!!! *|
1817 \**************************************************/
1820 /* Store in weight array. */
1821 if (collate
->nweight
>= collate
->nweight_max
)
1823 collate
->nweight_max
*= 2;
1825 = (unsigned int *) xrealloc (collate
->weight
,
1826 collate
->nweight_max
);
1833 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1835 newp
->fname
= lr
->fname
;
1836 newp
->lineno
= lr
->lineno
;
1838 = (const char *) obstack_copy0 (&collate
->element_mem
,
1839 startp
, putp
- startp
);
1840 newp
->where
.idx
= collate
->nweight
++;
1841 newp
->next
= collate
->current_patch
;
1842 collate
->current_patch
= newp
;
1845 collate
->weight
[collate
->nweight
++] = value
;
1846 ++collate
->weight_cnt
[collate
->weight_idx
];
1852 assert (! "should not happen");
1856 if (collate
->nweight
>= collate
->nweight_max
)
1858 collate
->nweight_max
*= 2;
1859 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1860 collate
->nweight_max
);
1863 collate
->weight
[collate
->nweight
++] = value
;
1864 ++collate
->weight_cnt
[collate
->weight_idx
];
1871 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1873 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1874 element_t
*pelem
= collate
->current_element
;
1876 if (collate
->kind
== symbol
)
1878 /* We don't have to do anything. */
1879 collate
->was_ellipsis
= 0;
1883 if (collate
->kind
== ellipsis
)
1885 /* Before the next line is processed the ellipsis is handled. */
1886 collate
->was_ellipsis
= 1;
1890 assert (collate
->kind
== character
|| collate
->kind
== element
1891 || collate
->kind
== undefined
);
1893 /* Fill in the missing weights. */
1894 while (++collate
->weight_idx
< collate
->nrules
)
1896 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1897 ++collate
->weight_cnt
[collate
->weight_idx
];
1900 /* Now we know how many ordering weights the current
1901 character/element has. Allocate room in the element structure
1902 and copy information. */
1903 pelem
->ordering_len
= collate
->nweight
;
1905 /* First we write an array with the number of values for each
1907 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1908 collate
->nrules
* sizeof (unsigned int));
1910 /* Now the weights itselves. */
1911 obstack_grow (&collate
->element_mem
, collate
->weight
,
1912 collate
->nweight
* sizeof (unsigned int));
1915 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1917 /* Now we handle the "patches". */
1918 while (collate
->current_patch
!= NULL
)
1920 patch_t
*this_patch
;
1922 this_patch
= collate
->current_patch
;
1924 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1925 + this_patch
->where
.idx
];
1927 collate
->current_patch
= this_patch
->next
;
1928 this_patch
->next
= collate
->all_patches
;
1929 collate
->all_patches
= this_patch
;
1932 /* Set information for next round. */
1933 collate
->was_ellipsis
= 0;
1934 if (collate
->kind
!= undefined
)
1935 collate
->last_char
= pelem
->name
[0];