1 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
33 #include "localeinfo.h"
35 #include "simple-hash.h"
36 #include "stringtrans.h"
38 /* Uncomment the following line in the production version. */
39 /* #define NDEBUG 1 */
/* Yield the larger of A and B.  This is a plain textual macro, so an
   argument may be evaluated more than once; do not pass expressions
   with side effects.  */
43 #define MAX(a, b) ((a) > (b) ? (a) : (b))
46 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
49 /* What kind of symbols get defined? */
60 typedef struct patch_t
74 typedef struct element_t
77 unsigned int this_weight
;
79 struct element_t
*next
;
81 unsigned int *ordering
;
86 /* The real definition of the struct for the LC_COLLATE locale. */
87 struct locale_collate_t
89 /* Collate symbol table. Simple mapping to number. */
92 /* The collation elements. */
94 struct obstack element_mem
;
96 /* The result table. */
99 /* Sorting rules given in order_start line. */
102 enum coll_sort_rule
*rules
;
104 /* Used while recognizing symbol composed of multiple tokens
105 (collating-element). */
106 const char *combine_token
;
107 size_t combine_token_len
;
109 /* How many sorting order specifications so far. */
110 unsigned int order_cnt
;
112 /* Was lastline ellipsis? */
114 /* Value of last entry if was character. */
116 /* Current element. */
117 element_t
*current_element
;
118 /* What kind of symbol is current element. */
119 enum coll_symbol kind
;
121 /* While collecting the weights we need some temporary space. */
122 unsigned int current_order
;
125 unsigned int *weight
;
130 patch_t
*current_patch
;
131 patch_t
*all_patches
;
133 /* Room for the UNDEFINED information. */
135 unsigned int undefined_len
;
139 /* Be verbose? Defined in localedef.c. */
/* Allocation helpers defined outside this file.  The code in this file
   uses their results without testing for NULL.  */
143 void *xmalloc (size_t __n
);
144 void *xrealloc (void *__p
, size_t __n
);
/* Names the obstack macros (obstack_init and friends) use for their
   chunk allocator and deallocator: chunks come from xmalloc and are
   released with free.  */
147 #define obstack_chunk_alloc xmalloc
148 #define obstack_chunk_free free
152 collate_startup (struct linereader
*lr
, struct localedef_t
*locale
,
153 struct charset_t
*charset
)
155 struct locale_collate_t
*collate
;
157 /* It is important that we always use UCS4 encoding for strings now. */
158 encoding_method
= ENC_UCS4
;
160 /* Allocate the needed room. */
161 locale
->categories
[LC_COLLATE
].collate
= collate
=
162 (struct locale_collate_t
*) xmalloc (sizeof (struct locale_collate_t
));
164 /* Allocate hash table for collating elements. */
165 if (init_hash (&collate
->elements
, 512))
166 error (4, 0, _("memory exhausted"));
167 collate
->combine_token
= NULL
;
168 obstack_init (&collate
->element_mem
);
170 /* Allocate hash table for collating symbols. */
171 if (init_hash (&collate
->symbols
, 64))
172 error (4, 0, _("memory exhausted"));
174 /* Allocate hash table for result. */
175 if (init_hash (&collate
->result
, 512))
176 error (4, 0, _("memory exhausted"));
179 collate
->nrules_max
= 10;
181 = (enum coll_sort_rule
*) xmalloc (collate
->nrules_max
182 * sizeof (enum coll_sort_rule
));
184 collate
->order_cnt
= 1; /* The smallest weight is 2. */
186 collate
->was_ellipsis
= 0;
187 collate
->last_char
= L
'\0'; /* 0 because leading ellipsis is allowed. */
189 collate
->all_patches
= NULL
;
191 /* This tells us no UNDEFINED entry was found until now. */
192 collate
->undefined
.this_weight
= 0;
194 lr
->translate_strings
= 0;
199 collate_finish (struct localedef_t
*locale
, struct charset_t
*charset
)
201 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
205 /* Patch the constructed table so that forward references are
207 for (patch
= collate
->all_patches
; patch
!= NULL
; patch
= patch
->next
)
210 size_t toklen
= strlen (patch
->token
);
212 unsigned int value
= 0;
214 wch
= charset_find_value (charset
, patch
->token
, toklen
);
215 if (wch
!= ILLEGAL_CHAR_VALUE
)
219 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
222 for (; runp
!= NULL
; runp
= runp
->next
)
223 if (runp
->name
[0] == wch
&& runp
->name
[1] == L
'\0')
226 value
= runp
== NULL
? 0 : runp
->this_weight
;
228 else if (find_entry (&collate
->elements
, patch
->token
, toklen
, &ptmp
)
231 value
= ((element_t
*) ptmp
)->this_weight
;
233 else if (find_entry (&collate
->symbols
, patch
->token
, toklen
, &ptmp
)
236 value
= (unsigned int) ptmp
;
242 error_at_line (0, 0, patch
->fname
, patch
->lineno
,
243 _("no weight defined for symbol `%s'"), patch
->token
);
245 *patch
->where
.pos
= value
;
248 /* If no definition for UNDEFINED is given, all characters in the
249 given charset must be specified. */
250 if (collate
->undefined
.ordering
== NULL
)
252 /**************************************************************\
253 |* XXX We should test whether really an unspecified character *|
254 |* exists before giving the message. *|
255 \**************************************************************/
258 error (0, 0, _("no definition of `UNDEFINED'"));
260 collate
->undefined
.ordering_len
= collate
->nrules
;
261 weight
= ++collate
->order_cnt
;
263 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
266 obstack_grow (&collate
->element_mem
, &one
, sizeof (one
));
269 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
270 obstack_grow (&collate
->element_mem
, &weight
, sizeof (weight
));
272 collate
->undefined
.ordering
= obstack_finish (&collate
->element_mem
);
275 collate
->undefined_len
= 2; /* For the name: 1 x wchar_t + L'\0'. */
276 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
277 collate
->undefined_len
+= 1 + collate
->undefined
.ordering
[cnt
];
279 /* Collating symbols are not used anymore. */
280 (void) delete_hash (&collate
->symbols
);
286 collate_output (struct localedef_t
*locale
, const char *output_path
)
288 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
289 u_int32_t table_size
, table_best
, level_best
, sum_best
;
294 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
295 struct iovec iov
[2 + nelems
];
296 struct locale_file data
;
297 u_int32_t idx
[nelems
];
298 struct obstack non_simple
;
299 size_t cnt
, entry_size
;
300 u_int32_t undefined_offset
= UINT_MAX
;
301 u_int32_t
*table
, *extra
, *table2
, *extra2
;
308 /* Compute table size. */
310 Computing table size for collation information might take a while..."),
312 for (table_size
= 256; table_size
< sum_best
; ++table_size
)
314 size_t hits
[table_size
];
315 unsigned int worst
= 1;
320 for (cnt
= 0; cnt
< 256; ++cnt
)
322 memset (&hits
[256], '\0', sizeof (hits
) - 256 * sizeof (size_t));
324 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
325 &len
, (void **) &pelem
) >= 0)
326 if (pelem
->ordering
!= NULL
&& pelem
->name
[0] > 0xff)
327 if (++hits
[(unsigned int) pelem
->name
[0] % table_size
] > worst
)
329 worst
= hits
[(unsigned int) pelem
->name
[0] % table_size
];
330 if (table_size
* worst
> sum_best
)
334 if (table_size
* worst
< sum_best
)
336 sum_best
= table_size
* worst
;
337 table_best
= table_size
;
341 assert (table_best
!= 0xffff || level_best
!= 0xffff);
342 fputs (_(" done\n"), stderr
);
344 obstack_init (&non_simple
);
346 data
.magic
= LIMAGIC (LC_COLLATE
);
348 iov
[0].iov_base
= (void *) &data
;
349 iov
[0].iov_len
= sizeof (data
);
351 iov
[1].iov_base
= (void *) idx
;
352 iov
[1].iov_len
= sizeof (idx
);
354 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_base
= &collate
->nrules
;
355 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_NRULES
)].iov_len
= sizeof (u_int32_t
);
357 table
= (u_int32_t
*) alloca (collate
->nrules
* sizeof (u_int32_t
));
358 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_base
= table
;
359 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_RULES
)].iov_len
360 = collate
->nrules
* sizeof (u_int32_t
);
361 /* Another trick here. Describing the collation method needs only a
362 few bits (3, to be exact). But the binary file should be
363 accessible by machines of either endianness and so we store both
364 representations in the same word. */
365 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
366 table
[cnt
] = collate
->rules
[cnt
] | SWAPU32 (collate
->rules
[cnt
]);
368 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_base
= &table_best
;
369 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE
)].iov_len
= sizeof (u_int32_t
);
371 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_base
= &level_best
;
372 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS
)].iov_len
373 = sizeof (u_int32_t
);
375 entry_size
= 1 + MAX (collate
->nrules
, 2);
377 table
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
378 * sizeof (table
[0]));
379 memset (table
, '\0', table_best
* level_best
* entry_size
380 * sizeof (table
[0]));
383 /* Macros for inserting in output table. */
384 #define ADD_VALUE(expr) \
386 u_int32_t to_write = (u_int32_t) expr; \
387 obstack_grow (&non_simple, &to_write, sizeof (to_write)); \
390 #define ADD_ELEMENT(pelem, len) \
396 wlen = wcslen (pelem->name); \
397 obstack_grow (&non_simple, pelem->name, (wlen + 1) * sizeof (u_int32_t)); \
399 idx = collate->nrules; \
400 for (cnt = 0; cnt < collate->nrules; ++cnt) \
404 ADD_VALUE (pelem->ordering[cnt]); \
405 for (disp = 0; disp < pelem->ordering[cnt]; ++disp) \
406 ADD_VALUE (pelem->ordering[idx++]); \
410 #define ADD_FORWARD(pelem) \
412 /* We leave a reference in the main table and put all \
413 information in the table for the extended entries. */ \
415 element_t *has_simple = NULL; \
418 table[(level * table_best + slot) * entry_size + 1] \
420 table[(level * table_best + slot) * entry_size + 2] \
421 = obstack_object_size (&non_simple) / sizeof (u_int32_t); \
423 /* Here we have to construct the non-simple table entry. First \
424 compute the total length of this entry. */ \
425 for (runp = (pelem); runp != NULL; runp = runp->next) \
426 if (runp->ordering != NULL) \
431 value = 1 + wcslen (runp->name) + 1; \
433 for (cnt = 0; cnt < collate->nrules; ++cnt) \
434 /* We have to take care for entries without ordering \
435 information. While reading them they get inserted in the \
436 table and later not removed when something goes wrong with \
437 reading its weights. */ \
439 value += 1 + runp->ordering[cnt]; \
441 if (runp->name[1] == L'\0') \
445 ADD_ELEMENT (runp, value); \
448 if (has_simple == NULL) \
452 ADD_VALUE (collate->undefined_len + 1); \
454 /* Add the name. */ \
455 ADD_VALUE ((pelem)->name[0]); \
458 idx = collate->nrules; \
459 for (cnt = 0; cnt < collate->nrules; ++cnt) \
463 ADD_VALUE (collate->undefined.ordering[cnt]); \
464 for (disp = 0; disp < collate->undefined.ordering[cnt]; ++disp) \
466 if (collate->undefined.ordering[idx] == ELLIPSIS_CHAR) \
467 ADD_VALUE ((pelem)->name[0]); \
469 ADD_VALUE (collate->undefined.ordering[idx++]); \
478 /* Fill the table now. First we look for all the characters which
479 fit into one single byte. This speeds up the 8-bit string
482 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
483 &len
, (void **) &pelem
) >= 0)
484 if (pelem
->name
[0] <= 0xff)
486 /* We have a single byte name. Now we must distinguish
487 between entries in simple form (i.e., only one value per
488 weight and no collation element starting with the same
489 character) and those which are not. */
490 size_t slot
= ((size_t) pelem
->name
[0]);
491 const size_t level
= 0;
493 table
[slot
* entry_size
] = pelem
->name
[0];
495 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
496 && pelem
->ordering_len
== collate
->nrules
)
498 /* Yes, we have a simple one. Lucky us. */
501 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
502 table
[slot
* entry_size
+ 1 + cnt
]
503 = pelem
->ordering
[collate
->nrules
+ cnt
];
509 /* Now check for missing single byte entries. If one exist we fill
510 with the UNDEFINED entry. */
511 for (cnt
= 0; cnt
< 256; ++cnt
)
512 /* The first weight is never 0 for existing entries. */
513 if (table
[cnt
* entry_size
+ 1] == 0)
515 /* We have to fill in the information from the UNDEFINED
517 table
[cnt
* entry_size
] = (u_int32_t
) cnt
;
519 if (collate
->undefined
.ordering_len
== collate
->nrules
)
523 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
524 if (collate
->undefined
.ordering
[collate
->nrules
+ inner
]
526 table
[cnt
* entry_size
+ 1 + inner
] = cnt
;
528 table
[cnt
* entry_size
+ 1 + inner
]
529 = collate
->undefined
.ordering
[collate
->nrules
+ inner
];
533 if (undefined_offset
!= UINT_MAX
)
535 table
[cnt
* entry_size
+ 1] = FORWARD_CHAR
;
536 table
[cnt
* entry_size
+ 2] = undefined_offset
;
540 const size_t slot
= cnt
;
541 const size_t level
= 0;
543 ADD_FORWARD (&collate
->undefined
);
544 undefined_offset
= table
[cnt
* entry_size
+ 2];
549 /* Now we are ready for inserting the whole rest. */
551 while (iterate_table (&collate
->result
, &last
, (const void **) &name
,
552 &len
, (void **) &pelem
) >= 0)
553 if (pelem
->name
[0] > 0xff)
555 /* Find the position. */
556 size_t slot
= ((size_t) pelem
->name
[0]) % table_best
;
559 while (table
[(level
* table_best
+ slot
) * entry_size
+ 1] != 0)
561 assert (level
< level_best
);
563 if (pelem
->name
[1] == L
'\0' && pelem
->next
== NULL
564 && pelem
->ordering_len
== collate
->nrules
)
566 /* Again a simple entry. */
569 for (inner
= 0; inner
< collate
->nrules
; ++inner
)
570 table
[(level
* table_best
+ slot
) * entry_size
+ 1 + inner
]
571 = pelem
->ordering
[collate
->nrules
+ inner
];
577 /* Add the UNDEFINED entry. */
579 /* Here we have to construct the non-simple table entry. */
582 undefined_offset
= obstack_object_size (&non_simple
);
584 idx
= collate
->nrules
;
585 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
589 ADD_VALUE (collate
->undefined
.ordering
[cnt
]);
590 for (disp
= 0; disp
< collate
->undefined
.ordering
[cnt
]; ++disp
)
591 ADD_VALUE (collate
->undefined
.ordering
[idx
++]);
595 /* Finish the extra block. */
596 extra_len
= obstack_object_size (&non_simple
);
597 extra
= (u_int32_t
*) obstack_finish (&non_simple
);
598 assert ((extra_len
% sizeof (u_int32_t
)) == 0);
600 /* Now we have to build the two array for the other byte ordering. */
601 table2
= (u_int32_t
*) alloca (table_best
* level_best
* entry_size
602 * sizeof (table
[0]));
603 extra2
= (u_int32_t
*) alloca (extra_len
);
605 for (cnt
= 0; cnt
< table_best
* level_best
* entry_size
; ++cnt
)
606 table2
[cnt
] = SWAPU32 (table
[cnt
]);
608 for (cnt
= 0; cnt
< extra_len
/ sizeof (u_int32_t
); ++cnt
)
609 extra2
[cnt
] = SWAPU32 (extra2
[cnt
]);
611 /* Store table addresses and lengths. */
612 #if __BYTE_ORDER == __BIG_ENDIAN
613 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table
;
614 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
615 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
617 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table2
;
618 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
619 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
621 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra
;
622 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
624 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra2
;
625 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
627 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_base
= table2
;
628 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EB
)].iov_len
629 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
631 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_base
= table
;
632 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_TABLE_EL
)].iov_len
633 = table_best
* level_best
* entry_size
* sizeof (table
[0]);
635 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_base
= extra2
;
636 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EB
)].iov_len
= extra_len
;
638 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_base
= extra
;
639 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_EXTRA_EL
)].iov_len
= extra_len
;
642 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_base
= &undefined_offset
;
643 iov
[2 + _NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED
)].iov_len
= sizeof (u_int32_t
);
645 /* Update idx array. */
646 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
647 for (cnt
= 1; cnt
< nelems
; ++cnt
)
648 idx
[cnt
] = idx
[cnt
- 1] + iov
[1 + cnt
].iov_len
;
650 write_locale_data (output_path
, "LC_COLLATE", 2 + nelems
, iov
);
655 collate_element_to (struct linereader
*lr
, struct localedef_t
*locale
,
656 struct token
*code
, struct charset_t
*charset
)
658 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
662 if (collate
->combine_token
!= NULL
)
664 free ((void *) collate
->combine_token
);
665 collate
->combine_token
= NULL
;
668 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
669 if (value
!= ILLEGAL_CHAR_VALUE
)
671 lr_error (lr
, _("symbol for multicharacter collating element "
672 "`%.*s' duplicates symbolic name in charset"),
673 code
->val
.str
.len
, code
->val
.str
.start
);
677 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
680 lr_error (lr
, _("symbol for multicharacter collating element "
681 "`%.*s' duplicates other element definition"),
682 code
->val
.str
.len
, code
->val
.str
.start
);
686 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
689 lr_error (lr
, _("symbol for multicharacter collating element "
690 "`%.*s' duplicates symbol definition"),
691 code
->val
.str
.len
, code
->val
.str
.start
);
695 collate
->combine_token
= code
->val
.str
.start
;
696 collate
->combine_token_len
= code
->val
.str
.len
;
701 collate_element_from (struct linereader
*lr
, struct localedef_t
*locale
,
702 struct token
*code
, struct charset_t
*charset
)
704 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
705 element_t
*elemp
, *runp
;
707 /* CODE is a string. */
708 elemp
= (element_t
*) obstack_alloc (&collate
->element_mem
,
711 /* We have to translate the string. It may contain <...> character
713 elemp
->name
= (wchar_t *) translate_string (code
->val
.str
.start
, charset
);
714 elemp
->this_weight
= 0;
715 elemp
->ordering
= NULL
;
716 elemp
->ordering_len
= 0;
718 free (code
->val
.str
.start
);
720 if (elemp
->name
== NULL
)
722 /* At least one character in the string is not defined. We simply
726 `from' string in collation element declaration contains unknown character"));
730 if (elemp
->name
[0] == L
'\0' || elemp
->name
[1] == L
'\0')
732 lr_error (lr
, _("illegal colltion element"));
736 /* The entries in the linked lists of RESULT are sorted in
737 descending order. The order is important for the `strcoll' and
738 `wcscoll' functions. */
739 if (find_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
740 (void *) &runp
) >= 0)
742 /* We already have an entry with this key. Check whether it is
744 element_t
*prevp
= NULL
;
749 cmpres
= wcscmp (elemp
->name
, runp
->name
);
754 while ((runp
= runp
->next
) != NULL
);
757 lr_error (lr
, _("duplicate collating element definition"));
763 if (set_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t),
765 error (EXIT_FAILURE
, 0,
767 error while inserting collation element into hash table"));
776 if (insert_entry (&collate
->result
, elemp
->name
, sizeof (wchar_t), elemp
)
778 error (EXIT_FAILURE
, errno
, _("error while inserting to hash table"));
781 if (insert_entry (&collate
->elements
, collate
->combine_token
,
782 collate
->combine_token_len
, (void *) elemp
) < 0)
783 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
789 collate_symbol (struct linereader
*lr
, struct localedef_t
*locale
,
790 struct token
*code
, struct charset_t
*charset
)
792 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
796 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
797 if (value
!= ILLEGAL_CHAR_VALUE
)
799 lr_error (lr
, _("symbol for multicharacter collating element "
800 "`%.*s' duplicates symbolic name in charset"),
801 code
->val
.str
.len
, code
->val
.str
.start
);
805 if (find_entry (&collate
->elements
, code
->val
.str
.start
, code
->val
.str
.len
,
808 lr_error (lr
, _("symbol for multicharacter collating element "
809 "`%.*s' duplicates element definition"),
810 code
->val
.str
.len
, code
->val
.str
.start
);
814 if (find_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
817 lr_error (lr
, _("symbol for multicharacter collating element "
818 "`%.*s' duplicates other symbol definition"),
819 code
->val
.str
.len
, code
->val
.str
.start
);
823 if (insert_entry (&collate
->symbols
, code
->val
.str
.start
, code
->val
.str
.len
,
825 lr_error (lr
, _("cannot insert new collating symbol definition: %s"),
831 collate_new_order (struct linereader
*lr
, struct localedef_t
*locale
,
832 enum coll_sort_rule sort_rule
)
834 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
836 if (collate
->nrules
>= collate
->nrules_max
)
838 collate
->nrules_max
*= 2;
840 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
842 * sizeof (enum coll_sort_rule
));
845 collate
->rules
[collate
->nrules
++] = sort_rule
;
850 collate_build_arrays (struct linereader
*lr
, struct localedef_t
*locale
)
852 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
855 = (enum coll_sort_rule
*) xrealloc (collate
->rules
,
857 * sizeof (enum coll_sort_rule
));
859 /* Allocate arrays for temporary weights. */
860 collate
->weight_cnt
= (int *) xmalloc (collate
->nrules
* sizeof (int));
862 /* Choose arbitrary start value for table size. */
863 collate
->nweight_max
= 5 * collate
->nrules
;
864 collate
->weight
= (int *) xmalloc (collate
->nweight_max
* sizeof (int));
869 collate_order_elem (struct linereader
*lr
, struct localedef_t
*locale
,
870 struct token
*code
, struct charset_t
*charset
)
872 const wchar_t zero
= L
'\0';
873 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
882 /* We have a string to find in one of the three hashing tables. */
883 value
= charset_find_value (charset
, code
->val
.str
.start
,
885 if (value
!= ILLEGAL_CHAR_VALUE
)
887 element_t
*lastp
, *firstp
;
889 collate
->kind
= character
;
891 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
892 (void *) &firstp
) < 0)
893 firstp
= lastp
= NULL
;
896 /* The entry for the simple character is always found at
899 while (lastp
->next
!= NULL
)
902 if (lastp
->name
[0] == value
&& lastp
->name
[1] == L
'\0')
904 lr_error (lr
, _("duplicate definition for character `%.*s'"),
905 code
->val
.str
.len
, code
->val
.str
.start
);
906 lr_ignore_rest (lr
, 0);
912 collate
->current_element
913 = (element_t
*) obstack_alloc (&collate
->element_mem
,
916 obstack_grow (&collate
->element_mem
, &value
, sizeof (value
));
917 obstack_grow (&collate
->element_mem
, &zero
, sizeof (zero
));
919 collate
->current_element
->name
=
920 (const wchar_t *) obstack_finish (&collate
->element_mem
);
922 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
924 collate
->current_element
->next
= NULL
;
928 if (insert_entry (&collate
->result
, &value
, sizeof (wchar_t),
929 (void *) collate
->current_element
) < 0)
931 lr_error (lr
, _("cannot insert collation element `%.*s'"),
932 code
->val
.str
.len
, code
->val
.str
.start
);
937 lastp
->next
= collate
->current_element
;
939 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
940 code
->val
.str
.len
, &tmp
) >= 0)
942 collate
->current_element
= (element_t
*) tmp
;
944 if (collate
->current_element
->this_weight
!= 0)
947 collation element `%.*s' appears more than once: ignore line"),
948 code
->val
.str
.len
, code
->val
.str
.start
);
949 lr_ignore_rest (lr
, 0);
954 collate
->kind
= element
;
955 collate
->current_element
->this_weight
= ++collate
->order_cnt
;
957 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
958 code
->val
.str
.len
, &tmp
) >= 0)
960 unsigned int order
= ++collate
->order_cnt
;
962 if ((unsigned int) tmp
!= 0)
965 collation symbol `.*s' appears more than once: ignore line"),
966 code
->val
.str
.len
, code
->val
.str
.start
);
967 lr_ignore_rest (lr
, 0);
972 collate
->kind
= symbol
;
974 if (set_entry (&collate
->symbols
, code
->val
.str
.start
,
975 code
->val
.str
.len
, (void *) order
) < 0)
977 lr_error (lr
, _("cannot process order specification"));
984 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
985 code
->val
.str
.len
, code
->val
.str
.start
);
986 lr_ignore_rest (lr
, 0);
993 collate
->kind
= undefined
;
994 collate
->current_element
= &collate
->undefined
;
998 if (collate
->was_ellipsis
)
1001 two lines in a row containing `...' are not allowed"));
1004 else if (collate
->kind
!= character
)
1006 /* An ellipsis requires the previous line to be an
1007 character definition. */
1009 line before ellipsis does not contain definition for character constant"));
1010 lr_ignore_rest (lr
, 0);
1014 collate
->kind
= ellipsis
;
1018 assert (! "illegal token in `collate_order_elem'");
1021 /* Now it's time to handle the ellipsis in the previous line. We do
1022 this only when the last line contained a definition for a
1023 character, the current line also defines a character, and the
1024 character code for the latter is bigger than the former. */
1025 if (collate
->was_ellipsis
)
1027 if (collate
->kind
!= character
)
1030 line after ellipsis must contain character definition"));
1031 lr_ignore_rest (lr
, 0);
1034 else if (collate
->last_char
> value
)
1036 lr_error (lr
, _("end point of ellipsis range is bigger then start"));
1037 lr_ignore_rest (lr
, 0);
1042 /* We can fill the arrays with the information we need. */
1048 name
[0] = collate
->last_char
+ 1;
1051 data
= (unsigned int *) alloca ((collate
->nrules
+ collate
->nweight
)
1052 * sizeof (unsigned int));
1053 ptr
= (size_t *) alloca (collate
->nrules
* sizeof (size_t));
1055 if (data
== NULL
|| ptr
== NULL
)
1056 error (4, 0, _("memory exhausted"));
1058 /* Prepare data. Because the characters covered by an
1059 ellipsis all have equal values we prepare the data once
1060 and only change the variable number (if there are any).
1061 PTR[...] will point to the entries which will have to be
1062 fixed during the output loop. */
1063 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1065 data
[cnt
] = collate
->weight_cnt
[cnt
];
1066 ptr
[cnt
] = (cnt
== 0
1068 : ptr
[cnt
- 1] + collate
->weight_cnt
[cnt
- 1]);
1071 for (cnt
= 0; cnt
< collate
->nweight
; ++cnt
)
1072 data
[collate
->nrules
+ cnt
] = collate
->weight
[cnt
];
1074 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1075 if (data
[ptr
[cnt
]] != ELLIPSIS_CHAR
)
1078 while (name
[0] <= value
)
1082 pelem
= (element_t
*) obstack_alloc (&collate
->element_mem
,
1083 sizeof (element_t
));
1085 error (4, 0, _("memory exhausted"));
1088 = (const wchar_t *) obstack_copy (&collate
->element_mem
,
1089 name
, 2 * sizeof (wchar_t));
1090 pelem
->this_weight
= ++collate
->order_cnt
;
1092 pelem
->ordering_len
= collate
->nweight
;
1094 = (unsigned int *) obstack_copy (&collate
->element_mem
, data
,
1096 * pelem
->ordering_len
)
1097 * sizeof (unsigned int));
1099 /* `...' weights need to be adjusted. */
1100 for (cnt
= 0; cnt
< collate
->nrules
; ++cnt
)
1102 pelem
->ordering
[ptr
[cnt
]] = pelem
->this_weight
;
1104 /* Insert new entry into result table. */
1105 if (find_entry (&collate
->result
, name
, sizeof (wchar_t),
1106 (void *) &pelem
->next
) >= 0)
1108 if (set_entry (&collate
->result
, name
, sizeof (wchar_t),
1109 (void *) pelem
->next
) < 0)
1110 error (4, 0, _("cannot insert into result table"));
1113 if (insert_entry (&collate
->result
, name
, sizeof (wchar_t),
1114 (void *) pelem
->next
) < 0)
1115 error (4, 0, _("cannot insert into result table"));
1117 /* Increment counter. */
1123 /* Reset counters for weights. */
1124 collate
->weight_idx
= 0;
1125 collate
->nweight
= 0;
1126 for (i
= 0; i
< collate
->nrules
; ++i
)
1127 collate
->weight_cnt
[i
] = 0;
1128 collate
->current_patch
= NULL
;
1135 collate_weight_bsymbol (struct linereader
*lr
, struct localedef_t
*locale
,
1136 struct token
*code
, struct charset_t
*charset
)
1138 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1139 unsigned int here_weight
;
1143 assert (code
->tok
== tok_bsymbol
);
1145 value
= charset_find_value (charset
, code
->val
.str
.start
, code
->val
.str
.len
);
1146 if (value
!= ILLEGAL_CHAR_VALUE
)
1150 if (find_entry (&collate
->result
, &value
, sizeof (wchar_t),
1155 && (runp
->name
[0] != value
|| runp
->name
[1] != L
'\0'))
1158 here_weight
= runp
== NULL
? 0 : runp
->this_weight
;
1160 else if (find_entry (&collate
->elements
, code
->val
.str
.start
,
1161 code
->val
.str
.len
, &tmp
) >= 0)
1163 element_t
*runp
= (element_t
*) tmp
;
1165 here_weight
= runp
->this_weight
;
1167 else if (find_entry (&collate
->symbols
, code
->val
.str
.start
,
1168 code
->val
.str
.len
, &tmp
) >= 0)
1170 here_weight
= (unsigned int) tmp
;
1175 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1176 code
->val
.str
.len
, code
->val
.str
.start
);
1177 lr_ignore_rest (lr
, 0);
1181 /* When we currently work on a collation symbol we do not expect any
1183 if (collate
->kind
== symbol
)
1186 specification of sorting weight for collation symbol does not make sense"));
1187 lr_ignore_rest (lr
, 0);
1191 /* Add to the current collection of weights. */
1192 if (collate
->nweight
>= collate
->nweight_max
)
1194 collate
->nweight_max
*= 2;
1195 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1196 collate
->nweight_max
);
1199 /* If the weight is currently not known, we remember to patch the
1200 resulting tables. */
1201 if (here_weight
== 0)
1205 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1207 newp
->fname
= lr
->fname
;
1208 newp
->lineno
= lr
->lineno
;
1209 newp
->token
= (const char *) obstack_copy0 (&collate
->element_mem
,
1210 code
->val
.str
.start
,
1212 newp
->where
.idx
= collate
->nweight
++;
1213 newp
->next
= collate
->current_patch
;
1214 collate
->current_patch
= newp
;
1217 collate
->weight
[collate
->nweight
++] = here_weight
;
1218 ++collate
->weight_cnt
[collate
->weight_idx
];
1225 collate_next_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1227 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1229 if (collate
->kind
== symbol
)
1232 specification of sorting weight for collation symbol does not make sense"));
1233 lr_ignore_rest (lr
, 0);
1237 ++collate
->weight_idx
;
1238 if (collate
->weight_idx
>= collate
->nrules
)
1240 lr_error (lr
, _("too many weights"));
1241 lr_ignore_rest (lr
, 0);
1250 collate_simple_weight (struct linereader
*lr
, struct localedef_t
*locale
,
1251 struct token
*code
, struct charset_t
*charset
)
1253 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1254 unsigned int value
= 0;
1256 /* The current token can be `IGNORE', `...', or a string. */
1260 /* This token is allowed in all situations. */
1261 value
= IGNORE_CHAR
;
1265 /* The ellipsis is only allowed for the `...' or `UNDEFINED'
1267 if (collate
->kind
!= ellipsis
&& collate
->kind
!= undefined
)
1270 `...' must only be used in `...' and `UNDEFINED' entries"));
1271 lr_ignore_rest (lr
, 0);
1274 value
= ELLIPSIS_CHAR
;
1278 /* This can become difficult. We have to get the weights which
1279 correspind the the single wide chars in the string. But some
1280 of the `chars' might not be real characters, but collation
1281 elements or symbols. And so the string decoder might have
1282 signaled errors. The string at this point is not translated.
1283 I.e., all <...> sequences are still there. */
1285 char *runp
= code
->val
.str
.start
;
1288 while (*runp
!= '\0')
1290 char *startp
= (char *) runp
;
1291 char *putp
= (char *) runp
;
1294 /* Lookup weight for char and store it. */
1297 while (*++runp
!= '\0' && *runp
!= '>')
1299 if (*runp
== lr
->escape_char
)
1300 if (*++runp
== '\0')
1302 lr_error (lr
, _("unterminated weight name"));
1303 lr_ignore_rest (lr
, 0);
1313 lr_error (lr
, _("empty weight name: line ignored"));
1314 lr_ignore_rest (lr
, 0);
1318 wch
= charset_find_value (charset
, startp
, putp
- startp
);
1319 if (wch
!= ILLEGAL_CHAR_VALUE
)
1323 if (find_entry (&collate
->result
, &wch
, sizeof (wchar_t),
1324 (void *)&pelem
) < 0)
1327 while (pelem
!= NULL
1328 && (pelem
->name
[0] != wch
1329 || pelem
->name
[1] != L
'\0'))
1330 pelem
= pelem
->next
;
1332 value
= pelem
== NULL
? 0 : pelem
->this_weight
;
1334 else if (find_entry (&collate
->elements
, startp
, putp
- startp
,
1337 element_t
*pelem
= (element_t
*) tmp
;
1339 value
= pelem
->this_weight
;
1341 else if (find_entry (&collate
->symbols
, startp
, putp
- startp
,
1344 value
= (unsigned int) tmp
;
1349 lr_error (lr
, _("unknown symbol `%.*s': line ignored"),
1350 putp
- startp
, startp
);
1351 lr_ignore_rest (lr
, 0);
1360 if (*runp
== lr
->escape_char
)
1362 static char digits
[] = "0123456789abcdef";
1367 if (tolower (*runp
) == 'x')
1372 else if (tolower (*runp
) == 'd')
1380 dp
= strchr (digits
, tolower (*runp
));
1381 if (dp
== NULL
|| (dp
- digits
) >= base
)
1385 illegal character constant in string"));
1386 lr_ignore_rest (lr
, 0);
1392 dp
= strchr (digits
, tolower (*runp
));
1393 if (dp
== NULL
|| (dp
- digits
) >= base
)
1401 dp
= strchr (digits
, tolower (*runp
));
1402 if (dp
!= NULL
&& (dp
- digits
< base
))
1411 wch
= (wchar_t) *runp
++;
1413 /* Lookup the weight for WCH. */
1414 if (find_entry (&collate
->result
, &wch
, sizeof (wch
),
1419 && (wp
->name
[0] != wch
|| wp
->name
[1] != L
'\0'))
1422 value
= wp
== NULL
? 0 : wp
->this_weight
;
1424 /* To get the correct name for the error message. */
1427 /**************************************************\
1428 |* I know here is something wrong. Characters in *|
1429 |* the string which are not in the <...> form *|
1430 |* cannot be declared forward for now!!! *|
1431 \**************************************************/
1434 /* Store in weight array. */
1435 if (collate
->nweight
>= collate
->nweight_max
)
1437 collate
->nweight_max
*= 2;
1439 = (unsigned int *) xrealloc (collate
->weight
,
1440 collate
->nweight_max
);
1447 newp
= (patch_t
*) obstack_alloc (&collate
->element_mem
,
1449 newp
->fname
= lr
->fname
;
1450 newp
->lineno
= lr
->lineno
;
1452 = (const char *) obstack_copy0 (&collate
->element_mem
,
1453 startp
, putp
- startp
);
1454 newp
->where
.idx
= collate
->nweight
++;
1455 newp
->next
= collate
->current_patch
;
1456 collate
->current_patch
= newp
;
1459 collate
->weight
[collate
->nweight
++] = value
;
1460 ++collate
->weight_cnt
[collate
->weight_idx
];
1466 assert (! "should not happen");
1470 if (collate
->nweight
>= collate
->nweight_max
)
1472 collate
->nweight_max
*= 2;
1473 collate
->weight
= (unsigned int *) xrealloc (collate
->weight
,
1474 collate
->nweight_max
);
1477 collate
->weight
[collate
->nweight
++] = value
;
1478 ++collate
->weight_cnt
[collate
->weight_idx
];
1485 collate_end_weight (struct linereader
*lr
, struct localedef_t
*locale
)
1487 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1488 element_t
*pelem
= collate
->current_element
;
1490 if (collate
->kind
== symbol
)
1492 /* We don't have to do anything. */
1493 collate
->was_ellipsis
= 0;
1497 if (collate
->kind
== ellipsis
)
1499 /* Before the next line is processed the ellipsis is handled. */
1500 collate
->was_ellipsis
= 1;
1504 assert (collate
->kind
== character
|| collate
->kind
== element
1505 || collate
->kind
== undefined
);
1507 /* Fill in the missing weights. */
1508 while (++collate
->weight_idx
< collate
->nrules
)
1510 collate
->weight
[collate
->nweight
++] = pelem
->this_weight
;
1511 ++collate
->weight_cnt
[collate
->weight_idx
];
1514 /* Now we know how many ordering weights the current
1515 character/element has. Allocate room in the element structure
1516 and copy information. */
1517 pelem
->ordering_len
= collate
->nweight
;
1519 /* First we write an array with the number of values for each
1521 obstack_grow (&collate
->element_mem
, collate
->weight_cnt
,
1522 collate
->nrules
* sizeof (unsigned int));
1524 /* Now the weights itselves. */
1525 obstack_grow (&collate
->element_mem
, collate
->weight
,
1526 collate
->nweight
* sizeof (unsigned int));
1529 pelem
->ordering
= obstack_finish (&collate
->element_mem
);
1531 /* Now we handle the "patches". */
1532 while (collate
->current_patch
!= NULL
)
1534 patch_t
*this_patch
;
1536 this_patch
= collate
->current_patch
;
1538 this_patch
->where
.pos
= &pelem
->ordering
[collate
->nrules
1539 + this_patch
->where
.idx
];
1541 collate
->current_patch
= this_patch
->next
;
1542 this_patch
->next
= collate
->all_patches
;
1543 collate
->all_patches
= this_patch
;
1546 /* Set information for next round. */
1547 collate
->was_ellipsis
= 0;
1548 if (collate
->kind
!= undefined
)
1549 collate
->last_char
= pelem
->name
[0];