1 /* Copyright (C) 1995-2002, 2003, 2005, 2006 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2 as
7 published by the Free Software Foundation.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 #include <sys/param.h>
28 #include "localedef.h"
30 #include "localeinfo.h"
31 #include "linereader.h"
33 #include "elem-hash.h"
35 /* Uncomment the following line in the production version. */
36 /* #define NDEBUG 1 */
39 #define obstack_chunk_alloc malloc
40 #define obstack_chunk_free free
43 __attribute ((always_inline
))
44 obstack_int32_grow (struct obstack
*obstack
, int32_t data
)
46 if (sizeof (int32_t) == sizeof (int))
47 obstack_int_grow (obstack
, data
);
49 obstack_grow (obstack
, &data
, sizeof (int32_t));
53 __attribute ((always_inline
))
54 obstack_int32_grow_fast (struct obstack
*obstack
, int32_t data
)
56 if (sizeof (int32_t) == sizeof (int))
57 obstack_int_grow_fast (obstack
, data
);
59 obstack_grow (obstack
, &data
, sizeof (int32_t));
62 /* Forward declaration. */
65 /* Data type for list of strings. */
68 /* Successor in the known_sections list. */
69 struct section_list
*def_next
;
70 /* Successor in the sections list. */
71 struct section_list
*next
;
72 /* Name of the section. */
74 /* First element of this section. */
75 struct element_t
*first
;
76 /* Last element of this section. */
77 struct element_t
*last
;
78 /* These are the rules for this section. */
79 enum coll_sort_rule
*rules
;
80 /* Index of the rule set in the appropriate section of the output file. */
88 /* Number of elements. */
94 /* Data type for collating element. */
106 /* The following is a bit mask whose bits are set if this element is
107 used in the appropriate level. Interesting for the singlebyte
110 XXX The type here restricts the number of levels to 32. It could
111 be changed if necessary but I doubt this is necessary. */
112 unsigned int used_in_level
;
114 struct element_list_t
*weights
;
116 /* Nonzero if this is a real character definition. */
119 /* Order of the character in the sequence. This information will
120 be used in range expressions. */
124 /* Where does the definition come from. */
128 /* Which section does this belong to. */
129 struct section_list
*section
;
131 /* Predecessor and successor in the order list. */
132 struct element_t
*last
;
133 struct element_t
*next
;
135 /* Next element in multibyte output list. */
136 struct element_t
*mbnext
;
137 struct element_t
*mblast
;
139 /* Next element in wide character output list. */
140 struct element_t
*wcnext
;
141 struct element_t
*wclast
;
144 /* Special element value. */
145 #define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
146 #define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
147 #define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
149 /* Data type for collating symbol. */
154 /* Point to place in the order list. */
155 struct element_t
*order
;
157 /* Where does the definition come from. */
162 /* Sparse table of struct element_t *. */
163 #define TABLE wchead_table
164 #define ELEMENT struct element_t *
170 /* Sparse table of int32_t. */
171 #define TABLE collidx_table
172 #define ELEMENT int32_t
176 /* Sparse table of uint32_t. */
177 #define TABLE collseq_table
178 #define ELEMENT uint32_t
179 #define DEFAULT ~((uint32_t) 0)
183 /* The real definition of the struct for the LC_COLLATE locale. */
184 struct locale_collate_t
189 /* List of known scripts. */
190 struct section_list
*known_sections
;
191 /* List of used sections. */
192 struct section_list
*sections
;
193 /* Current section using definition. */
194 struct section_list
*current_section
;
195 /* There always can be an unnamed section. */
196 struct section_list unnamed_section
;
197 /* To make handling of errors easier we have another section. */
198 struct section_list error_section
;
199 /* Sometimes we are defining the values for collating symbols before
200 the first actual section. */
201 struct section_list symbol_section
;
203 /* Start of the order list. */
204 struct element_t
*start
;
206 /* The undefined element. */
207 struct element_t undefined
;
209 /* This is the cursor for `reorder_after' insertions. */
210 struct element_t
*cursor
;
212 /* This value is used when handling ellipsis. */
213 struct element_t ellipsis_weight
;
215 /* Known collating elements. */
216 hash_table elem_table
;
218 /* Known collating symbols. */
219 hash_table sym_table
;
221 /* Known collation sequences. */
222 hash_table seq_table
;
224 struct obstack mempool
;
226 /* The LC_COLLATE category is a bit special as it is sometimes possible
227 that the definitions from more than one input file contain information.
228 Therefore we keep all relevant input in a list. */
229 struct locale_collate_t
*next
;
231 /* Arrays with heads of the list for each of the leading bytes in
232 the multibyte sequences. */
233 struct element_t
*mbheads
[256];
235 /* Sparse table with the heads of the lists for the wide character
236 sequences. */
237 struct wchead_table wcheads
;
239 /* The arrays with the collation sequence order. */
240 unsigned char mbseqorder
[256];
241 struct collseq_table wcseqorder
;
245 /* We have a few global variables which are used for reading all
246 LC_COLLATE category descriptions in all files. */
247 static uint32_t nrules
;
250 /* We need UTF-8 encoding of numbers. */
252 __attribute ((always_inline
))
253 utf8_encode (char *buf
, int val
)
266 for (step
= 2; step
< 6; ++step
)
267 if ((val
& (~(uint32_t)0 << (5 * step
+ 1))) == 0)
271 *buf
= (unsigned char) (~0xff >> step
);
275 buf
[step
] = 0x80 | (val
& 0x3f);
286 static struct section_list
*
287 make_seclist_elem (struct locale_collate_t
*collate
, const char *string
,
288 struct section_list
*next
)
290 struct section_list
*newp
;
292 newp
= (struct section_list
*) obstack_alloc (&collate
->mempool
,
303 static struct element_t
*
304 new_element (struct locale_collate_t
*collate
, const char *mbs
, size_t mbslen
,
305 const uint32_t *wcs
, const char *name
, size_t namelen
,
308 struct element_t
*newp
;
310 newp
= (struct element_t
*) obstack_alloc (&collate
->mempool
,
312 newp
->name
= name
== NULL
? NULL
: obstack_copy0 (&collate
->mempool
,
316 newp
->mbs
= obstack_copy0 (&collate
->mempool
, mbs
, mbslen
);
326 size_t nwcs
= wcslen ((wchar_t *) wcs
);
328 obstack_grow (&collate
->mempool
, wcs
, nwcs
* sizeof (uint32_t));
329 obstack_grow (&collate
->mempool
, &zero
, sizeof (uint32_t));
330 newp
->wcs
= (uint32_t *) obstack_finish (&collate
->mempool
);
338 newp
->mborder
= NULL
;
340 newp
->used_in_level
= 0;
341 newp
->is_character
= is_character
;
343 /* Will be assigned later. XXX */
344 newp
->mbseqorder
= 0;
345 newp
->wcseqorder
= 0;
347 /* Will be allocated later. */
348 newp
->weights
= NULL
;
353 newp
->section
= collate
->current_section
;
368 static struct symbol_t
*
369 new_symbol (struct locale_collate_t
*collate
, const char *name
, size_t len
)
371 struct symbol_t
*newp
;
373 newp
= (struct symbol_t
*) obstack_alloc (&collate
->mempool
, sizeof (*newp
));
375 newp
->name
= obstack_copy0 (&collate
->mempool
, name
, len
);
385 /* Test whether this name is already defined somewhere. */
387 check_duplicate (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
388 const struct charmap_t
*charmap
,
389 struct repertoire_t
*repertoire
, const char *symbol
,
394 if (find_entry (&charmap
->char_table
, symbol
, symbol_len
, &ignore
) == 0)
396 lr_error (ldfile
, _("`%.*s' already defined in charmap"),
397 (int) symbol_len
, symbol
);
401 if (repertoire
!= NULL
402 && (find_entry (&repertoire
->char_table
, symbol
, symbol_len
, &ignore
)
405 lr_error (ldfile
, _("`%.*s' already defined in repertoire"),
406 (int) symbol_len
, symbol
);
410 if (find_entry (&collate
->sym_table
, symbol
, symbol_len
, &ignore
) == 0)
412 lr_error (ldfile
, _("`%.*s' already defined as collating symbol"),
413 (int) symbol_len
, symbol
);
417 if (find_entry (&collate
->elem_table
, symbol
, symbol_len
, &ignore
) == 0)
419 lr_error (ldfile
, _("`%.*s' already defined as collating element"),
420 (int) symbol_len
, symbol
);
428 /* Read the direction specification. */
430 read_directions (struct linereader
*ldfile
, struct token
*arg
,
431 const struct charmap_t
*charmap
,
432 struct repertoire_t
*repertoire
, struct localedef_t
*result
)
435 int max
= nrules
?: 10;
436 enum coll_sort_rule
*rules
= calloc (max
, sizeof (*rules
));
438 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
444 if (arg
->tok
== tok_forward
)
446 if (rules
[cnt
] & sort_backward
)
450 lr_error (ldfile
, _("\
451 %s: `forward' and `backward' are mutually excluding each other"),
456 else if (rules
[cnt
] & sort_forward
)
460 lr_error (ldfile
, _("\
461 %s: `%s' mentioned more than once in definition of weight %d"),
462 "LC_COLLATE", "forward", cnt
+ 1);
466 rules
[cnt
] |= sort_forward
;
470 else if (arg
->tok
== tok_backward
)
472 if (rules
[cnt
] & sort_forward
)
476 lr_error (ldfile
, _("\
477 %s: `forward' and `backward' are mutually excluding each other"),
482 else if (rules
[cnt
] & sort_backward
)
486 lr_error (ldfile
, _("\
487 %s: `%s' mentioned more than once in definition of weight %d"),
488 "LC_COLLATE", "backward", cnt
+ 1);
492 rules
[cnt
] |= sort_backward
;
496 else if (arg
->tok
== tok_position
)
498 if (rules
[cnt
] & sort_position
)
502 lr_error (ldfile
, _("\
503 %s: `%s' mentioned more than once in definition of weight %d"),
504 "LC_COLLATE", "position", cnt
+ 1);
508 rules
[cnt
] |= sort_position
;
514 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
516 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
|| arg
->tok
== tok_comma
517 || arg
->tok
== tok_semicolon
)
519 if (! valid
&& ! warned
)
521 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
525 /* See whether we have to increment the counter. */
526 if (arg
->tok
!= tok_comma
&& rules
[cnt
] != 0)
528 /* Add the default `forward' if we have seen only `position'. */
529 if (rules
[cnt
] == sort_position
)
530 rules
[cnt
] = sort_position
| sort_forward
;
535 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
536 /* End of line or file, so we exit the loop. */
541 /* See whether we have enough room in the array. */
545 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
548 memset (&rules
[cnt
], '\0', (max
- cnt
) * sizeof (*rules
));
555 /* There must not be any more rules. */
558 lr_error (ldfile
, _("\
559 %s: too many rules; first entry only had %d"),
560 "LC_COLLATE", nrules
);
564 lr_ignore_rest (ldfile
, 0);
573 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
578 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
583 /* Now we know how many rules we have. */
585 rules
= (enum coll_sort_rule
*) xrealloc (rules
,
586 nrules
* sizeof (*rules
));
592 /* Not enough rules in this specification. */
594 lr_error (ldfile
, _("%s: not enough sorting rules"), "LC_COLLATE");
597 rules
[cnt
] = sort_forward
;
598 while (++cnt
< nrules
);
602 collate
->current_section
->rules
= rules
;
606 static struct element_t
*
607 find_element (struct linereader
*ldfile
, struct locale_collate_t
*collate
,
608 const char *str
, size_t len
)
612 /* Search for the entries among the collation sequences already defined. */
613 if (find_entry (&collate
->seq_table
, str
, len
, &result
) != 0)
615 /* Nope, not defined yet. So we see whether it is a
619 if (find_entry (&collate
->sym_table
, str
, len
, &ptr
) == 0)
621 /* It's a collation symbol. */
622 struct symbol_t
*sym
= (struct symbol_t
*) ptr
;
626 result
= sym
->order
= new_element (collate
, NULL
, 0, NULL
,
629 else if (find_entry (&collate
->elem_table
, str
, len
, &result
) != 0)
631 /* It's also no collation element. So it is a character
632 element defined later. */
633 result
= new_element (collate
, NULL
, 0, NULL
, str
, len
, 1);
634 /* Insert it into the sequence table. */
635 insert_entry (&collate
->seq_table
, str
, len
, result
);
639 return (struct element_t
*) result
;
644 unlink_element (struct locale_collate_t
*collate
)
646 if (collate
->cursor
== collate
->start
)
648 assert (collate
->cursor
->next
== NULL
);
649 assert (collate
->cursor
->last
== NULL
);
650 collate
->cursor
= NULL
;
654 if (collate
->cursor
->next
!= NULL
)
655 collate
->cursor
->next
->last
= collate
->cursor
->last
;
656 if (collate
->cursor
->last
!= NULL
)
657 collate
->cursor
->last
->next
= collate
->cursor
->next
;
658 collate
->cursor
= collate
->cursor
->last
;
664 insert_weights (struct linereader
*ldfile
, struct element_t
*elem
,
665 const struct charmap_t
*charmap
,
666 struct repertoire_t
*repertoire
, struct localedef_t
*result
,
667 enum token_t ellipsis
)
671 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
673 /* Initialize all the fields. */
674 elem
->file
= ldfile
->fname
;
675 elem
->line
= ldfile
->lineno
;
677 elem
->last
= collate
->cursor
;
678 elem
->next
= collate
->cursor
? collate
->cursor
->next
: NULL
;
679 if (collate
->cursor
!= NULL
&& collate
->cursor
->next
!= NULL
)
680 collate
->cursor
->next
->last
= elem
;
681 if (collate
->cursor
!= NULL
)
682 collate
->cursor
->next
= elem
;
683 if (collate
->start
== NULL
)
685 assert (collate
->cursor
== NULL
);
686 collate
->start
= elem
;
689 elem
->section
= collate
->current_section
;
691 if (collate
->current_section
->first
== NULL
)
692 collate
->current_section
->first
= elem
;
693 if (collate
->current_section
->last
== collate
->cursor
)
694 collate
->current_section
->last
= elem
;
696 collate
->cursor
= elem
;
698 elem
->weights
= (struct element_list_t
*)
699 obstack_alloc (&collate
->mempool
, nrules
* sizeof (struct element_list_t
));
700 memset (elem
->weights
, '\0', nrules
* sizeof (struct element_list_t
));
704 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
707 if (arg
->tok
== tok_eof
|| arg
->tok
== tok_eol
)
710 if (arg
->tok
== tok_ignore
)
712 /* The weight for this level has to be ignored. We use the
713 null pointer to indicate this. */
714 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
715 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
716 elem
->weights
[weight_cnt
].w
[0] = NULL
;
717 elem
->weights
[weight_cnt
].cnt
= 1;
719 else if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
722 struct element_t
*val
;
726 if (arg
->tok
== tok_bsymbol
)
728 symstr
= arg
->val
.str
.startmb
;
729 symlen
= arg
->val
.str
.lenmb
;
733 snprintf (ucs4str
, sizeof (ucs4str
), "U%08X", arg
->val
.ucs4
);
738 val
= find_element (ldfile
, collate
, symstr
, symlen
);
742 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
743 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
744 elem
->weights
[weight_cnt
].w
[0] = val
;
745 elem
->weights
[weight_cnt
].cnt
= 1;
747 else if (arg
->tok
== tok_string
)
749 /* Split the string up in the individual characters and put
750 the element definitions in the list. */
751 const char *cp
= arg
->val
.str
.startmb
;
753 struct element_t
*charelem
;
754 struct element_t
**weights
= NULL
;
759 lr_error (ldfile
, _("%s: empty weight string not allowed"),
761 lr_ignore_rest (ldfile
, 0);
769 /* Ahh, it's a bsymbol or an UCS4 value. If it's
770 the latter we have to unify the name. */
771 const char *startp
= ++cp
;
776 if (*cp
== ldfile
->escape_char
)
779 /* It's a syntax error. */
785 if (cp
- startp
== 5 && startp
[0] == 'U'
786 && isxdigit (startp
[1]) && isxdigit (startp
[2])
787 && isxdigit (startp
[3]) && isxdigit (startp
[4]))
789 unsigned int ucs4
= strtoul (startp
+ 1, NULL
, 16);
792 newstr
= (char *) xmalloc (10);
793 snprintf (newstr
, 10, "U%08X", ucs4
);
801 charelem
= find_element (ldfile
, collate
, startp
, len
);
806 /* People really shouldn't use characters directly in
807 the string. Especially since it's not really clear
808 what this means. We interpret all characters in the
809 string as if that would be bsymbols. Otherwise we
810 would have to match back to bsymbols somehow and this
811 is not what people normally expect. */
812 charelem
= find_element (ldfile
, collate
, cp
++, 1);
815 if (charelem
== NULL
)
817 /* We ignore the rest of the line. */
818 lr_ignore_rest (ldfile
, 0);
822 /* Add the pointer. */
825 struct element_t
**newp
;
827 newp
= (struct element_t
**)
828 alloca (max
* sizeof (struct element_t
*));
829 memcpy (newp
, weights
, cnt
* sizeof (struct element_t
*));
832 weights
[cnt
++] = charelem
;
836 /* Now store the information. */
837 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
838 obstack_alloc (&collate
->mempool
,
839 cnt
* sizeof (struct element_t
*));
840 memcpy (elem
->weights
[weight_cnt
].w
, weights
,
841 cnt
* sizeof (struct element_t
*));
842 elem
->weights
[weight_cnt
].cnt
= cnt
;
844 /* We don't need the string anymore. */
845 free (arg
->val
.str
.startmb
);
847 else if (ellipsis
!= tok_none
848 && (arg
->tok
== tok_ellipsis2
849 || arg
->tok
== tok_ellipsis3
850 || arg
->tok
== tok_ellipsis4
))
852 /* It must be the same ellipsis as used in the initial column. */
853 if (arg
->tok
!= ellipsis
)
854 lr_error (ldfile
, _("\
855 %s: weights must use the same ellipsis symbol as the name"),
858 /* The weight for this level will depend on the element
859 iterating over the range. Put a placeholder. */
860 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
861 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
862 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
863 elem
->weights
[weight_cnt
].cnt
= 1;
868 /* It's a syntax error. */
869 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
870 lr_ignore_rest (ldfile
, 0);
874 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
875 /* This better should be the end of the line or a semicolon. */
876 if (arg
->tok
== tok_semicolon
)
877 /* OK, ignore this and read the next token. */
878 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
879 else if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
881 /* It's a syntax error. */
882 lr_error (ldfile
, _("%s: syntax error"), "LC_COLLATE");
883 lr_ignore_rest (ldfile
, 0);
887 while (++weight_cnt
< nrules
);
889 if (weight_cnt
< nrules
)
891 /* This means the rest of the line uses the current element as
895 elem
->weights
[weight_cnt
].w
= (struct element_t
**)
896 obstack_alloc (&collate
->mempool
, sizeof (struct element_t
*));
897 if (ellipsis
== tok_none
)
898 elem
->weights
[weight_cnt
].w
[0] = elem
;
900 elem
->weights
[weight_cnt
].w
[0] = ELEMENT_ELLIPSIS2
;
901 elem
->weights
[weight_cnt
].cnt
= 1;
903 while (++weight_cnt
< nrules
);
907 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_bsymbol
)
909 /* Too many rule values. */
910 lr_error (ldfile
, _("%s: too many values"), "LC_COLLATE");
911 lr_ignore_rest (ldfile
, 0);
914 lr_ignore_rest (ldfile
, arg
->tok
!= tok_eol
&& arg
->tok
!= tok_eof
);
920 insert_value (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
921 const struct charmap_t
*charmap
, struct repertoire_t
*repertoire
,
922 struct localedef_t
*result
)
924 /* First find out what kind of symbol this is. */
927 struct element_t
*elem
= NULL
;
928 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
930 /* Try to find the character in the charmap. */
931 seq
= charmap_find_value (charmap
, symstr
, symlen
);
933 /* Determine the wide character. */
934 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
936 wc
= repertoire_find_value (repertoire
, symstr
, symlen
);
943 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
945 /* It's no character, so look through the collation elements and
948 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) != 0)
951 struct symbol_t
*sym
= NULL
;
953 /* It's also no collation element. Therefore it's either a
954 collating symbol or it's a character which is not
955 supported by the character set. In the latter case we
956 simply create a dummy entry. */
957 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &result
) == 0)
959 /* It's a collation symbol. */
960 sym
= (struct symbol_t
*) result
;
967 elem
= new_element (collate
, NULL
, 0, NULL
, symstr
, symlen
, 0);
972 /* Enter a fake element in the sequence table. This
973 won't cause anything in the output since there is
974 no multibyte or wide character associated with
976 insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
);
980 /* Copy the result back. */
985 /* Otherwise the symbol stands for a character. */
987 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) != 0)
989 uint32_t wcs
[2] = { wc
, 0 };
991 /* We have to allocate an entry. */
992 elem
= new_element (collate
, seq
!= NULL
? seq
->bytes
: NULL
,
993 seq
!= NULL
? seq
->nbytes
: 0,
994 wc
== ILLEGAL_CHAR_VALUE
? NULL
: wcs
,
997 /* And add it to the table. */
998 if (insert_entry (&collate
->seq_table
, symstr
, symlen
, elem
) != 0)
999 /* This cannot happen. */
1000 assert (! "Internal error");
1004 /* Copy the result back. */
1007 /* Maybe the character was used before the definition. In this case
1008 we have to insert the byte sequences now. */
1009 if (elem
->mbs
== NULL
&& seq
!= NULL
)
1011 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1012 seq
->bytes
, seq
->nbytes
);
1013 elem
->nmbs
= seq
->nbytes
;
1016 if (elem
->wcs
== NULL
&& wc
!= ILLEGAL_CHAR_VALUE
)
1018 uint32_t wcs
[2] = { wc
, 0 };
1020 elem
->wcs
= obstack_copy (&collate
->mempool
, wcs
, sizeof (wcs
));
1026 /* Test whether this element is not already in the list. */
1027 if (elem
->next
!= NULL
|| elem
== collate
->cursor
)
1029 lr_error (ldfile
, _("order for `%.*s' already defined at %s:%Zu"),
1030 (int) symlen
, symstr
, elem
->file
, elem
->line
);
1031 lr_ignore_rest (ldfile
, 0);
1035 insert_weights (ldfile
, elem
, charmap
, repertoire
, result
, tok_none
);
1042 handle_ellipsis (struct linereader
*ldfile
, const char *symstr
, size_t symlen
,
1043 enum token_t ellipsis
, const struct charmap_t
*charmap
,
1044 struct repertoire_t
*repertoire
,
1045 struct localedef_t
*result
)
1047 struct element_t
*startp
;
1048 struct element_t
*endp
;
1049 struct locale_collate_t
*collate
= result
->categories
[LC_COLLATE
].collate
;
1051 /* Unlink the entry added for the ellipsis. */
1052 unlink_element (collate
);
1053 startp
= collate
->cursor
;
1055 /* Process and add the end-entry. */
1057 && insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
))
1058 /* Something went wrong with inserting the to-value. This means
1059 we cannot process the ellipsis. */
1062 /* Reset the cursor. */
1063 collate
->cursor
= startp
;
1065 /* Now we have to handle many different situations:
1066 - we have to distinguish between the three different ellipsis forms
1067 - is the ellipsis at the beginning, in the middle, or at the end.
1069 endp
= collate
->cursor
->next
;
1070 assert (symstr
== NULL
|| endp
!= NULL
);
1072 /* XXX The following is probably very wrong since also collating symbols
1073 can appear in ranges. But do we want/can refine the test for that? */
1075 /* Both, the start and the end symbol, must stand for characters. */
1076 if ((startp
!= NULL
&& (startp
->name
== NULL
|| ! startp
->is_character
))
1077 || (endp
!= NULL
&& (endp
->name
== NULL
|| ! endp
->is_character
)))
1079 lr_error (ldfile
, _("\
1080 %s: the start and the end symbol of a range must stand for characters"),
1086 if (ellipsis
== tok_ellipsis3
)
1088 /* One requirement we make here: the length of the byte
1089 sequences for the first and end character must be the same.
1090 This is mainly to prevent unwanted effects and this is often
1091 not what is wanted. */
1092 size_t len
= (startp
->mbs
!= NULL
? startp
->nmbs
1093 : (endp
->mbs
!= NULL
? endp
->nmbs
: 0));
1094 char mbcnt
[len
+ 1];
1095 char mbend
[len
+ 1];
1097 /* Well, this should be caught somewhere else already. Just to
1099 assert (startp
== NULL
|| startp
->wcs
== NULL
|| startp
->wcs
[1] == 0);
1100 assert (endp
== NULL
|| endp
->wcs
== NULL
|| endp
->wcs
[1] == 0);
1102 if (startp
!= NULL
&& endp
!= NULL
1103 && startp
->mbs
!= NULL
&& endp
->mbs
!= NULL
1104 && startp
->nmbs
!= endp
->nmbs
)
1106 lr_error (ldfile
, _("\
1107 %s: byte sequences of first and last character must have the same length"),
1112 /* Determine whether we have to generate multibyte sequences. */
1113 if ((startp
== NULL
|| startp
->mbs
!= NULL
)
1114 && (endp
== NULL
|| endp
->mbs
!= NULL
))
1119 /* Prepare the beginning byte sequence. This is either from the
1120 beginning byte sequence or it is all nulls if it was an
1121 initial ellipsis. */
1122 if (startp
== NULL
|| startp
->mbs
== NULL
)
1123 memset (mbcnt
, '\0', len
);
1126 memcpy (mbcnt
, startp
->mbs
, len
);
1128 /* And increment it so that the value is the first one we will
1130 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1131 if (++mbcnt
[cnt
] != '\0')
1136 /* And the end sequence. */
1137 if (endp
== NULL
|| endp
->mbs
== NULL
)
1138 memset (mbend
, '\0', len
);
1140 memcpy (mbend
, endp
->mbs
, len
);
1143 /* Test whether we have a correct range. */
1144 ret
= memcmp (mbcnt
, mbend
, len
);
1148 lr_error (ldfile
, _("%s: byte sequence of first character of \
1149 sequence is not lower than that of the last character"), "LC_COLLATE");
1153 /* Generate the byte sequences data. */
1156 struct charseq
*seq
;
1158 /* Quite a bit of work ahead. We have to find the character
1159 definition for the byte sequence and then determine the
1160 wide character belonging to it. */
1161 seq
= charmap_find_symbol (charmap
, mbcnt
, len
);
1164 struct element_t
*elem
;
1167 /* I don't think this can ever happen. */
1168 assert (seq
->name
!= NULL
);
1169 namelen
= strlen (seq
->name
);
1171 if (seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1172 seq
->ucs4
= repertoire_find_value (repertoire
, seq
->name
,
1175 /* Now we are ready to insert the new value in the
1176 sequence. Find out whether the element is
1179 if (find_entry (&collate
->seq_table
, seq
->name
, namelen
,
1182 uint32_t wcs
[2] = { seq
->ucs4
, 0 };
1184 /* We have to allocate an entry. */
1185 elem
= new_element (collate
, mbcnt
, len
,
1186 seq
->ucs4
== ILLEGAL_CHAR_VALUE
1187 ? NULL
: wcs
, seq
->name
,
1190 /* And add it to the table. */
1191 if (insert_entry (&collate
->seq_table
, seq
->name
,
1192 namelen
, elem
) != 0)
1193 /* This cannot happen. */
1194 assert (! "Internal error");
1197 /* Copy the result. */
1200 /* Test whether this element is not already in the list. */
1201 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1202 && elem
->next
== collate
->cursor
))
1204 lr_error (ldfile
, _("\
1205 order for `%.*s' already defined at %s:%Zu"),
1206 (int) namelen
, seq
->name
,
1207 elem
->file
, elem
->line
);
1211 /* Enqueue the new element. */
1212 elem
->last
= collate
->cursor
;
1213 if (collate
->cursor
== NULL
)
1217 elem
->next
= collate
->cursor
->next
;
1218 elem
->last
->next
= elem
;
1219 if (elem
->next
!= NULL
)
1220 elem
->next
->last
= elem
;
1222 if (collate
->start
== NULL
)
1224 assert (collate
->cursor
== NULL
);
1225 collate
->start
= elem
;
1227 collate
->cursor
= elem
;
1229 /* Add the weight value. We take them from the
1230 `ellipsis_weight' member of `collate'. */
1231 elem
->weights
= (struct element_list_t
*)
1232 obstack_alloc (&collate
->mempool
,
1233 nrules
* sizeof (struct element_list_t
));
1234 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1235 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1236 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1237 == ELEMENT_ELLIPSIS2
))
1239 elem
->weights
[cnt
].w
= (struct element_t
**)
1240 obstack_alloc (&collate
->mempool
,
1241 sizeof (struct element_t
*));
1242 elem
->weights
[cnt
].w
[0] = elem
;
1243 elem
->weights
[cnt
].cnt
= 1;
1247 /* Simply use the weight from `ellipsis_weight'. */
1248 elem
->weights
[cnt
].w
=
1249 collate
->ellipsis_weight
.weights
[cnt
].w
;
1250 elem
->weights
[cnt
].cnt
=
1251 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
1255 /* Increment for the next round. */
1257 for (cnt
= len
- 1; cnt
>= 0; --cnt
)
1258 if (++mbcnt
[cnt
] != '\0')
1261 /* Find out whether this was all. */
1262 if (cnt
< 0 || memcmp (mbcnt
, mbend
, len
) >= 0)
1263 /* Yep, that's all. */
1270 /* For symbolic range we naturally must have a beginning and an
1271 end specified by the user. */
1273 lr_error (ldfile
, _("\
1274 %s: symbolic range ellipsis must not directly follow `order_start'"),
1276 else if (endp
== NULL
)
1277 lr_error (ldfile
, _("\
1278 %s: symbolic range ellipsis must not be directly followed by `order_end'"),
1282 /* Determine the range. To do so we have to determine the
1283 common prefix of the both names and then the numeric
1284 values of both ends. */
1285 size_t lenfrom
= strlen (startp
->name
);
1286 size_t lento
= strlen (endp
->name
);
1287 char buf
[lento
+ 1];
1292 int base
= ellipsis
== tok_ellipsis2
? 16 : 10;
1294 if (lenfrom
!= lento
)
1297 lr_error (ldfile
, _("\
1298 `%s' and `%.*s' are no valid names for symbolic range"),
1299 startp
->name
, (int) lento
, endp
->name
);
1303 while (startp
->name
[preflen
] == endp
->name
[preflen
])
1304 if (startp
->name
[preflen
] == '\0')
1305 /* Nothing to be done. The start and end point are identical
1306 and while inserting the end point we have already given
1307 the user an error message. */
1313 from
= strtol (startp
->name
+ preflen
, &cp
, base
);
1314 if ((from
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1318 to
= strtol (endp
->name
+ preflen
, &cp
, base
);
1319 if ((to
== UINT_MAX
&& errno
== ERANGE
) || *cp
!= '\0')
1322 /* Copy the prefix. */
1323 memcpy (buf
, startp
->name
, preflen
);
1325 /* Loop over all values. */
1326 for (++from
; from
< to
; ++from
)
1328 struct element_t
*elem
= NULL
;
1329 struct charseq
*seq
;
1333 /* Generate the name. */
1334 sprintf (buf
+ preflen
, base
== 10 ? "%ld" : "%lX", from
);
1336 /* Look whether this name is already defined. */
1338 if (find_entry (&collate
->seq_table
, buf
, symlen
, &ptr
) == 0)
1340 /* Copy back the result. */
1343 if (elem
->next
!= NULL
|| (collate
->cursor
!= NULL
1344 && elem
->next
== collate
->cursor
))
1346 lr_error (ldfile
, _("\
1347 %s: order for `%.*s' already defined at %s:%Zu"),
1348 "LC_COLLATE", (int) lenfrom
, buf
,
1349 elem
->file
, elem
->line
);
1353 if (elem
->name
== NULL
)
1355 lr_error (ldfile
, _("%s: `%s' must be a character"),
1361 if (elem
== NULL
|| (elem
->mbs
== NULL
&& elem
->wcs
== NULL
))
1363 /* Search for a character of this name. */
1364 seq
= charmap_find_value (charmap
, buf
, lenfrom
);
1365 if (seq
== NULL
|| seq
->ucs4
== UNINITIALIZED_CHAR_VALUE
)
1367 wc
= repertoire_find_value (repertoire
, buf
, lenfrom
);
1375 if (wc
== ILLEGAL_CHAR_VALUE
&& seq
== NULL
)
1376 /* We don't know anything about a character with this
1377 name. XXX Should we warn? */
1382 uint32_t wcs
[2] = { wc
, 0 };
1384 /* We have to allocate an entry. */
1385 elem
= new_element (collate
,
1386 seq
!= NULL
? seq
->bytes
: NULL
,
1387 seq
!= NULL
? seq
->nbytes
: 0,
1388 wc
== ILLEGAL_CHAR_VALUE
1389 ? NULL
: wcs
, buf
, lenfrom
, 1);
1393 /* Update the element. */
1396 elem
->mbs
= obstack_copy0 (&collate
->mempool
,
1397 seq
->bytes
, seq
->nbytes
);
1398 elem
->nmbs
= seq
->nbytes
;
1401 if (wc
!= ILLEGAL_CHAR_VALUE
)
1405 obstack_grow (&collate
->mempool
,
1406 &wc
, sizeof (uint32_t));
1407 obstack_grow (&collate
->mempool
,
1408 &zero
, sizeof (uint32_t));
1409 elem
->wcs
= obstack_finish (&collate
->mempool
);
1414 elem
->file
= ldfile
->fname
;
1415 elem
->line
= ldfile
->lineno
;
1416 elem
->section
= collate
->current_section
;
1419 /* Enqueue the new element. */
1420 elem
->last
= collate
->cursor
;
1421 elem
->next
= collate
->cursor
->next
;
1422 elem
->last
->next
= elem
;
1423 if (elem
->next
!= NULL
)
1424 elem
->next
->last
= elem
;
1425 collate
->cursor
= elem
;
1427 /* Now add the weights. They come from the `ellipsis_weights'
1428 member of `collate'. */
1429 elem
->weights
= (struct element_list_t
*)
1430 obstack_alloc (&collate
->mempool
,
1431 nrules
* sizeof (struct element_list_t
));
1432 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1433 if (collate
->ellipsis_weight
.weights
[cnt
].cnt
== 1
1434 && (collate
->ellipsis_weight
.weights
[cnt
].w
[0]
1435 == ELEMENT_ELLIPSIS2
))
1437 elem
->weights
[cnt
].w
= (struct element_t
**)
1438 obstack_alloc (&collate
->mempool
,
1439 sizeof (struct element_t
*));
1440 elem
->weights
[cnt
].w
[0] = elem
;
1441 elem
->weights
[cnt
].cnt
= 1;
1445 /* Simply use the weight from `ellipsis_weight'. */
1446 elem
->weights
[cnt
].w
=
1447 collate
->ellipsis_weight
.weights
[cnt
].w
;
1448 elem
->weights
[cnt
].cnt
=
1449 collate
->ellipsis_weight
.weights
[cnt
].cnt
;
/* Prepare the LC_COLLATE category of LOCALE before its definition is
   processed.

   When IGNORE_CONTENT is zero and LOCALE has no collate data yet:
   - if COPY_LOCALE is NULL a zero-filled `struct locale_collate_t' is
     allocated with xcalloc, its three hash tables (elem_table,
     sym_table, seq_table) and its mempool obstack are initialized and
     col_weight_max is set to -1;
   - otherwise LOCALE simply shares the collate pointer of COPY_LOCALE
     (no copy is made).

   The visible code also clears the translate_strings and
   return_widestr fields of LDFILE.  */
1458 collate_startup (struct linereader
*ldfile
, struct localedef_t
*locale
,
1459 struct localedef_t
*copy_locale
, int ignore_content
)
1461 if (!ignore_content
&& locale
->categories
[LC_COLLATE
].collate
== NULL
)
1463 struct locale_collate_t
*collate
;
1465 if (copy_locale
== NULL
)
/* No locale to copy from: allocate fresh, zero-initialized data.  */
1467 collate
= locale
->categories
[LC_COLLATE
].collate
=
1468 (struct locale_collate_t
*)
1469 xcalloc (1, sizeof (struct locale_collate_t
));
1471 /* Init the various data structures. */
1472 init_hash (&collate
->elem_table
, 100);
1473 init_hash (&collate
->sym_table
, 100);
1474 init_hash (&collate
->seq_table
, 500);
1475 obstack_init (&collate
->mempool
);
1477 collate
->col_weight_max
= -1;
1480 /* Reuse the copy_locale's data structures. */
1481 collate
= locale
->categories
[LC_COLLATE
].collate
=
1482 copy_locale
->categories
[LC_COLLATE
].collate
;
/* Clear the two line reader fields used while parsing this category.  */
1485 ldfile
->translate_strings
= 0;
1486 ldfile
->return_widestr
= 0;
/* Post-process the LC_COLLATE data of LOCALE.

   The steps visible in this function are, in order:
   - report when no LC_COLLATE data exists at all;
   - check that for every level the sort_position rule bit agrees
     between each section and the first section;
   - walk the element list from collate->start, set the used_in_level
     bit of every element referenced as a weight and redirect weights
     whose target has no weights of its own to collate->undefined;
   - give every used element a per-level mborder value and a wcorder
     value, and link elements with a multibyte sequence into the
     mbheads[] chain selected by their first byte;
   - point every still-empty mbheads[1..255] slot at collate->undefined;
   - fill the wcheads and wcseqorder tables from the elements that have
     a wide character sequence;
   - give collate->undefined its mborder/wcorder values;
   - assign ruleidx to the sections so that sections whose rules compare
     equal share one index (asserted to be at most 128).

   NOTE(review): several lines of this function (braces and some
   statements) are not present in this text, so the exact nesting of
   the steps above should be confirmed before relying on it.  */
1491 collate_finish (struct localedef_t
*locale
, const struct charmap_t
*charmap
)
1493 /* Now is the time when we can assign the individual collation
1494 values for all the symbols. We have possibly different values
1495 for the wide- and the multibyte-character symbols. This is done
1496 since it might make a difference in the encoding if there is in
1497 some cases no multibyte-character but there are wide-characters.
1498 (The other way around it is not important since the encoded
1499 collation value in the wide-character case is 32 bits wide and
1500 therefore requires no encoding).
1502 The lowest collation value assigned is 2. Zero is reserved for
1503 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1504 functions and 1 is used to separate the individual passes for the
1507 We also have to construct a list with all the bytes/words which
1508 can come first in a sequence, followed by all the elements which
1509 also start with this byte/word. The order is reverse which has
1510 among others the important effect that longer strings are located
1511 first in the list. This is required for the output data since
1512 the algorithm used in `strcoll' etc depends on this.
1514 The multibyte case is easy. We simply sort into an array with
1516 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1521 struct element_t
*runp
;
1523 int need_undefined
= 0;
1524 struct section_list
*sect
;
1526 int nr_wide_elems
= 0;
1528 if (collate
== NULL
)
1530 /* No data, no check. */
1532 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1537 /* If this assertion is hit change the type in `element_t'. */
1538 assert (nrules
<= sizeof (runp
->used_in_level
) * 8);
1540 /* Make sure that the `position' rule is used either in all sections
1542 for (i
= 0; i
< nrules
; ++i
)
1543 for (sect
= collate
->sections
; sect
!= NULL
; sect
= sect
->next
)
1544 if (sect
->rules
!= NULL
1545 && ((sect
->rules
[i
] & sort_position
)
1546 != (collate
->sections
->rules
[i
] & sort_position
)))
1548 WITH_CUR_LOCALE (error (0, 0, _("\
1549 %s: `position' must be used for a specific level in all sections or none"),
1554 /* Find out which elements are used at which level. At the same
1555 time we find out whether we have any undefined symbols. */
1556 runp
= collate
->start
;
1557 while (runp
!= NULL
)
1559 if (runp
->mbs
!= NULL
)
1561 for (i
= 0; i
< nrules
; ++i
)
1565 for (j
= 0; j
< runp
->weights
[i
].cnt
; ++j
)
1566 /* A NULL pointer as the weight means IGNORE. */
1567 if (runp
->weights
[i
].w
[j
] != NULL
)
1569 if (runp
->weights
[i
].w
[j
]->weights
== NULL
)
1571 WITH_CUR_LOCALE (error_at_line (0, 0, runp
->file
,
1573 _("symbol `%s' not defined"),
1574 runp
->weights
[i
].w
[j
]->name
));
1577 runp
->weights
[i
].w
[j
] = &collate
->undefined
;
1580 /* Set the bit for the level. */
1581 runp
->weights
[i
].w
[j
]->used_in_level
|= 1 << i
;
1586 /* Up to the next entry. */
1590 /* Walk through the list of defined sequences and assign weights. Also
1591 create the data structure which will allow generating the single byte
1592 character based tables.
1594 Since at each time only the weights for each of the rules are
1595 only compared to other weights for this rule it is possible to
1596 assign more compact weight values than simply counting all
1597 weights in sequence. We can assign weights from 3, one for each
1598 rule individually and only for those elements, which are actually
1601 Why is this important? It is not for the wide char table. But
1602 it is for the singlebyte output since here larger numbers have to
1603 be encoded to make it possible to emit the value as a byte
1605 for (i
= 0; i
< nrules
; ++i
)
1610 runp
= collate
->start
;
1611 while (runp
!= NULL
)
1613 /* Determine the order. */
1614 if (runp
->used_in_level
!= 0)
1616 runp
->mborder
= (int *) obstack_alloc (&collate
->mempool
,
1617 nrules
* sizeof (int));
1619 for (i
= 0; i
< nrules
; ++i
)
1620 if ((runp
->used_in_level
& (1 << i
)) != 0)
1621 runp
->mborder
[i
] = mbact
[i
]++;
1623 runp
->mborder
[i
] = 0;
1626 if (runp
->mbs
!= NULL
)
1628 struct element_t
**eptr
;
1629 struct element_t
*lastp
= NULL
;
1631 /* Find the point where to insert in the list. */
1632 eptr
= &collate
->mbheads
[((unsigned char *) runp
->mbs
)[0]];
1633 while (*eptr
!= NULL
)
1635 if ((*eptr
)->nmbs
< runp
->nmbs
)
1638 if ((*eptr
)->nmbs
== runp
->nmbs
)
1640 int c
= memcmp ((*eptr
)->mbs
, runp
->mbs
, runp
->nmbs
);
1644 /* This should not happen. It means that we have
1645 two symbols with the same byte sequence. It is
1646 of course an error. */
1647 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1650 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1651 error_at_line (0, 0, runp
->file
,
1658 /* Insert it here. */
1662 /* To the next entry. */
1664 eptr
= &(*eptr
)->mbnext
;
1667 /* Set the pointers. */
1668 runp
->mbnext
= *eptr
;
1669 runp
->mblast
= lastp
;
1671 (*eptr
)->mblast
= runp
;
1677 if (runp
->used_in_level
)
1679 runp
->wcorder
= wcact
++;
1681 /* We take the opportunity to count the elements which have
1686 if (runp
->is_character
)
1688 if (runp
->nmbs
== 1)
1689 collate
->mbseqorder
[((unsigned char *) runp
->mbs
)[0]] = mbseqact
++;
1691 runp
->wcseqorder
= wcseqact
++;
1693 else if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
)
1694 /* This is for collation elements. */
1695 runp
->wcseqorder
= wcseqact
++;
1697 /* Up to the next entry. */
1701 /* Find out whether any of the `mbheads' entries is unset. In this
1702 case we use the UNDEFINED entry. */
1703 for (i
= 1; i
< 256; ++i
)
1704 if (collate
->mbheads
[i
] == NULL
)
1707 collate
->mbheads
[i
] = &collate
->undefined
;
1710 /* Now to the wide character case. */
1711 collate
->wcheads
.p
= 6;
1712 collate
->wcheads
.q
= 10;
1713 wchead_table_init (&collate
->wcheads
);
1715 collate
->wcseqorder
.p
= 6;
1716 collate
->wcseqorder
.q
= 10;
1717 collseq_table_init (&collate
->wcseqorder
);
1720 runp
= collate
->start
;
1721 while (runp
!= NULL
)
1723 if (runp
->wcs
!= NULL
)
1725 struct element_t
*e
;
1726 struct element_t
**eptr
;
1727 struct element_t
*lastp
;
1729 /* Insert the collation sequence value. */
1730 if (runp
->is_character
)
1731 collseq_table_add (&collate
->wcseqorder
, runp
->wcs
[0],
1734 /* Find the point where to insert in the list. */
1735 e
= wchead_table_get (&collate
->wcheads
, runp
->wcs
[0]);
1738 while (*eptr
!= NULL
)
1740 if ((*eptr
)->nwcs
< runp
->nwcs
)
1743 if ((*eptr
)->nwcs
== runp
->nwcs
)
1745 int c
= wmemcmp ((wchar_t *) (*eptr
)->wcs
,
1746 (wchar_t *) runp
->wcs
, runp
->nwcs
);
1750 /* This should not happen. It means that we have
1751 two symbols with the same byte sequence. It is
1752 of course an error. */
1753 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr
)->file
,
1756 symbol `%s' has the same encoding as"), (*eptr
)->name
);
1757 error_at_line (0, 0, runp
->file
,
1764 /* Insert it here. */
1768 /* To the next entry. */
1770 eptr
= &(*eptr
)->wcnext
;
1773 /* Set the pointers. */
1774 runp
->wcnext
= *eptr
;
1775 runp
->wclast
= lastp
;
1777 (*eptr
)->wclast
= runp
;
1780 wchead_table_add (&collate
->wcheads
, runp
->wcs
[0], e
);
1785 /* Up to the next entry. */
1789 collseq_table_finalize (&collate
->wcseqorder
);
1791 /* Now determine whether the UNDEFINED entry is needed and if yes,
1792 whether it was defined. */
1793 collate
->undefined
.used_in_level
= need_undefined
? ~0ul : 0;
1794 if (collate
->undefined
.file
== NULL
)
1798 /* This seems not to be enforced by recent standards. Don't
1799 emit an error, simply append UNDEFINED at the end. */
1801 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1803 /* Add UNDEFINED at the end. */
1804 collate
->undefined
.mborder
=
1805 (int *) obstack_alloc (&collate
->mempool
, nrules
* sizeof (int));
1807 for (i
= 0; i
< nrules
; ++i
)
1808 collate
->undefined
.mborder
[i
] = mbact
[i
]++;
1811 /* In any case we will need the definition for the wide character
1812 case. But we will not complain that it is missing since the
1813 specification strangely enough does not seem to account for
1815 collate
->undefined
.wcorder
= wcact
++;
1818 /* Finally, try to unify the rules for the sections. Whenever the rules
1819 for a section are the same as those for another section give the
1820 ruleset the same index. Since there are never many section we can
1821 use an O(n^2) algorithm here. */
1822 sect
= collate
->sections
;
1823 while (sect
!= NULL
&& sect
->rules
== NULL
)
1826 /* Bail out if we have no sections because of earlier errors. */
1829 WITH_CUR_LOCALE (error (EXIT_FAILURE
, 0,
1830 _("too many errors; giving up")));
1837 struct section_list
*osect
= collate
->sections
;
1839 while (osect
!= sect
)
1840 if (osect
->rules
!= NULL
1841 && memcmp (osect
->rules
, sect
->rules
, nrules
) == 0)
1844 osect
= osect
->next
;
1847 sect
->ruleidx
= ruleidx
++;
1849 sect
->ruleidx
= osect
->ruleidx
;
1854 while (sect
!= NULL
&& sect
->rules
== NULL
);
1856 while (sect
!= NULL
);
1857 /* We are currently not prepared for more than 128 rulesets. But this
1858 should never really be a problem. */
1859 assert (ruleidx
<= 128);
/* Append the multibyte weight record of ELEM to the obstack POOL.

   For each of the `nrules' rules the record holds one length byte
   followed by that many bytes produced by utf8_encode from the
   mborder value (for that rule) of every non-NULL weight of ELEM.
   NULL weights stand for IGNORE and contribute nothing.

   The value returned is the size of the object growing in POOL
   before this record was appended, combined with the low seven bits
   of the ruleidx of ELEM's section shifted into bits 24-30.

   ELEM == &COLLATE->undefined is treated specially; the statement
   executed in that case is not part of this text.  */
1864 output_weight (struct obstack
*pool
, struct locale_collate_t
*collate
,
1865 struct element_t
*elem
)
1870 /* Optimize the use of UNDEFINED. */
1871 if (elem
== &collate
->undefined
)
1872 /* The weights are already inserted. */
1875 /* This byte can start exactly one collation element and this is
1876 a single byte. We can directly give the index to the weights.
   The index is the current size of the object growing in POOL. */
1877 retval
= obstack_object_size (pool
);
1879 /* Construct the weight record, one rule after the other. */
1880 for (cnt
= 0; cnt
< nrules
; ++cnt
)
/* Seven bytes are reserved per weight -- presumably the most
   utf8_encode can emit for one value; confirm against its definition. */
1882 char buf
[elem
->weights
[cnt
].cnt
* 7];
1886 for (i
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1887 /* Encode the weight value. We do nothing for IGNORE entries. */
1888 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1889 len
+= utf8_encode (&buf
[len
],
1890 elem
->weights
[cnt
].w
[i
]->mborder
[cnt
]);
1892 /* And add the buffer content: first the encoded length as a single
   byte, then the encoded weights themselves. */
1893 obstack_1grow (pool
, len
);
1894 obstack_grow (pool
, buf
, len
);
/* Combine the record index with the rule set index of the section. */
1897 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
/* Append the wide character weight record of ELEM to the obstack POOL.

   For each of the `nrules' rules the record holds one int32_t count
   followed by that many int32_t values, namely the wcorder of every
   non-NULL weight of ELEM.  NULL weights are skipped.

   The value returned is the size of the object growing in POOL
   before this record was appended, expressed in int32_t units and
   combined with the low seven bits of the ruleidx of ELEM's section
   shifted into bits 24-30.

   ELEM == &COLLATE->undefined is treated specially; the statement
   executed in that case is not part of this text.  */
1902 output_weightwc (struct obstack
*pool
, struct locale_collate_t
*collate
,
1903 struct element_t
*elem
)
1908 /* Optimize the use of UNDEFINED. */
1909 if (elem
== &collate
->undefined
)
1910 /* The weights are already inserted. */
1913 /* The index of the weights is the current size of the object
1914 growing in POOL, counted in int32_t units rather than bytes. */
1915 retval
= obstack_object_size (pool
) / sizeof (int32_t);
1917 /* Construct the weight record, one rule after the other. */
1918 for (cnt
= 0; cnt
< nrules
; ++cnt
)
1920 int32_t buf
[elem
->weights
[cnt
].cnt
];
/* Collect the wcorder of every weight which is not NULL; J counts
   the values actually stored. */
1924 for (i
= 0, j
= 0; i
< elem
->weights
[cnt
].cnt
; ++i
)
1925 if (elem
->weights
[cnt
].w
[i
] != NULL
)
1926 buf
[j
++] = elem
->weights
[cnt
].w
[i
]->wcorder
;
1928 /* And add the buffer content: first the number of values, then
   the values themselves. */
1929 obstack_int32_grow (pool
, j
);
1931 obstack_grow (pool
, buf
, j
* sizeof (int32_t));
/* Combine the record index with the rule set index of the section. */
1934 return retval
| ((elem
->section
->ruleidx
& 0x7f) << 24);
1939 collate_output (struct localedef_t
*locale
, const struct charmap_t
*charmap
,
1940 const char *output_path
)
1942 struct locale_collate_t
*collate
= locale
->categories
[LC_COLLATE
].collate
;
1943 const size_t nelems
= _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
);
1944 struct iovec iov
[2 + nelems
];
1945 struct locale_file data
;
1946 uint32_t idx
[nelems
];
1949 int32_t tablemb
[256];
1950 struct obstack weightpool
;
1951 struct obstack extrapool
;
1952 struct obstack indirectpool
;
1953 struct section_list
*sect
;
1954 struct collidx_table tablewc
;
1956 uint32_t *elem_table
;
1958 struct element_t
*runp
;
1960 data
.magic
= LIMAGIC (LC_COLLATE
);
1962 iov
[0].iov_base
= (void *) &data
;
1963 iov
[0].iov_len
= sizeof (data
);
1965 iov
[1].iov_base
= (void *) idx
;
1966 iov
[1].iov_len
= sizeof (idx
);
1968 idx
[0] = iov
[0].iov_len
+ iov
[1].iov_len
;
1971 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_NRULES
));
1972 iov
[2 + cnt
].iov_base
= &nrules
;
1973 iov
[2 + cnt
].iov_len
= sizeof (uint32_t);
1974 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
1977 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
1978 if (collate
== NULL
)
1982 while (cnt
< _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1984 /* The words have to be handled specially. */
1985 if (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
))
1987 iov
[2 + cnt
].iov_base
= &dummy
;
1988 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
1992 iov
[2 + cnt
].iov_base
= NULL
;
1993 iov
[2 + cnt
].iov_len
= 0;
1996 if (cnt
+ 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
))
1997 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2001 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2003 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2008 obstack_init (&weightpool
);
2009 obstack_init (&extrapool
);
2010 obstack_init (&indirectpool
);
2012 /* Since we are using the sign of an integer to mark indirection the
2013 offsets in the arrays we are indirectly referring to must not be
2014 zero since -0 == 0. Therefore we add a bit of dummy content. */
2015 obstack_int32_grow (&extrapool
, 0);
2016 obstack_int32_grow (&indirectpool
, 0);
2018 /* Prepare the ruleset table. */
2019 for (sect
= collate
->sections
, i
= 0; sect
!= NULL
; sect
= sect
->next
)
2020 if (sect
->rules
!= NULL
&& sect
->ruleidx
== i
)
2024 obstack_make_room (&weightpool
, nrules
);
2026 for (j
= 0; j
< nrules
; ++j
)
2027 obstack_1grow_fast (&weightpool
, sect
->rules
[j
]);
2030 /* And align the output. */
2031 i
= (nrules
* i
) % __alignof__ (int32_t);
2034 obstack_1grow (&weightpool
, '\0');
2035 while (++i
< __alignof__ (int32_t));
2037 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_RULESETS
));
2038 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2039 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2040 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2043 /* Generate the 8-bit table. Walk through the lists of sequences
2044 starting with the same byte and add them one after the other to
2045 the table. In case we have more than one sequence starting with
2046 the same byte we have to use extra indirection.
2048 First add a record for the NUL byte. This entry will never be used
2049 so it does not matter. */
2052 /* Now insert the `UNDEFINED' value if it is used. Since this value
2053 will probably be used more than once it is good to store the
2054 weights only once. */
2055 if (collate
->undefined
.used_in_level
!= 0)
2056 output_weight (&weightpool
, collate
, &collate
->undefined
);
2058 for (ch
= 1; ch
< 256; ++ch
)
2059 if (collate
->mbheads
[ch
]->mbnext
== NULL
2060 && collate
->mbheads
[ch
]->nmbs
<= 1)
2062 tablemb
[ch
] = output_weight (&weightpool
, collate
,
2063 collate
->mbheads
[ch
]);
2067 /* The entries in the list are sorted by length and then
2068 alphabetically. This is the order in which we will add the
2069 elements to the collation table. This allows simply walking
2070 the table in sequence and stopping at the first matching
2071 entry. Since the longer sequences are coming first in the
2072 list they have the possibility to match first, just as it
2073 has to be. In the worst case we are walking to the end of
2074 the list where we put, if no singlebyte sequence is defined
2075 in the locale definition, the weights for UNDEFINED.
2077 To reduce the length of the search list we compress them a bit.
2078 This happens by collecting sequences of consecutive byte
2079 sequences in one entry (having a begin and an end byte sequence)
2080 and add only one index into the weight table. We can find the
2081 consecutive entries since they are also consecutive in the list. */
2082 struct element_t
*runp
= collate
->mbheads
[ch
];
2083 struct element_t
*lastp
;
2085 assert ((obstack_object_size (&extrapool
)
2086 & (__alignof__ (int32_t) - 1)) == 0);
2088 tablemb
[ch
] = -obstack_object_size (&extrapool
);
2092 /* Store the current index in the weight table. We know that
2093 the current position in the `extrapool' is aligned on a
2098 /* Find out whether this is a single entry or we have more than
2099 one consecutive entry. */
2100 if (runp
->mbnext
!= NULL
2101 && runp
->nmbs
== runp
->mbnext
->nmbs
2102 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
, runp
->nmbs
- 1) == 0
2103 && (runp
->mbs
[runp
->nmbs
- 1]
2104 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1))
2107 struct element_t
*series_startp
= runp
;
2108 struct element_t
*curp
;
2110 /* Compute how much space we will need. */
2111 added
= ((sizeof (int32_t) + 1 + 2 * (runp
->nmbs
- 1)
2112 + __alignof__ (int32_t) - 1)
2113 & ~(__alignof__ (int32_t) - 1));
2114 assert ((obstack_object_size (&extrapool
)
2115 & (__alignof__ (int32_t) - 1)) == 0);
2116 obstack_make_room (&extrapool
, added
);
2118 /* More than one consecutive entry. We mark this by having
2119 a negative index into the indirect table. */
2120 obstack_int32_grow_fast (&extrapool
,
2121 -(obstack_object_size (&indirectpool
)
2122 / sizeof (int32_t)));
2124 /* Now search first the end of the series. */
2126 runp
= runp
->mbnext
;
2127 while (runp
->mbnext
!= NULL
2128 && runp
->nmbs
== runp
->mbnext
->nmbs
2129 && memcmp (runp
->mbs
, runp
->mbnext
->mbs
,
2130 runp
->nmbs
- 1) == 0
2131 && (runp
->mbs
[runp
->nmbs
- 1]
2132 == runp
->mbnext
->mbs
[runp
->nmbs
- 1] + 1));
2134 /* Now walk backward from here to the beginning. */
2137 assert (runp
->nmbs
<= 256);
2138 obstack_1grow_fast (&extrapool
, curp
->nmbs
- 1);
2139 for (i
= 1; i
< curp
->nmbs
; ++i
)
2140 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2142 /* Now find the end of the consecutive sequence and
2143 add all the indices in the indirect pool. */
2146 weightidx
= output_weight (&weightpool
, collate
, curp
);
2147 obstack_int32_grow (&indirectpool
, weightidx
);
2149 curp
= curp
->mblast
;
2151 while (curp
!= series_startp
);
2153 /* Add the final weight. */
2154 weightidx
= output_weight (&weightpool
, collate
, curp
);
2155 obstack_int32_grow (&indirectpool
, weightidx
);
2157 /* And add the end byte sequence. Without length this
2159 for (i
= 1; i
< curp
->nmbs
; ++i
)
2160 obstack_1grow_fast (&extrapool
, curp
->mbs
[i
]);
2164 /* A single entry. Simply add the index and the length and
2165 string (except for the first character which is already
2169 /* Output the weight info. */
2170 weightidx
= output_weight (&weightpool
, collate
, runp
);
2172 added
= ((sizeof (int32_t) + 1 + runp
->nmbs
- 1
2173 + __alignof__ (int32_t) - 1)
2174 & ~(__alignof__ (int32_t) - 1));
2175 assert ((obstack_object_size (&extrapool
)
2176 & (__alignof__ (int32_t) - 1)) == 0);
2177 obstack_make_room (&extrapool
, added
);
2179 obstack_int32_grow_fast (&extrapool
, weightidx
);
2180 assert (runp
->nmbs
<= 256);
2181 obstack_1grow_fast (&extrapool
, runp
->nmbs
- 1);
2183 for (i
= 1; i
< runp
->nmbs
; ++i
)
2184 obstack_1grow_fast (&extrapool
, runp
->mbs
[i
]);
2187 /* Add alignment bytes if necessary. */
2188 while ((obstack_object_size (&extrapool
)
2189 & (__alignof__ (int32_t) - 1)) != 0)
2190 obstack_1grow_fast (&extrapool
, '\0');
2194 runp
= runp
->mbnext
;
2196 while (runp
!= NULL
);
2198 assert ((obstack_object_size (&extrapool
)
2199 & (__alignof__ (int32_t) - 1)) == 0);
2201 /* If the final entry in the list is not a single character we
2202 add an UNDEFINED entry here. */
2203 if (lastp
->nmbs
!= 1)
2205 int added
= ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t) - 1)
2206 & ~(__alignof__ (int32_t) - 1));
2207 obstack_make_room (&extrapool
, added
);
2209 obstack_int32_grow_fast (&extrapool
, 0);
2210 /* XXX What rule? We just pick the first. */
2211 obstack_1grow_fast (&extrapool
, 0);
2212 /* Length is zero. */
2213 obstack_1grow_fast (&extrapool
, 0);
2215 /* Add alignment bytes if necessary. */
2216 while ((obstack_object_size (&extrapool
)
2217 & (__alignof__ (int32_t) - 1)) != 0)
2218 obstack_1grow_fast (&extrapool
, '\0');
2222 /* Add padding to the tables if necessary. */
2223 while ((obstack_object_size (&weightpool
) & (__alignof__ (int32_t) - 1))
2225 obstack_1grow (&weightpool
, 0);
2227 /* Now add the four tables. */
2228 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB
));
2229 iov
[2 + cnt
].iov_base
= tablemb
;
2230 iov
[2 + cnt
].iov_len
= sizeof (tablemb
);
2231 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2232 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2235 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB
));
2236 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2237 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2238 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2241 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB
));
2242 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2243 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2244 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2247 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB
));
2248 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2249 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2250 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2251 assert ((iov
[2 + cnt
].iov_len
& (__alignof__ (int32_t) - 1)) == 0);
2255 /* Now the same for the wide character table. We need to store some
2256 more information here. */
2257 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP1
));
2258 iov
[2 + cnt
].iov_base
= NULL
;
2259 iov
[2 + cnt
].iov_len
= 0;
2260 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2261 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2264 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP2
));
2265 iov
[2 + cnt
].iov_base
= NULL
;
2266 iov
[2 + cnt
].iov_len
= 0;
2267 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2268 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2271 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_GAP3
));
2272 iov
[2 + cnt
].iov_base
= NULL
;
2273 iov
[2 + cnt
].iov_len
= 0;
2274 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2275 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2278 /* Since we are using the sign of an integer to mark indirection the
2279 offsets in the arrays we are indirectly referring to must not be
2280 zero since -0 == 0. Therefore we add a bit of dummy content. */
2281 obstack_int32_grow (&extrapool
, 0);
2282 obstack_int32_grow (&indirectpool
, 0);
2284 /* Now insert the `UNDEFINED' value if it is used. Since this value
2285 will probably be used more than once it is good to store the
2286 weights only once. */
2287 if (output_weightwc (&weightpool
, collate
, &collate
->undefined
) != 0)
2290 /* Generate the table. Walk through the lists of sequences starting
2291 with the same wide character and add them one after the other to
2292 the table. In case we have more than one sequence starting with
2293 the same byte we have to use extra indirection. */
2295 auto void add_to_tablewc (uint32_t ch
, struct element_t
*runp
);
2297 void add_to_tablewc (uint32_t ch
, struct element_t
*runp
)
2299 if (runp
->wcnext
== NULL
&& runp
->nwcs
== 1)
2301 int32_t weigthidx
= output_weightwc (&weightpool
, collate
, runp
);
2302 collidx_table_add (&tablewc
, ch
, weigthidx
);
2306 /* As for the singlebyte table, we recognize sequences and
2308 struct element_t
*lastp
;
2310 collidx_table_add (&tablewc
, ch
,
2311 -(obstack_object_size (&extrapool
) / sizeof (uint32_t)));
2315 /* Store the current index in the weight table. We know that
2316 the current position in the `extrapool' is aligned on a
2321 /* Find out whether this is a single entry or we have more than
2322 one consecutive entry. */
2323 if (runp
->wcnext
!= NULL
2324 && runp
->nwcs
== runp
->wcnext
->nwcs
2325 && wmemcmp ((wchar_t *) runp
->wcs
,
2326 (wchar_t *)runp
->wcnext
->wcs
,
2327 runp
->nwcs
- 1) == 0
2328 && (runp
->wcs
[runp
->nwcs
- 1]
2329 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1))
2332 struct element_t
*series_startp
= runp
;
2333 struct element_t
*curp
;
2335 /* Now add first the initial byte sequence. */
2336 added
= (1 + 1 + 2 * (runp
->nwcs
- 1)) * sizeof (int32_t);
2337 if (sizeof (int32_t) == sizeof (int))
2338 obstack_make_room (&extrapool
, added
);
2340 /* More than one consecutive entry. We mark this by having
2341 a negative index into the indirect table. */
2342 obstack_int32_grow_fast (&extrapool
,
2343 -(obstack_object_size (&indirectpool
)
2344 / sizeof (int32_t)));
2345 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2348 runp
= runp
->wcnext
;
2349 while (runp
->wcnext
!= NULL
2350 && runp
->nwcs
== runp
->wcnext
->nwcs
2351 && wmemcmp ((wchar_t *) runp
->wcs
,
2352 (wchar_t *)runp
->wcnext
->wcs
,
2353 runp
->nwcs
- 1) == 0
2354 && (runp
->wcs
[runp
->nwcs
- 1]
2355 == runp
->wcnext
->wcs
[runp
->nwcs
- 1] + 1));
2357 /* Now walk backward from here to the beginning. */
2360 for (i
= 1; i
< runp
->nwcs
; ++i
)
2361 obstack_int32_grow_fast (&extrapool
, curp
->wcs
[i
]);
2363 /* Now find the end of the consecutive sequence and
2364 add all the indices in the indirect pool. */
2367 weightidx
= output_weightwc (&weightpool
, collate
,
2369 obstack_int32_grow (&indirectpool
, weightidx
);
2371 curp
= curp
->wclast
;
2373 while (curp
!= series_startp
);
2375 /* Add the final weight. */
2376 weightidx
= output_weightwc (&weightpool
, collate
, curp
);
2377 obstack_int32_grow (&indirectpool
, weightidx
);
2379 /* And add the end byte sequence. Without length this
2381 for (i
= 1; i
< curp
->nwcs
; ++i
)
2382 obstack_int32_grow (&extrapool
, curp
->wcs
[i
]);
2386 /* A single entry. Simply add the index and the length and
2387 string (except for the first character which is already
2391 /* Output the weight info. */
2392 weightidx
= output_weightwc (&weightpool
, collate
, runp
);
2394 added
= (1 + 1 + runp
->nwcs
- 1) * sizeof (int32_t);
2395 if (sizeof (int) == sizeof (int32_t))
2396 obstack_make_room (&extrapool
, added
);
2398 obstack_int32_grow_fast (&extrapool
, weightidx
);
2399 obstack_int32_grow_fast (&extrapool
, runp
->nwcs
- 1);
2400 for (i
= 1; i
< runp
->nwcs
; ++i
)
2401 obstack_int32_grow_fast (&extrapool
, runp
->wcs
[i
]);
2406 runp
= runp
->wcnext
;
2408 while (runp
!= NULL
);
2414 collidx_table_init (&tablewc
);
2416 wchead_table_iterate (&collate
->wcheads
, add_to_tablewc
);
2418 collidx_table_finalize (&tablewc
);
2421 /* Now add the four tables. */
2422 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_TABLEWC
));
2423 iov
[2 + cnt
].iov_base
= tablewc
.result
;
2424 iov
[2 + cnt
].iov_len
= tablewc
.result_size
;
2425 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2426 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2427 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2430 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTWC
));
2431 iov
[2 + cnt
].iov_len
= obstack_object_size (&weightpool
);
2432 iov
[2 + cnt
].iov_base
= obstack_finish (&weightpool
);
2433 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2434 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2435 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2438 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_EXTRAWC
));
2439 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2440 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2441 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2442 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2443 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2446 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTWC
));
2447 iov
[2 + cnt
].iov_len
= obstack_object_size (&indirectpool
);
2448 iov
[2 + cnt
].iov_base
= obstack_finish (&indirectpool
);
2449 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2450 assert (iov
[2 + cnt
].iov_len
% sizeof (int32_t) == 0);
2451 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2455 /* Finally write the table with collation element names out. It is
2456 a hash table with a simple function which gets the name of the
2457 character as the input. One character might have many names. The
2458 value associated with the name is an index into the weight table
2459 where we are then interested in the first-level weight value.
2461 To determine how large the table should be we are counting the
2462 elements we have to put in. Since we are using internal chaining
2463 using a secondary hash function we have to make the table a bit
2464 larger to avoid extremely long search times. We can achieve
2465 good results with a 40% larger table than there are entries. */
2467 runp
= collate
->start
;
2468 while (runp
!= NULL
)
2470 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2471 /* Yep, the element really counts. */
2476 /* Add 40% and find the next prime number. */
2477 elem_size
= next_prime (elem_size
* 1.4);
2479 /* Allocate the table. Each entry consists of two words: the hash
2480 value and an index in a secondary table which provides the index
2481 into the weight table and the string itself (so that a match can
2483 elem_table
= (uint32_t *) obstack_alloc (&extrapool
,
2484 elem_size
* 2 * sizeof (uint32_t));
2485 memset (elem_table
, '\0', elem_size
* 2 * sizeof (uint32_t));
2487 /* Now add the elements. */
2488 runp
= collate
->start
;
2489 while (runp
!= NULL
)
2491 if (runp
->mbs
!= NULL
&& runp
->weights
!= NULL
&& !runp
->is_character
)
2493 /* Compute the hash value of the name. */
2494 uint32_t namelen
= strlen (runp
->name
);
2495 uint32_t hash
= elem_hash (runp
->name
, namelen
);
2496 size_t idx
= hash
% elem_size
;
2497 size_t start_idx
= idx
;
2499 if (elem_table
[idx
* 2] != 0)
2501 /* The spot is already taken. Try iterating using the value
2502 from the secondary hashing function. */
2503 size_t iter
= hash
% (elem_size
- 2) + 1;
2508 if (idx
>= elem_size
)
2510 assert (idx
!= start_idx
);
2512 while (elem_table
[idx
* 2] != 0);
2514 /* This is the spot where we will insert the value. */
2515 elem_table
[idx
* 2] = hash
;
2516 elem_table
[idx
* 2 + 1] = obstack_object_size (&extrapool
);
2518 /* The string itself including length. */
2519 obstack_1grow (&extrapool
, namelen
);
2520 obstack_grow (&extrapool
, runp
->name
, namelen
);
2522 /* And the multibyte representation. */
2523 obstack_1grow (&extrapool
, runp
->nmbs
);
2524 obstack_grow (&extrapool
, runp
->mbs
, runp
->nmbs
);
2526 /* And align again to 32 bits. */
2527 if ((1 + namelen
+ 1 + runp
->nmbs
) % sizeof (int32_t) != 0)
2528 obstack_grow (&extrapool
, "\0\0",
2530 - ((1 + namelen
+ 1 + runp
->nmbs
)
2531 % sizeof (int32_t))));
2533 /* Now some 32-bit values: multibyte collation sequence,
2534 wide char string (including length), and wide char
2535 collation sequence. */
2536 obstack_int32_grow (&extrapool
, runp
->mbseqorder
);
2538 obstack_int32_grow (&extrapool
, runp
->nwcs
);
2539 obstack_grow (&extrapool
, runp
->wcs
,
2540 runp
->nwcs
* sizeof (uint32_t));
2542 obstack_int32_grow (&extrapool
, runp
->wcseqorder
);
2548 /* Prepare to write out this data. */
2549 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB
));
2550 iov
[2 + cnt
].iov_base
= &elem_size
;
2551 iov
[2 + cnt
].iov_len
= sizeof (int32_t);
2552 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2553 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2556 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_TABLEMB
));
2557 iov
[2 + cnt
].iov_base
= elem_table
;
2558 iov
[2 + cnt
].iov_len
= elem_size
* 2 * sizeof (int32_t);
2559 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2560 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2563 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB
));
2564 iov
[2 + cnt
].iov_len
= obstack_object_size (&extrapool
);
2565 iov
[2 + cnt
].iov_base
= obstack_finish (&extrapool
);
2566 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2569 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQMB
));
2570 iov
[2 + cnt
].iov_base
= collate
->mbseqorder
;
2571 iov
[2 + cnt
].iov_len
= 256;
2572 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2575 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_COLLSEQWC
));
2576 iov
[2 + cnt
].iov_base
= collate
->wcseqorder
.result
;
2577 iov
[2 + cnt
].iov_len
= collate
->wcseqorder
.result_size
;
2578 idx
[1 + cnt
] = idx
[cnt
] + iov
[2 + cnt
].iov_len
;
2579 assert (idx
[cnt
] % __alignof__ (int32_t) == 0);
2582 assert (cnt
== _NL_ITEM_INDEX (_NL_COLLATE_CODESET
));
2583 iov
[2 + cnt
].iov_base
= (void *) charmap
->code_set_name
;
2584 iov
[2 + cnt
].iov_len
= strlen (iov
[2 + cnt
].iov_base
) + 1;
2587 assert (cnt
== _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE
));
2589 write_locale_data (output_path
, LC_COLLATE
, "LC_COLLATE", 2 + cnt
, iov
);
2591 obstack_free (&weightpool
, NULL
);
2592 obstack_free (&extrapool
, NULL
);
2593 obstack_free (&indirectpool
, NULL
);
2598 collate_read (struct linereader
*ldfile
, struct localedef_t
*result
,
2599 const struct charmap_t
*charmap
, const char *repertoire_name
,
2602 struct repertoire_t
*repertoire
= NULL
;
2603 struct locale_collate_t
*collate
;
2605 struct token
*arg
= NULL
;
2606 enum token_t nowtok
;
2607 enum token_t was_ellipsis
= tok_none
;
2608 struct localedef_t
*copy_locale
= NULL
;
2611 1 - between `order-start' and `order-end'
2612 2 - after `order-end'
2613 3 - after `reorder-after', waiting for `reorder-end'
2614 4 - after `reorder-end'
2615 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2616 6 - after `reorder-sections-end'
2620 /* Get the repertoire we have to use. */
2621 if (repertoire_name
!= NULL
)
2622 repertoire
= repertoire_read (repertoire_name
);
2624 /* The rest of the line containing `LC_COLLATE' must be free. */
2625 lr_ignore_rest (ldfile
, 1);
2629 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2632 while (nowtok
== tok_eol
);
2634 if (nowtok
== tok_copy
)
2637 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2638 if (now
->tok
!= tok_string
)
2640 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2644 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2645 while (now
->tok
!= tok_eof
&& now
->tok
!= tok_end
);
2647 if (now
->tok
!= tok_eof
2648 || (now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
),
2649 now
->tok
== tok_eof
))
2650 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");
2651 else if (now
->tok
!= tok_lc_collate
)
2653 lr_error (ldfile
, _("\
2654 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2655 lr_ignore_rest (ldfile
, 0);
2658 lr_ignore_rest (ldfile
, 1);
2663 if (! ignore_content
)
2665 /* Get the locale definition. */
2666 copy_locale
= load_locale (LC_COLLATE
, now
->val
.str
.startmb
,
2667 repertoire_name
, charmap
, NULL
);
2668 if ((copy_locale
->avail
& COLLATE_LOCALE
) == 0)
2670 /* Not yet loaded. So do it now. */
2671 if (locfile_read (copy_locale
, charmap
) != 0)
2675 if (copy_locale
->categories
[LC_COLLATE
].collate
== NULL
)
2679 lr_ignore_rest (ldfile
, 1);
2681 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2685 /* Prepare the data structures. */
2686 collate_startup (ldfile
, result
, copy_locale
, ignore_content
);
2687 collate
= result
->categories
[LC_COLLATE
].collate
;
2695 /* Of course we don't proceed beyond the end of file. */
2696 if (nowtok
== tok_eof
)
2699 /* Ignore empty lines. */
2700 if (nowtok
== tok_eol
)
2702 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2710 /* Allow copying other locales. */
2711 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2712 if (now
->tok
!= tok_string
)
2715 if (! ignore_content
)
2716 load_locale (LC_COLLATE
, now
->val
.str
.startmb
, repertoire_name
,
2719 lr_ignore_rest (ldfile
, 1);
2722 case tok_coll_weight_max
:
2723 /* Ignore the rest of the line if we don't need the input of
2727 lr_ignore_rest (ldfile
, 0);
2734 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
2735 if (arg
->tok
!= tok_number
)
2737 if (collate
->col_weight_max
!= -1)
2738 lr_error (ldfile
, _("%s: duplicate definition of `%s'"),
2739 "LC_COLLATE", "col_weight_max");
2741 collate
->col_weight_max
= arg
->val
.num
;
2742 lr_ignore_rest (ldfile
, 1);
2745 case tok_section_symbol
:
2746 /* Ignore the rest of the line if we don't need the input of
2750 lr_ignore_rest (ldfile
, 0);
2757 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2758 if (arg
->tok
!= tok_bsymbol
)
2760 else if (!ignore_content
)
2762 /* Check whether this section is already known. */
2763 struct section_list
*known
= collate
->sections
;
2764 while (known
!= NULL
)
2766 if (strcmp (known
->name
, arg
->val
.str
.startmb
) == 0)
2768 known
= known
->next
;
2774 _("%s: duplicate declaration of section `%s'"),
2775 "LC_COLLATE", arg
->val
.str
.startmb
);
2776 free (arg
->val
.str
.startmb
);
2779 collate
->sections
= make_seclist_elem (collate
,
2780 arg
->val
.str
.startmb
,
2783 lr_ignore_rest (ldfile
, known
== NULL
);
2787 free (arg
->val
.str
.startmb
);
2788 lr_ignore_rest (ldfile
, 0);
2792 case tok_collating_element
:
2793 /* Ignore the rest of the line if we don't need the input of
2797 lr_ignore_rest (ldfile
, 0);
2801 if (state
!= 0 && state
!= 2)
2804 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2805 if (arg
->tok
!= tok_bsymbol
)
2809 const char *symbol
= arg
->val
.str
.startmb
;
2810 size_t symbol_len
= arg
->val
.str
.lenmb
;
2812 /* Next the `from' keyword. */
2813 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2814 if (arg
->tok
!= tok_from
)
2816 free ((char *) symbol
);
2820 ldfile
->return_widestr
= 1;
2821 ldfile
->translate_strings
= 1;
2823 /* Finally the string with the replacement. */
2824 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2826 ldfile
->return_widestr
= 0;
2827 ldfile
->translate_strings
= 0;
2829 if (arg
->tok
!= tok_string
)
2832 if (!ignore_content
&& symbol
!= NULL
)
2834 /* The name is already defined. */
2835 if (check_duplicate (ldfile
, collate
, charmap
,
2836 repertoire
, symbol
, symbol_len
))
2839 if (arg
->val
.str
.startmb
!= NULL
)
2840 insert_entry (&collate
->elem_table
, symbol
, symbol_len
,
2841 new_element (collate
,
2842 arg
->val
.str
.startmb
,
2843 arg
->val
.str
.lenmb
- 1,
2844 arg
->val
.str
.startwc
,
2845 symbol
, symbol_len
, 0));
2851 free ((char *) symbol
);
2852 if (arg
->val
.str
.startmb
!= NULL
)
2853 free (arg
->val
.str
.startmb
);
2854 if (arg
->val
.str
.startwc
!= NULL
)
2855 free (arg
->val
.str
.startwc
);
2857 lr_ignore_rest (ldfile
, 1);
2861 case tok_collating_symbol
:
2862 /* Ignore the rest of the line if we don't need the input of
2866 lr_ignore_rest (ldfile
, 0);
2870 if (state
!= 0 && state
!= 2)
2873 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2874 if (arg
->tok
!= tok_bsymbol
)
2878 char *symbol
= arg
->val
.str
.startmb
;
2879 size_t symbol_len
= arg
->val
.str
.lenmb
;
2880 char *endsymbol
= NULL
;
2881 size_t endsymbol_len
= 0;
2882 enum token_t ellipsis
= tok_none
;
2884 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
2885 if (arg
->tok
== tok_ellipsis2
|| arg
->tok
== tok_ellipsis4
)
2887 ellipsis
= arg
->tok
;
2889 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
2891 if (arg
->tok
!= tok_bsymbol
)
2897 endsymbol
= arg
->val
.str
.startmb
;
2898 endsymbol_len
= arg
->val
.str
.lenmb
;
2900 lr_ignore_rest (ldfile
, 1);
2902 else if (arg
->tok
!= tok_eol
)
2908 if (!ignore_content
)
2911 || (ellipsis
!= tok_none
&& endsymbol
== NULL
))
2913 lr_error (ldfile
, _("\
2914 %s: unknown character in collating symbol name"),
2918 else if (ellipsis
== tok_none
)
2920 /* A single symbol, no ellipsis. */
2921 if (check_duplicate (ldfile
, collate
, charmap
,
2922 repertoire
, symbol
, symbol_len
))
2923 /* The name is already defined. */
2926 insert_entry (&collate
->sym_table
, symbol
, symbol_len
,
2927 new_symbol (collate
, symbol
, symbol_len
));
2929 else if (symbol_len
!= endsymbol_len
)
2933 _("invalid names for character range"));
2938 /* Oh my, we have to handle an ellipsis. First, as
2939 usual, determine the common prefix and then
2940 convert the rest into a range. */
2942 unsigned long int from
;
2943 unsigned long int to
;
2946 for (prefixlen
= 0; prefixlen
< symbol_len
; ++prefixlen
)
2947 if (symbol
[prefixlen
] != endsymbol
[prefixlen
])
2950 /* Convert the rest into numbers. */
2951 symbol
[symbol_len
] = '\0';
2952 from
= strtoul (&symbol
[prefixlen
], &endp
,
2953 ellipsis
== tok_ellipsis2
? 16 : 10);
2955 goto col_sym_inv_range
;
2957 endsymbol
[symbol_len
] = '\0';
2958 to
= strtoul (&endsymbol
[prefixlen
], &endp
,
2959 ellipsis
== tok_ellipsis2
? 16 : 10);
2961 goto col_sym_inv_range
;
2964 goto col_sym_inv_range
;
2966 /* Now loop over all entries. */
2971 symbuf
= (char *) obstack_alloc (&collate
->mempool
,
2974 /* Create the name. */
2976 ellipsis
== tok_ellipsis2
2977 ? "%.*s%.*lX" : "%.*s%.*lu",
2978 (int) prefixlen
, symbol
,
2979 (int) (symbol_len
- prefixlen
), from
);
2981 if (check_duplicate (ldfile
, collate
, charmap
,
2982 repertoire
, symbuf
, symbol_len
))
2983 /* The name is already defined. */
2986 insert_entry (&collate
->sym_table
, symbuf
,
2988 new_symbol (collate
, symbuf
,
2991 /* Increment the counter. */
3003 if (endsymbol
!= NULL
)
3009 case tok_symbol_equivalence
:
3010 /* Ignore the rest of the line if we don't need the input of
3014 lr_ignore_rest (ldfile
, 0);
3021 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3022 if (arg
->tok
!= tok_bsymbol
)
3026 const char *newname
= arg
->val
.str
.startmb
;
3027 size_t newname_len
= arg
->val
.str
.lenmb
;
3028 const char *symname
;
3030 void *symval
; /* Actually struct symbol_t* */
3032 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3033 if (arg
->tok
!= tok_bsymbol
)
3035 if (newname
!= NULL
)
3036 free ((char *) newname
);
3040 symname
= arg
->val
.str
.startmb
;
3041 symname_len
= arg
->val
.str
.lenmb
;
3043 if (newname
== NULL
)
3045 lr_error (ldfile
, _("\
3046 %s: unknown character in equivalent definition name"),
3050 if (newname
!= NULL
)
3051 free ((char *) newname
);
3052 if (symname
!= NULL
)
3053 free ((char *) symname
);
3056 if (symname
== NULL
)
3058 lr_error (ldfile
, _("\
3059 %s: unknown character in equivalent definition value"),
3061 goto sym_equiv_free
;
3064 /* See whether the symbol name is already defined. */
3065 if (find_entry (&collate
->sym_table
, symname
, symname_len
,
3068 lr_error (ldfile
, _("\
3069 %s: unknown symbol `%s' in equivalent definition"),
3070 "LC_COLLATE", symname
);
3071 goto sym_equiv_free
;
3074 if (insert_entry (&collate
->sym_table
,
3075 newname
, newname_len
, symval
) < 0)
3077 lr_error (ldfile
, _("\
3078 error while adding equivalent collating symbol"));
3079 goto sym_equiv_free
;
3082 free ((char *) symname
);
3084 lr_ignore_rest (ldfile
, 1);
3088 /* We get told about the scripts we know. */
3089 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3090 if (arg
->tok
!= tok_bsymbol
)
3094 struct section_list
*runp
= collate
->known_sections
;
3097 while (runp
!= NULL
)
3098 if (strncmp (runp
->name
, arg
->val
.str
.startmb
,
3099 arg
->val
.str
.lenmb
) == 0
3100 && runp
->name
[arg
->val
.str
.lenmb
] == '\0')
3103 runp
= runp
->def_next
;
3107 lr_error (ldfile
, _("duplicate definition of script `%s'"),
3109 lr_ignore_rest (ldfile
, 0);
3113 runp
= (struct section_list
*) xcalloc (1, sizeof (*runp
));
3114 name
= (char *) xmalloc (arg
->val
.str
.lenmb
+ 1);
3115 memcpy (name
, arg
->val
.str
.startmb
, arg
->val
.str
.lenmb
);
3116 name
[arg
->val
.str
.lenmb
] = '\0';
3119 runp
->def_next
= collate
->known_sections
;
3120 collate
->known_sections
= runp
;
3122 lr_ignore_rest (ldfile
, 1);
3125 case tok_order_start
:
3126 /* Ignore the rest of the line if we don't need the input of
3130 lr_ignore_rest (ldfile
, 0);
3134 if (state
!= 0 && state
!= 1)
3138 /* The 14652 draft does not specify whether all `order_start' lines
3139 must contain the same number of sort-rules, but 14651 does. So
3140 we require this here as well. */
3141 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3142 if (arg
->tok
== tok_bsymbol
)
3144 /* This better should be a section name. */
3145 struct section_list
*sp
= collate
->known_sections
;
3147 && (sp
->name
== NULL
3148 || strncmp (sp
->name
, arg
->val
.str
.startmb
,
3149 arg
->val
.str
.lenmb
) != 0
3150 || sp
->name
[arg
->val
.str
.lenmb
] != '\0'))
3155 lr_error (ldfile
, _("\
3156 %s: unknown section name `%s'"),
3157 "LC_COLLATE", arg
->val
.str
.startmb
);
3158 /* We use the error section. */
3159 collate
->current_section
= &collate
->error_section
;
3161 if (collate
->error_section
.first
== NULL
)
3163 /* Insert &collate->error_section at the end of
3164 the collate->sections list. */
3165 if (collate
->sections
== NULL
)
3166 collate
->sections
= &collate
->error_section
;
3169 sp
= collate
->sections
;
3170 while (sp
->next
!= NULL
)
3173 sp
->next
= &collate
->error_section
;
3175 collate
->error_section
.next
= NULL
;
3180 /* One should not be allowed to open the same
3182 if (sp
->first
!= NULL
)
3183 lr_error (ldfile
, _("\
3184 %s: multiple order definitions for section `%s'"),
3185 "LC_COLLATE", sp
->name
);
3188 /* Insert sp in the collate->sections list,
3189 right after collate->current_section. */
3190 if (collate
->current_section
== NULL
)
3191 collate
->current_section
= sp
;
3194 sp
->next
= collate
->current_section
->next
;
3195 collate
->current_section
->next
= sp
;
3199 /* Next should come the end of the line or a semicolon. */
3200 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3202 if (arg
->tok
== tok_eol
)
3206 /* This means we have exactly one rule: `forward'. */
3208 lr_error (ldfile
, _("\
3209 %s: invalid number of sorting rules"),
3213 sp
->rules
= obstack_alloc (&collate
->mempool
,
3214 (sizeof (enum coll_sort_rule
)
3216 for (cnt
= 0; cnt
< nrules
; ++cnt
)
3217 sp
->rules
[cnt
] = sort_forward
;
3223 /* Get the next token. */
3224 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3230 /* There is no section symbol. Therefore we use the unnamed
3232 collate
->current_section
= &collate
->unnamed_section
;
3234 if (collate
->unnamed_section
.first
!= NULL
)
3235 lr_error (ldfile
, _("\
3236 %s: multiple order definitions for unnamed section"),
3240 /* Insert &collate->unnamed_section at the beginning of
3241 the collate->sections list. */
3242 collate
->unnamed_section
.next
= collate
->sections
;
3243 collate
->sections
= &collate
->unnamed_section
;
3247 /* Now read the direction names. */
3248 read_directions (ldfile
, arg
, charmap
, repertoire
, result
);
3250 /* From now we need the strings untranslated. */
3251 ldfile
->translate_strings
= 0;
3255 /* Ignore the rest of the line if we don't need the input of
3259 lr_ignore_rest (ldfile
, 0);
3266 /* Handle ellipsis at end of list. */
3267 if (was_ellipsis
!= tok_none
)
3269 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3270 repertoire
, result
);
3271 was_ellipsis
= tok_none
;
3275 lr_ignore_rest (ldfile
, 1);
3278 case tok_reorder_after
:
3279 /* Ignore the rest of the line if we don't need the input of
3283 lr_ignore_rest (ldfile
, 0);
3289 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3293 /* Handle ellipsis at end of list. */
3294 if (was_ellipsis
!= tok_none
)
3296 handle_ellipsis (ldfile
, arg
->val
.str
.startmb
,
3297 arg
->val
.str
.lenmb
, was_ellipsis
, charmap
,
3298 repertoire
, result
);
3299 was_ellipsis
= tok_none
;
3302 else if (state
!= 2 && state
!= 3)
3306 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3307 if (arg
->tok
== tok_bsymbol
|| arg
->tok
== tok_ucs4
)
3309 /* Find this symbol in the sequence table. */
3313 struct element_t
*insp
;
3317 if (arg
->tok
== tok_bsymbol
)
3319 startmb
= arg
->val
.str
.startmb
;
3320 lenmb
= arg
->val
.str
.lenmb
;
3324 sprintf (ucsbuf
, "U%08X", arg
->val
.ucs4
);
3329 if (find_entry (&collate
->seq_table
, startmb
, lenmb
, &ptr
) == 0)
3330 /* Yes, the symbol exists. Simply point the cursor
3332 collate
->cursor
= (struct element_t
*) ptr
;
3335 struct symbol_t
*symbp
;
3338 if (find_entry (&collate
->sym_table
, startmb
, lenmb
,
3343 if (symbp
->order
->last
!= NULL
3344 || symbp
->order
->next
!= NULL
)
3345 collate
->cursor
= symbp
->order
;
3348 /* This is a collating symbol but its position
3349 is not yet defined. */
3350 lr_error (ldfile
, _("\
3351 %s: order for collating symbol %.*s not yet defined"),
3352 "LC_COLLATE", (int) lenmb
, startmb
);
3353 collate
->cursor
= NULL
;
3357 else if (find_entry (&collate
->elem_table
, startmb
, lenmb
,
3360 insp
= (struct element_t
*) ptr
;
3362 if (insp
->last
!= NULL
|| insp
->next
!= NULL
)
3363 collate
->cursor
= insp
;
3366 /* This is a collating element but its position
3367 is not yet defined. */
3368 lr_error (ldfile
, _("\
3369 %s: order for collating element %.*s not yet defined"),
3370 "LC_COLLATE", (int) lenmb
, startmb
);
3371 collate
->cursor
= NULL
;
3377 /* This is bad. The symbol after which we have to
3378 insert does not exist. */
3379 lr_error (ldfile
, _("\
3380 %s: cannot reorder after %.*s: symbol not known"),
3381 "LC_COLLATE", (int) lenmb
, startmb
);
3382 collate
->cursor
= NULL
;
3387 lr_ignore_rest (ldfile
, no_error
);
3390 /* This must not happen. */
3394 case tok_reorder_end
:
3395 /* Ignore the rest of the line if we don't need the input of
3403 lr_ignore_rest (ldfile
, 1);
3406 case tok_reorder_sections_after
:
3407 /* Ignore the rest of the line if we don't need the input of
3411 lr_ignore_rest (ldfile
, 0);
3417 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3421 /* Handle ellipsis at end of list. */
3422 if (was_ellipsis
!= tok_none
)
3424 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3425 repertoire
, result
);
3426 was_ellipsis
= tok_none
;
3429 else if (state
== 3)
3431 WITH_CUR_LOCALE (error (0, 0, _("\
3432 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3435 else if (state
!= 2 && state
!= 4)
3439 /* Get the name of the sections we are adding after. */
3440 arg
= lr_token (ldfile
, charmap
, result
, repertoire
, verbose
);
3441 if (arg
->tok
== tok_bsymbol
)
3443 /* Now find a section with this name. */
3444 struct section_list
*runp
= collate
->sections
;
3446 while (runp
!= NULL
)
3448 if (runp
->name
!= NULL
3449 && strlen (runp
->name
) == arg
->val
.str
.lenmb
3450 && memcmp (runp
->name
, arg
->val
.str
.startmb
,
3451 arg
->val
.str
.lenmb
) == 0)
3458 collate
->current_section
= runp
;
3461 /* This is bad. The section after which we have to
3462 reorder does not exist. Therefore we cannot
3463 process the whole rest of this reorder
3465 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3466 "LC_COLLATE", (int) arg
->val
.str
.lenmb
,
3467 arg
->val
.str
.startmb
);
3471 lr_ignore_rest (ldfile
, 0);
3473 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3475 while (now
->tok
== tok_reorder_sections_after
3476 || now
->tok
== tok_reorder_sections_end
3477 || now
->tok
== tok_end
);
3479 /* Process the token we just saw. */
3485 /* This must not happen. */
3489 case tok_reorder_sections_end
:
3490 /* Ignore the rest of the line if we don't need the input of
3498 lr_ignore_rest (ldfile
, 1);
3503 /* Ignore the rest of the line if we don't need the input of
3507 lr_ignore_rest (ldfile
, 0);
3511 if (state
!= 0 && state
!= 1 && state
!= 3 && state
!= 5)
3514 if ((state
== 0 || state
== 5) && nowtok
== tok_ucs4
)
3517 if (nowtok
== tok_ucs4
)
3519 snprintf (ucs4buf
, sizeof (ucs4buf
), "U%08X", now
->val
.ucs4
);
3523 else if (arg
!= NULL
)
3525 symstr
= arg
->val
.str
.startmb
;
3526 symlen
= arg
->val
.str
.lenmb
;
3530 lr_error (ldfile
, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3531 (int) ldfile
->token
.val
.str
.lenmb
,
3532 ldfile
->token
.val
.str
.startmb
);
3536 struct element_t
*seqp
;
3539 /* We are outside an `order_start' region. This means
3540 we must only accept definitions of values for
3541 collation symbols since these are purely abstract
3542 values and don't need directions associated. */
3545 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3549 /* It's already defined. First check whether this
3550 is really a collating symbol. */
3551 if (seqp
->is_character
)
3560 if (find_entry (&collate
->sym_table
, symstr
, symlen
,
3562 /* No collating symbol, it's an error. */
3565 /* Maybe this is the first time we define a symbol
3566 value and it is before the first actual section. */
3567 if (collate
->sections
== NULL
)
3568 collate
->sections
= collate
->current_section
=
3569 &collate
->symbol_section
;
3572 if (was_ellipsis
!= tok_none
)
3575 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
,
3576 charmap
, repertoire
, result
);
3578 /* Remember that we processed the ellipsis. */
3579 was_ellipsis
= tok_none
;
3581 /* And don't add the value a second time. */
3585 else if (state
== 3)
3587 /* It is possible that we already have this collation sequence.
3588 In this case we move the entry. */
3592 /* If the symbol after which we have to insert was not found
3593 ignore all entries. */
3594 if (collate
->cursor
== NULL
)
3596 lr_ignore_rest (ldfile
, 0);
3600 if (find_entry (&collate
->seq_table
, symstr
, symlen
, &ptr
) == 0)
3602 seqp
= (struct element_t
*) ptr
;
3606 if (find_entry (&collate
->sym_table
, symstr
, symlen
, &sym
) == 0
3607 && (seqp
= ((struct symbol_t
*) sym
)->order
) != NULL
)
3610 if (find_entry (&collate
->elem_table
, symstr
, symlen
, &ptr
) == 0
3611 && (seqp
= (struct element_t
*) ptr
,
3612 seqp
->last
!= NULL
|| seqp
->next
!= NULL
3613 || (collate
->start
!= NULL
&& seqp
== collate
->start
)))
3616 /* Remove the entry from the old position. */
3617 if (seqp
->last
== NULL
)
3618 collate
->start
= seqp
->next
;
3620 seqp
->last
->next
= seqp
->next
;
3621 if (seqp
->next
!= NULL
)
3622 seqp
->next
->last
= seqp
->last
;
3624 /* We also have to check whether this entry is the
3625 first or last of a section. */
3626 if (seqp
->section
->first
== seqp
)
3628 if (seqp
->section
->first
== seqp
->section
->last
)
3629 /* This section has no content anymore. */
3630 seqp
->section
->first
= seqp
->section
->last
= NULL
;
3632 seqp
->section
->first
= seqp
->next
;
3634 else if (seqp
->section
->last
== seqp
)
3635 seqp
->section
->last
= seqp
->last
;
3637 /* Now insert it in the new place. */
3638 insert_weights (ldfile
, seqp
, charmap
, repertoire
, result
,
3643 /* Otherwise we just add a new entry. */
3645 else if (state
== 5)
3647 /* We are reordering sections. Find the named section. */
3648 struct section_list
*runp
= collate
->sections
;
3649 struct section_list
*prevp
= NULL
;
3651 while (runp
!= NULL
)
3653 if (runp
->name
!= NULL
3654 && strlen (runp
->name
) == symlen
3655 && memcmp (runp
->name
, symstr
, symlen
) == 0)
3664 lr_error (ldfile
, _("%s: section `%.*s' not known"),
3665 "LC_COLLATE", (int) symlen
, symstr
);
3666 lr_ignore_rest (ldfile
, 0);
3670 if (runp
!= collate
->current_section
)
3672 /* Remove the named section from the old place and
3673 insert it in the new one. */
3674 prevp
->next
= runp
->next
;
3676 runp
->next
= collate
->current_section
->next
;
3677 collate
->current_section
->next
= runp
;
3678 collate
->current_section
= runp
;
3681 /* Process the rest of the line which might change
3682 the collation rules. */
3683 arg
= lr_token (ldfile
, charmap
, result
, repertoire
,
3685 if (arg
->tok
!= tok_eof
&& arg
->tok
!= tok_eol
)
3686 read_directions (ldfile
, arg
, charmap
, repertoire
,
3691 else if (was_ellipsis
!= tok_none
)
3693 /* Using the information in the `ellipsis_weight'
3694 element and this and the last value we have to handle
3695 the ellipsis now. */
3696 assert (state
== 1);
3698 handle_ellipsis (ldfile
, symstr
, symlen
, was_ellipsis
, charmap
,
3699 repertoire
, result
);
3701 /* Remember that we processed the ellipsis. */
3702 was_ellipsis
= tok_none
;
3704 /* And don't add the value a second time. */
3708 /* Now insert in the new place. */
3709 insert_value (ldfile
, symstr
, symlen
, charmap
, repertoire
, result
);
3713 /* Ignore the rest of the line if we don't need the input of
3717 lr_ignore_rest (ldfile
, 0);
3724 if (was_ellipsis
!= tok_none
)
3727 _("%s: cannot have `%s' as end of ellipsis range"),
3728 "LC_COLLATE", "UNDEFINED");
3730 unlink_element (collate
);
3731 was_ellipsis
= tok_none
;
3734 /* See whether UNDEFINED already appeared somewhere. */
3735 if (collate
->undefined
.next
!= NULL
3736 || &collate
->undefined
== collate
->cursor
)
3739 _("%s: order for `%.*s' already defined at %s:%Zu"),
3740 "LC_COLLATE", 9, "UNDEFINED",
3741 collate
->undefined
.file
,
3742 collate
->undefined
.line
);
3743 lr_ignore_rest (ldfile
, 0);
3746 /* Parse the weights. */
3747 insert_weights (ldfile
, &collate
->undefined
, charmap
,
3748 repertoire
, result
, tok_none
);
3751 case tok_ellipsis2
: /* symbolic hexadecimal ellipsis */
3752 case tok_ellipsis3
: /* absolute ellipsis */
3753 case tok_ellipsis4
: /* symbolic decimal ellipsis */
3754 /* This is the symbolic (decimal or hexadecimal) or absolute
3756 if (was_ellipsis
!= tok_none
)
3759 if (state
!= 0 && state
!= 1 && state
!= 3)
3762 was_ellipsis
= nowtok
;
3764 insert_weights (ldfile
, &collate
->ellipsis_weight
, charmap
,
3765 repertoire
, result
, nowtok
);
3769 /* Next we assume `LC_COLLATE'. */
3770 if (!ignore_content
)
3773 /* We must either see a copy statement or have
3776 _("%s: empty category description not allowed"),
3778 else if (state
== 1)
3780 lr_error (ldfile
, _("%s: missing `order_end' keyword"),
3783 /* Handle ellipsis at end of list. */
3784 if (was_ellipsis
!= tok_none
)
3786 handle_ellipsis (ldfile
, NULL
, 0, was_ellipsis
, charmap
,
3787 repertoire
, result
);
3788 was_ellipsis
= tok_none
;
3791 else if (state
== 3)
3792 WITH_CUR_LOCALE (error (0, 0, _("\
3793 %s: missing `reorder-end' keyword"), "LC_COLLATE"));
3794 else if (state
== 5)
3795 WITH_CUR_LOCALE (error (0, 0, _("\
3796 %s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3798 arg
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3799 if (arg
->tok
== tok_eof
)
3801 if (arg
->tok
== tok_eol
)
3802 lr_error (ldfile
, _("%s: incomplete `END' line"), "LC_COLLATE");
3803 else if (arg
->tok
!= tok_lc_collate
)
3804 lr_error (ldfile
, _("\
3805 %1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3806 lr_ignore_rest (ldfile
, arg
->tok
== tok_lc_collate
);
3811 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3814 /* Prepare for the next round. */
3815 now
= lr_token (ldfile
, charmap
, result
, NULL
, verbose
);
3819 /* When we come here we reached the end of the file. */
3820 lr_error (ldfile
, _("%s: premature end of file"), "LC_COLLATE");