1 /* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
35 #include "linereader.h"
36 #include "localeinfo.h"
40 /* Uncomment the following line in the production version. */
41 /* #define NDEBUG 1 */
44 /* Define the lookup function. */
45 #include "locfile-kw.h"
48 /* Some useful macros. */
/* Yield the smaller of A and B.  Each argument is evaluated exactly once
   because it is first copied into a local of the same type.  This relies
   on the GNU statement-expression extension; `__typeof__' is used instead
   of `typeof' so the macro also compiles in strict ISO modes, where the
   plain keyword is not recognized.  */
#define MIN(a, b) \
  (__extension__ ({ __typeof__ (a) _a = (a); \
                    __typeof__ (b) _b = (b); \
                    _a < _b ? _a : _b; }))
54 void *xmalloc (size_t __n
);
55 char *xstrdup (const char *__str
);
58 locfile_read (const char *filename
, struct charset_t
*charset
)
60 struct linereader
*ldfile
;
61 struct localedef_t
*result
;
63 enum token_t expected_tok
= tok_none
;
64 const char *expected_str
= NULL
;
65 enum token_t ctype_tok_sym
= tok_none
;
66 const char *ctype_tok_str
= NULL
;
67 int copy_category
= 0;
70 /* Allocate space for result. */
71 result
= (struct localedef_t
*) xmalloc (sizeof (struct localedef_t
));
72 memset (result
, '\0', sizeof (struct localedef_t
));
74 ldfile
= lr_open (filename
, locfile_hash
);
77 if (filename
[0] != '/')
79 char path
[strlen (filename
) + 1 + sizeof (LOCSRCDIR
)];
81 stpcpy (stpcpy (stpcpy (path
, LOCSRCDIR
), "/"), filename
);
82 ldfile
= lr_open (path
, locfile_hash
);
92 #define HANDLE_COPY(category, token, string) \
93 if (nowtok == tok_copy) \
95 copy_category = category; \
96 expected_tok = token; \
97 expected_str = string; \
103 #define LOCALE_PROLOG(token, string) \
104 if (nowtok == tok_eol) \
105 /* Ignore empty lines. */ \
107 if (nowtok == tok_end) \
109 expected_tok = token; \
110 expected_str = string; \
114 if (nowtok == tok_copy) \
118 #define READ_STRING(fn, errlabel) \
121 arg = lr_token (ldfile, charset); \
122 if (arg->tok != tok_string) \
124 fn (ldfile, result, nowtok, arg, charset); \
125 lr_ignore_rest (ldfile, 1); \
129 #define READ_STRING_LIST(fn, errlabel) \
132 arg = lr_token (ldfile, charset); \
133 while (arg->tok == tok_string) \
135 fn (ldfile, result, nowtok, arg, charset); \
136 arg = lr_token (ldfile, charset); \
137 if (arg->tok != tok_semicolon) \
139 arg = lr_token (ldfile, charset); \
141 if (arg->tok != tok_eol) \
146 #define READ_NUMBER(fn, errlabel) \
149 arg = lr_token (ldfile, charset); \
150 if (arg->tok != tok_minus1 && arg->tok != tok_number) \
152 fn (ldfile, result, nowtok, arg, charset); \
153 lr_ignore_rest (ldfile, 1); \
157 #define READ_NUMBER_LIST(fn, errlabel) \
160 arg = lr_token (ldfile, charset); \
161 while (arg->tok == tok_minus1 || arg->tok == tok_number) \
163 fn (ldfile, result, nowtok, arg, charset); \
164 arg = lr_token (ldfile, charset); \
165 if (arg->tok != tok_semicolon) \
167 arg = lr_token (ldfile, charset); \
169 if (arg->tok != tok_eol) \
/* Call lr_error with the message STRING and then lr_ignore_rest with a
   second argument of 0, both on the variable `ldfile', which must be in
   scope where the macro is used.  The two calls are wrapped in
   do ... while (0) so that the expansion is a single statement: it then
   behaves correctly as the body of an unbraced `if' and can be followed
   by `else'.  Use it as `SYNTAX_ERROR (...);' with a trailing semicolon.  */
#define SYNTAX_ERROR(string) \
  do \
    { \
      lr_error (ldfile, string); \
      lr_ignore_rest (ldfile, 0); \
    } \
  while (0)
179 /* Parse locale definition file and store result in RESULT. */
184 struct token
*now
= lr_token (ldfile
, charset
);
185 enum token_t nowtok
= now
->tok
;
188 if (nowtok
== tok_eof
)
194 /* The beginning. We expect the special declarations, EOL or
195 the start of any locale. */
196 if (nowtok
== tok_eol
)
197 /* Ignore empty lines. */
202 case tok_escape_char
:
203 case tok_comment_char
:
204 /* We need an argument. */
205 arg
= lr_token (ldfile
, charset
);
207 if (arg
->tok
!= tok_ident
)
209 SYNTAX_ERROR (_("bad argument"));
213 if (arg
->val
.str
.len
!= 1)
215 lr_error (ldfile
, _("\
216 argument to `%s' must be a single character"),
217 nowtok
== tok_escape_char
? "escape_char"
220 lr_ignore_rest (ldfile
, 0);
224 if (nowtok
== tok_escape_char
)
225 ldfile
->escape_char
= *arg
->val
.str
.start
;
227 ldfile
->comment_char
= *arg
->val
.str
.start
;
238 case tok_lc_monetary
:
250 case tok_lc_messages
:
256 syntax error: not inside a locale definition section"));
259 lr_ignore_rest (ldfile
, 1);
263 HANDLE_COPY (LC_CTYPE
, tok_lc_ctype
, "LC_CYTPE");
265 ctype_startup (ldfile
, result
, charset
);
269 /* Here we accept all the character classes, tolower/toupper,
270 and following ANSI C:1995 self-defined classes. */
271 LOCALE_PROLOG (tok_lc_ctype
, "LC_CTYPE");
273 if (nowtok
== tok_charclass
)
275 READ_STRING_LIST (ctype_class_new
, bad_new_charclass
);
279 syntax error in definition of new character class"));
283 if (nowtok
== tok_charconv
)
285 READ_STRING_LIST (ctype_map_new
, bad_new_charconv
);
289 syntax error in definition of new character map"));
293 if (nowtok
== tok_upper
|| nowtok
== tok_lower
294 || nowtok
== tok_alpha
|| nowtok
== tok_digit
295 || nowtok
== tok_alnum
|| nowtok
== tok_space
296 || nowtok
== tok_cntrl
|| nowtok
== tok_punct
297 || nowtok
== tok_graph
|| nowtok
== tok_print
298 || nowtok
== tok_xdigit
|| nowtok
== tok_blank
)
300 ctype_tok_sym
= nowtok
;
301 ctype_tok_str
= NULL
;
306 if (nowtok
== tok_toupper
|| nowtok
== tok_tolower
)
308 ctype_tok_sym
= nowtok
;
309 ctype_tok_str
= NULL
;
314 if (nowtok
!= tok_ident
)
317 /* We possibly have a self-defined character class. */
318 if (ctype_is_charclass (ldfile
, result
, now
->val
.str
.start
))
320 ctype_tok_sym
= nowtok
;
321 ctype_tok_str
= now
->val
.str
.start
;
326 /* ...or a self-defined character map. */
327 if (ctype_is_charconv (ldfile
, result
, now
->val
.str
.start
))
329 ctype_tok_sym
= nowtok
;
330 ctype_tok_str
= now
->val
.str
.start
;
335 SYNTAX_ERROR (_("syntax error in definition of LC_CTYPE category"));
339 /* Handle `END xxx'. */
340 if (nowtok
!= expected_tok
)
341 lr_error (ldfile
, _("\
342 `%1$s' definition does not end with `END %1$s'"), expected_str
);
344 lr_ignore_rest (ldfile
, nowtok
== expected_tok
);
349 /* Here we expect a semicolon separated list of bsymbols. The
350 bit to be set in the word is given in CHARCLASS_BIT. */
353 ctype_class_start (ldfile
, result
, ctype_tok_sym
, ctype_tok_str
,
356 while (arg
->tok
!= tok_eol
)
358 /* Any token other than a bsymbol is an error. */
359 if (arg
->tok
!= tok_bsymbol
)
363 syntax error in character class definition"));
367 /* Lookup value for token and write into array. */
368 ctype_class_from (ldfile
, result
, arg
, charset
);
370 arg
= lr_token (ldfile
, charset
);
371 if (arg
->tok
== tok_semicolon
)
372 arg
= lr_token (ldfile
, charset
);
373 else if (arg
->tok
!= tok_eol
)
376 /* Look for ellipsis. */
377 if (arg
->tok
== tok_ellipsis
)
379 arg
= lr_token (ldfile
, charset
);
380 if (arg
->tok
!= tok_semicolon
)
383 arg
= lr_token (ldfile
, charset
);
384 if (arg
->tok
!= tok_bsymbol
)
387 /* Write range starting at LAST to ARG->VAL. */
388 ctype_class_to (ldfile
, result
, arg
, charset
);
390 arg
= lr_token (ldfile
, charset
);
391 if (arg
->tok
== tok_semicolon
)
392 arg
= lr_token (ldfile
, charset
);
393 else if (arg
->tok
!= tok_eol
)
398 /* Mark class as already seen. */
399 ctype_class_end (ldfile
, result
);
405 /* Here we expect a list of character mappings. Note: the
406 first opening brace is already matched. */
407 ctype_map_start (ldfile
, result
, ctype_tok_sym
, ctype_tok_str
,
412 /* Match ( bsymbol , bsymbol ) */
413 if (now
->tok
!= tok_open_brace
)
416 now
= lr_token (ldfile
, charset
);
417 if (now
->tok
!= tok_bsymbol
)
421 syntax error in character conversion definition"));
426 /* Lookup arg and assign to FROM. */
427 ctype_map_from (ldfile
, result
, now
, charset
);
429 now
= lr_token (ldfile
, charset
);
430 if (now
->tok
!= tok_comma
)
433 now
= lr_token (ldfile
, charset
);
434 if (now
->tok
!= tok_bsymbol
)
437 /* Lookup arg and assign to TO. */
438 ctype_map_to (ldfile
, result
, now
, charset
);
440 now
= lr_token (ldfile
, charset
);
441 if (now
->tok
!= tok_close_brace
)
444 now
= lr_token (ldfile
, charset
);
445 if (now
->tok
== tok_eol
)
450 if (now
->tok
!= tok_semicolon
)
453 now
= lr_token (ldfile
, charset
);
456 ctype_map_end (ldfile
, result
);
461 /* We have seen `copy'. First match the argument. */
464 if (nowtok
!= tok_string
)
465 lr_error (ldfile
, _("expect string argument for `copy'"));
467 def_to_process (now
->val
.str
.start
, 1 << copy_category
);
469 lr_ignore_rest (ldfile
, nowtok
== tok_string
);
471 /* The rest of the line must be empty
472 and the next keyword must be `END xxx'. */
474 while (lr_token (ldfile
, charset
)->tok
!= tok_end
)
479 lr_error (ldfile
, _("\
480 no other keyword shall be specified when `copy' is used"));
484 lr_ignore_rest (ldfile
, 0);
492 HANDLE_COPY (LC_COLLATE
, tok_lc_collate
, "LC_COLLATE");
494 collate_startup (ldfile
, result
, charset
);
498 /* Process the LC_COLLATE section. We expect `END LC_COLLATE',
499 any of the collation specifications, or any bsymbol. */
500 LOCALE_PROLOG (tok_lc_collate
, "LC_COLLATE");
502 if (nowtok
== tok_order_start
)
508 if (nowtok
!= tok_collating_element
509 && nowtok
!= tok_collating_symbol
)
512 lr_error (ldfile
, _("\
513 syntax error in collation definition"));
514 lr_ignore_rest (ldfile
, 0);
519 arg
= lr_token (ldfile
, charset
);
520 if (arg
->tok
!= tok_bsymbol
)
522 lr_error (ldfile
, _("\
523 collation symbol expected after `%s'"),
524 nowtok
== tok_collating_element
525 ? "collating-element" : "collating-symbol");
526 lr_ignore_rest (ldfile
, 0);
530 if (nowtok
== tok_collating_element
)
532 /* Save to-value as new name. */
533 collate_element_to (ldfile
, result
, arg
, charset
);
535 arg
= lr_token (ldfile
, charset
);
536 if (arg
->tok
!= tok_from
)
538 lr_error (ldfile
, _("\
539 `from' expected after first argument to `collating-element'"));
540 lr_ignore_rest (ldfile
, 0);
544 arg
= lr_token (ldfile
, charset
);
545 if (arg
->tok
!= tok_string
)
547 lr_error (ldfile
, _("\
548 from-value of `collating-element' must be a string"));
549 lr_ignore_rest (ldfile
, 0);
553 /* Enter new collating element. */
554 collate_element_from (ldfile
, result
, arg
, charset
);
557 /* Enter new collating symbol into table. */
558 collate_symbol (ldfile
, result
, arg
, charset
);
560 lr_ignore_rest (ldfile
, 1);
564 /* We parse the rest of the line containing `order_start'.
565 In any case we continue with parsing the symbols. */
569 while (now
->tok
!= tok_eol
)
571 int collation_method
= 0;
577 if (now
->tok
== tok_forward
)
578 collation_method
|= sort_forward
;
579 else if (now
->tok
== tok_backward
)
580 collation_method
|= sort_backward
;
581 else if (now
->tok
== tok_position
)
582 collation_method
|= sort_position
;
585 lr_error (ldfile
, _("unknown collation directive"));
586 lr_ignore_rest (ldfile
, 0);
590 now
= lr_token (ldfile
, charset
);
592 while (now
->tok
== tok_comma
593 && (now
== lr_token (ldfile
, charset
) != tok_none
));
595 /* Check for consistency: forward and backwards are
596 mutually exclusive. */
597 if ((collation_method
& sort_forward
) != 0
598 && (collation_method
& sort_backward
) != 0)
600 lr_error (ldfile
, _("\
601 sorting order `forward' and `backward' are mutually exclusive"));
602 /* To recover, clear the backward flag. */
603 collation_method
&= ~sort_backward
;
606 /* ??? I don't know whether this is correct but while
607 thinking about the `strcoll' functions I found that I
608 need a direction when performing position-dependent
609 collation. So I assume here that implicitly the
610 direction `forward' is given when `position' alone is
611 written. --drepper */
612 if (collation_method
== sort_position
)
613 collation_method
|= sort_forward
;
615 /* Enter info about next collation order. */
616 collate_new_order (ldfile
, result
, collation_method
);
618 if (now
->tok
!= tok_eol
&& now
->tok
!= tok_semicolon
)
620 lr_error (ldfile
, _("\
621 syntax error in `order_start' directive"));
622 lr_ignore_rest (ldfile
, 0);
626 if (now
->tok
== tok_semicolon
)
627 now
= lr_token (ldfile
, charset
);
630 /* If no argument to `order_start' is given, one `forward'
631 argument is implicitly assumed. */
633 collate_new_order (ldfile
, result
, sort_forward
);
636 /* We now know about all sorting rules. */
637 collate_build_arrays (ldfile
, result
);
642 /* We read one symbol a line until `order_end' is found. */
644 static int last_correct
= 1;
646 if (nowtok
== tok_order_end
)
649 lr_ignore_rest (ldfile
, 1);
653 /* Ignore empty lines. */
654 if (nowtok
== tok_eol
)
657 if (nowtok
!= tok_bsymbol
&& nowtok
!= tok_undefined
658 && nowtok
!= tok_ellipsis
)
660 if (last_correct
== 1)
662 lr_error (ldfile
, _("\
663 syntax error in collating order definition"));
666 lr_ignore_rest (ldfile
, 0);
673 /* Remember current token. */
674 if (collate_order_elem (ldfile
, result
, now
, charset
) < 0)
678 /* Read optional arguments. */
679 arg
= lr_token (ldfile
, charset
);
680 while (arg
->tok
!= tok_eol
)
682 if (arg
->tok
!= tok_ignore
&& arg
->tok
!= tok_ellipsis
683 && arg
->tok
!= tok_bsymbol
&& arg
->tok
!= tok_string
)
686 if (arg
->tok
== tok_ignore
|| arg
->tok
== tok_ellipsis
687 || arg
->tok
== tok_string
)
689 /* Call handler for simple weights. */
690 if (collate_simple_weight (ldfile
, result
, arg
, charset
)
694 arg
= lr_token (ldfile
, charset
);
700 int ok
= collate_weight_bsymbol (ldfile
, result
, arg
,
705 arg
= lr_token (ldfile
, charset
);
707 while (arg
->tok
== tok_bsymbol
);
709 /* Are there more weights? */
710 if (arg
->tok
!= tok_semicolon
)
713 /* Yes, prepare next weight. */
714 if (collate_next_weight (ldfile
, result
) < 0)
717 arg
= lr_token (ldfile
, charset
);
720 if (arg
->tok
!= tok_eol
)
722 SYNTAX_ERROR (_("syntax error in order specification"));
725 collate_end_weight (ldfile
, result
);
731 /* Following the `order_end' keyword we don't expect
732 anything but the `END'. */
733 if (nowtok
== tok_eol
)
736 if (nowtok
!= tok_end
)
739 expected_tok
= tok_lc_collate
;
740 expected_str
= "LC_COLLATE";
743 ldfile
->translate_strings
= 1;
747 HANDLE_COPY (LC_MONETARY
, tok_lc_monetary
, "LC_MONETARY");
749 monetary_startup (ldfile
, result
, charset
);
753 LOCALE_PROLOG (tok_lc_monetary
, "LC_MONETARY");
757 case tok_int_curr_symbol
:
758 case tok_currency_symbol
:
759 case tok_mon_decimal_point
:
760 case tok_mon_thousands_sep
:
761 case tok_positive_sign
:
762 case tok_negative_sign
:
763 READ_STRING (monetary_add
, bad_monetary
);
766 case tok_int_frac_digits
:
767 case tok_frac_digits
:
768 case tok_p_cs_precedes
:
769 case tok_p_sep_by_space
:
770 case tok_n_cs_precedes
:
771 case tok_n_sep_by_space
:
772 case tok_p_sign_posn
:
773 case tok_n_sign_posn
:
774 READ_NUMBER (monetary_add
, bad_monetary
);
777 case tok_mon_grouping
:
778 /* We have a semicolon separated list of integers. */
779 READ_NUMBER_LIST (monetary_add
, bad_monetary
);
784 SYNTAX_ERROR (_("syntax error in monetary locale definition"));
789 HANDLE_COPY (LC_NUMERIC
, tok_lc_numeric
, "LC_NUMERIC");
791 numeric_startup (ldfile
, result
, charset
);
795 LOCALE_PROLOG (tok_lc_numeric
, "LC_NUMERIC");
799 case tok_decimal_point
:
800 case tok_thousands_sep
:
801 READ_STRING (numeric_add
, bad_numeric
);
805 /* We have a semicolon separated list of integers. */
806 READ_NUMBER_LIST (numeric_add
, bad_numeric
);
811 SYNTAX_ERROR (_("syntax error in numeric locale definition"));
816 HANDLE_COPY (LC_TIME
, tok_lc_time
, "LC_TIME");
818 time_startup (ldfile
, result
, charset
);
822 LOCALE_PROLOG (tok_lc_time
, "LC_TIME");
833 READ_STRING_LIST (time_add
, bad_time
);
841 case tok_era_d_t_fmt
:
844 READ_STRING (time_add
, bad_time
);
849 SYNTAX_ERROR (_("syntax error in time locale definition"));
854 HANDLE_COPY (LC_MESSAGES
, tok_lc_messages
, "LC_MESSAGES");
856 messages_startup (ldfile
, result
, charset
);
860 LOCALE_PROLOG (tok_lc_messages
, "LC_MESSAGES");
868 READ_STRING (messages_add
, bad_message
);
873 SYNTAX_ERROR (_("syntax error in message locale definition"));
878 error (5, 0, _("%s: error in state machine"), __FILE__
);
885 /* We read all of the file. */
888 /* Let's see what information is available. */
889 for (cnt
= LC_CTYPE
; cnt
<= LC_MESSAGES
; ++cnt
)
890 if (result
->categories
[cnt
].generic
!= NULL
)
891 result
->avail
|= 1 << cnt
;
898 check_all_categories (struct localedef_t
*locale
, struct charset_t
*charset
)
900 /* Call the finishing functions for all locales. */
901 if ((locale
->binary
& (1 << LC_CTYPE
)) == 0)
902 ctype_finish (locale
, charset
);
903 if ((locale
->binary
& (1 << LC_COLLATE
)) == 0)
904 collate_finish (locale
, charset
);
905 if ((locale
->binary
& (1 << LC_MONETARY
)) == 0)
906 monetary_finish (locale
);
907 if ((locale
->binary
& (1 << LC_NUMERIC
)) == 0)
908 numeric_finish (locale
);
909 if ((locale
->binary
& (1 << LC_TIME
)) == 0)
910 time_finish (locale
);
911 if ((locale
->binary
& (1 << LC_MESSAGES
)) == 0)
912 messages_finish (locale
);
/* Invoke the output function of each category for LOCALE, handing every
   one of them OUTPUT_PATH.  The order is CTYPE, COLLATE, MONETARY,
   NUMERIC, TIME, MESSAGES; only the CTYPE and COLLATE writers also
   receive CHARSET.
   NOTE(review): the `void' return type and the braces are not visible
   in this listing and were restored from context -- confirm.  */
void
write_all_categories (struct localedef_t *locale, struct charset_t *charset,
                      const char *output_path)
{
  /* Categories whose output depends on the character set.  */
  ctype_output (locale, charset, output_path);
  collate_output (locale, charset, output_path);

  /* Categories written without reference to the character set.  */
  monetary_output (locale, output_path);
  numeric_output (locale, output_path);
  time_output (locale, output_path);
  messages_output (locale, output_path);
}
931 write_locale_data (const char *output_path
, const char *category
,
932 size_t n_elem
, struct iovec
*vec
)
934 size_t cnt
, step
, maxiov
;
938 fname
= malloc (strlen (output_path
) + 2 * strlen (category
) + 6);
940 error (5, errno
, _("memory exhausted"));
942 /* Normally we write to the directory pointed to by the OUTPUT_PATH.
943 But for LC_MESSAGES we have to take care of the translation
944 data. This means we need to have a directory LC_MESSAGES in
945 which we place the file under the name SYS_LC_MESSAGES. */
946 sprintf (fname
, "%s%s", output_path
, category
);
947 if (strcmp (category
, "LC_MESSAGES") == 0)
951 if (stat (fname
, &st
) < 0)
953 if (mkdir (fname
, 0777) < 0)
954 fd
= creat (fname
, 0666);
961 else if (S_ISREG (st
.st_mode
))
962 fd
= creat (fname
, 0666);
970 fd
= creat (fname
, 0666);
974 int save_err
= errno
;
978 sprintf (fname
, "%1$s%2$s/SYS_%2$s", output_path
, category
);
979 fd
= creat (fname
, 0666);
984 if (fd
== -1 && !be_quiet
)
986 error (0, save_err
, _("\
987 cannot open output file `%s' for category `%s'"),
997 maxiov
= sysconf (_SC_UIO_MAXIOV
);
1000 /* Write the data using writev. But we must take care of the
1001 limitations of the implementation. */
1002 for (cnt
= 0; cnt
< n_elem
; cnt
+= step
)
1004 step
= n_elem
- cnt
;
1006 step
= MIN (maxiov
, step
);
1008 if (writev (fd
, &vec
[cnt
], step
) < 0 && !be_quiet
)
1010 error (0, errno
, _("failure while writing data for category `%s'"),