1 /* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
42 #include "catgetsinfo.h"
46 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
57 struct message_list
*next
;
65 struct message_list
*messages
;
72 struct set_list
*next
;
78 struct set_list
*all_sets
;
79 struct set_list
*current_set
;
80 size_t total_messages
;
84 struct obstack mem_pool
;
88 /* If non-zero force creation of new file, not using existing one. */
92 static const struct option long_options
[] =
94 { "header", required_argument
, NULL
, 'H' },
95 { "help", no_argument
, NULL
, 'h' },
96 { "new", no_argument
, &force_new
, 1 },
97 { "output", required_argument
, NULL
, 'o' },
98 { "version", no_argument
, NULL
, 'V' },
102 /* Wrapper functions with error checking for standard functions. */
103 extern void *xmalloc (size_t n
);
105 /* Prototypes for local functions. */
106 static void usage (int status
) __attribute__ ((noreturn
));
107 static void error_print (void);
108 static struct catalog
*read_input_file (struct catalog
*current
,
110 static void write_out (struct catalog
*result
, const char *output_name
,
111 const char *header_name
);
112 static struct set_list
*find_set (struct catalog
*current
, int number
);
113 static void normalize_line (const char *fname
, size_t line
, char *string
,
115 static void read_old (struct catalog
*catalog
, const char *file_name
);
119 main (int argc
, char *argv
[])
121 struct catalog
*result
;
122 const char *output_name
;
123 const char *header_name
;
128 /* Set program name for messages. */
129 error_print_progname
= error_print
;
131 /* Set locale via LC_ALL. */
132 setlocale (LC_ALL
, "");
134 /* Set the text message domain. */
135 textdomain (PACKAGE
);
137 /* Initialize local variables. */
144 while ((opt
= getopt_long (argc
, argv
, "hH:o:V", long_options
, NULL
)) != -1)
147 case '\0': /* Long option. */
153 header_name
= optarg
;
156 output_name
= optarg
;
162 usage (EXIT_FAILURE
);
165 /* Version information is requested. */
168 printf ("gencat (GNU %s) %s\n", PACKAGE
, VERSION
);
170 Copyright (C) %s Free Software Foundation, Inc.\n\
171 This is free software; see the source for copying conditions. There is NO\n\
172 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
174 printf (_("Written by %s.\n"), "Ulrich Drepper");
179 /* Help is requested. */
181 usage (EXIT_SUCCESS
);
183 /* Determine output file. */
184 if (output_name
== NULL
)
185 output_name
= optind
< argc
? argv
[optind
++] : "-";
187 /* Process all input files. */
188 setlocale (LC_CTYPE
, "C");
191 result
= read_input_file (result
, argv
[optind
]);
192 while (++optind
< argc
);
194 result
= read_input_file (NULL
, "-");
196 /* Write out the result. */
198 write_out (result
, output_name
, header_name
);
207 if (status
!= EXIT_SUCCESS
)
208 fprintf (stderr
, gettext ("Try `%s --help' for more information.\n"),
209 program_invocation_name
);
213 Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n\
214 %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n\
215 Mandatory arguments to long options are mandatory for short options too.\n\
216 -H, --header=NAME create C header file NAME containing symbol definitions\n\
217 -h, --help display this help and exit\n\
218 --new do not use existing catalog, force new output file\n\
219 -o, --output=NAME write output to file NAME\n\
220 -V, --version output version information and exit\n\
221 If INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
222 is -, output is written to standard output.\n"),
223 program_invocation_name
, program_invocation_name
);
225 Report bugs using the `glibcbug' script to <bugs@gnu.ai.mit.edu>.\n"),
233 /* The address of this function will be assigned to the hook in the
238 /* We don't want the program name to be printed in messages. Emacs'
239 compile.el does not like this. */
243 static struct catalog
*
244 read_input_file (struct catalog
*current
, const char *fname
)
251 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
254 fname
= gettext ("*standard input*");
257 fp
= fopen (fname
, "r");
260 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
264 /* If we haven't seen anything yet, allocate result structure. */
267 current
= (struct catalog
*) xmalloc (sizeof (*current
));
269 current
->all_sets
= NULL
;
270 current
->total_messages
= 0;
271 current
->last_set
= 0;
272 current
->current_set
= find_set (current
, NL_SETD
);
274 #define obstack_chunk_alloc malloc
275 #define obstack_chunk_free free
276 obstack_init (¤t
->mem_pool
);
286 size_t start_line
= line_number
+ 1;
293 act_len
= getline (&buf
, &len
, fp
);
298 /* Is the line continued? */
299 if (buf
[act_len
- 1] == '\n')
302 continued
= buf
[act_len
- 1] == '\\';
309 /* Append to currently selected line. */
310 obstack_grow (¤t
->mem_pool
, buf
, act_len
);
314 obstack_1grow (¤t
->mem_pool
, '\0');
315 this_line
= (char *) obstack_finish (¤t
->mem_pool
);
318 if (this_line
[0] == '$')
320 if (isspace (this_line
[1]))
321 /* This is a comment line. Do nothing. */;
322 else if (strncmp (&this_line
[1], "set", 3) == 0)
324 int cnt
= sizeof ("cnt");
326 const char *symbol
= NULL
;
327 while (isspace (this_line
[cnt
]))
330 if (isdigit (this_line
[cnt
]))
332 set_number
= atol (&this_line
[cnt
]);
334 /* If the given number for the character set is
335 higher than any we used for symbolic set names
336 avoid clashing by using only higher numbers for
337 the following symbolic definitions. */
338 if (set_number
> current
->last_set
)
339 current
->last_set
= set_number
;
343 /* See whether it is a reasonable identifier. */
345 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
350 /* No correct character found. */
351 error_at_line (0, 0, fname
, start_line
,
352 gettext ("illegal set number"));
357 /* We have found something that looks like a
358 correct identifier. */
359 struct set_list
*runp
;
361 this_line
[cnt
] = '\0';
363 symbol
= &this_line
[start
];
365 /* Test whether the identifier was already used. */
366 runp
= current
->all_sets
;
368 if (runp
->symbol
!= NULL
369 && strcmp (runp
->symbol
, symbol
) == 0)
376 /* We cannot allow duplicate identifiers for
378 error_at_line (0, 0, fname
, start_line
,
379 gettext ("duplicate set definition"));
380 error_at_line (0, 0, runp
->fname
, runp
->line
,
382 this is the first definition"));
386 /* Allocate next free message set for identifier. */
387 set_number
= ++current
->last_set
;
393 /* We found a legal set number. */
394 current
->current_set
= find_set (current
, set_number
);
397 current
->current_set
->symbol
= symbol
;
398 current
->current_set
->fname
= fname
;
399 current
->current_set
->line
= start_line
;
402 else if (strncmp (&this_line
[1], "delset", 6) == 0)
404 int cnt
= sizeof ("delset");
406 while (isspace (this_line
[cnt
]))
409 if (isdigit (this_line
[cnt
]))
411 size_t set_number
= atol (&this_line
[cnt
]);
412 struct set_list
*set
;
414 /* Mark the message set with the given number as
416 set
= find_set (current
, set_number
);
421 /* See whether it is a reasonable identifier. */
423 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
428 error_at_line (0, 0, fname
, start_line
,
429 gettext ("illegal set number"));
435 struct set_list
*runp
;
437 this_line
[cnt
] = '\0';
439 symbol
= &this_line
[start
];
441 /* We have a symbolic set name. This name must
442 appear somewhere else in the catalogs read so
445 for (runp
= current
->all_sets
; runp
!= NULL
;
448 if (strcmp (runp
->symbol
, symbol
) == 0)
455 /* Name does not exist before. */
456 error_at_line (0, 0, fname
, start_line
,
457 gettext ("unknown set `%s'"), symbol
);
461 else if (strncmp (&this_line
[1], "quote", 5) == 0)
463 int cnt
= sizeof ("quote");
464 while (isspace (this_line
[cnt
]))
466 /* Yes, the quote char can be '\0'; this means no quote
468 current
->quote_char
= this_line
[cnt
];
474 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
476 this_line
[cnt
] = '\0';
477 error_at_line (0, 0, fname
, start_line
,
478 gettext ("unknown directive `%s': line ignored"),
482 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
484 const char *ident
= this_line
;
489 while (this_line
[0] != '\0' && !isspace (this_line
[0]));;
490 this_line
[0] = '\0'; /* Terminate the identifier. */
494 while (isspace (this_line
[0]));
495 /* Now we found the beginning of the message itself. */
497 if (isdigit (ident
[0]))
499 struct message_list
*runp
;
501 message_number
= atoi (ident
);
503 /* Find location to insert the new message. */
504 runp
= current
->current_set
->messages
;
506 if (runp
->number
== message_number
)
512 /* Oh, oh. There is already a message with this
513 number in the message set. */
514 error_at_line (0, 0, fname
, start_line
,
515 gettext ("duplicated message number"));
516 error_at_line (0, 0, runp
->fname
, runp
->line
,
517 gettext ("this is the first definition"));
520 ident
= NULL
; /* We don't have a symbol. */
522 if (message_number
!= 0
523 && message_number
> current
->current_set
->last_message
)
524 current
->current_set
->last_message
= message_number
;
526 else if (ident
[0] != '\0')
528 struct message_list
*runp
;
529 runp
= current
->current_set
->messages
;
531 /* Test whether the symbolic name was not used for
532 another message in this message set. */
534 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
540 /* The name is already used. */
541 error_at_line (0, 0, fname
, start_line
,
542 gettext ("duplicated message identifier"));
543 error_at_line (0, 0, runp
->fname
, runp
->line
,
544 gettext ("this is the first definition"));
548 /* Give the message the next unused number. */
549 message_number
= ++current
->current_set
->last_message
;
554 if (message_number
!= 0)
556 struct message_list
*newp
;
558 used
= 1; /* Yes, we use the line. */
560 /* Strip quote characters, change escape sequences into
561 correct characters etc. */
562 normalize_line (fname
, start_line
, this_line
,
563 current
->quote_char
);
565 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
566 newp
->number
= message_number
;
567 newp
->message
= this_line
;
568 /* Remember symbolic name; it is NULL if none is given. */
569 newp
->symbol
= ident
;
570 /* Remember where we found the character. */
572 newp
->line
= start_line
;
574 /* Find the place to insert the message. We keep them in a
575 sorted singly linked list. */
576 if (current
->current_set
->messages
== NULL
577 || current
->current_set
->messages
->number
> message_number
)
579 newp
->next
= current
->current_set
->messages
;
580 current
->current_set
->messages
= newp
;
584 struct message_list
*runp
;
585 runp
= current
->current_set
->messages
;
586 while (runp
->next
!= NULL
)
587 if (runp
->next
->number
> message_number
)
591 newp
->next
= runp
->next
;
595 ++current
->total_messages
;
602 /* See whether we have any non-white space character in this
604 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
607 if (this_line
[cnt
] != '\0')
608 /* Yes, some unknown characters found. */
609 error_at_line (0, 0, fname
, start_line
,
610 gettext ("malformed line ignored"));
613 /* We can save the memory for the line if it was not used. */
615 obstack_free (¤t
->mem_pool
, this_line
);
625 write_out (struct catalog
*catalog
, const char *output_name
,
626 const char *header_name
)
628 /* Computing the "optimal" size. */
629 struct set_list
*set_run
;
630 size_t best_total
, best_size
, best_depth
;
631 size_t act_size
, act_depth
;
632 struct catalog_obj obj
;
633 struct obstack string_pool
;
636 u_int32_t
*array1
, *array2
;
640 /* If not otherwise told try to read file with existing
643 read_old (catalog
, output_name
);
645 /* Initialize best_size with a very high value. */
646 best_total
= best_size
= best_depth
= UINT_MAX
;
648 /* We need some start size for testing. Let's start with
649 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
651 act_size
= 1 + catalog
->total_messages
/ 5;
653 /* We determine the size of a hash table here. Because the message
654 numbers can be chosen arbitrarily by the programmer we cannot use
655 the simple method of accessing the array using the message
656 number. The algorithm is based on the trivial hash function
657 NUMBER % TABLE_SIZE, where collisions are stored in a second
658 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
659 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
660 while (act_size
<= best_total
)
662 size_t deep
[act_size
];
665 memset (deep
, '\0', act_size
* sizeof (size_t));
666 set_run
= catalog
->all_sets
;
667 while (set_run
!= NULL
)
669 struct message_list
*message_run
;
671 message_run
= set_run
->messages
;
672 while (message_run
!= NULL
)
674 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
677 if (deep
[idx
] > act_depth
)
679 act_depth
= deep
[idx
];
680 if (act_depth
* act_size
> best_total
)
683 message_run
= message_run
->next
;
685 set_run
= set_run
->next
;
688 if (act_depth
* act_size
<= best_total
)
690 /* We have found a better solution. */
691 best_total
= act_depth
* act_size
;
692 best_size
= act_size
;
693 best_depth
= act_depth
;
699 /* let's be prepared for an empty message file. */
700 if (best_size
== UINT_MAX
)
706 /* OK, now we have the size we will use. Fill in the header, build
707 the table and the second one with swapped byte order. */
708 obj
.magic
= CATGETS_MAGIC
;
709 obj
.plane_size
= best_size
;
710 obj
.plane_depth
= best_depth
;
712 /* Allocate room for all needed arrays. */
714 (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
715 memset (array1
, '\0', best_size
* best_depth
* sizeof (u_int32_t
) * 3);
717 = (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
718 obstack_init (&string_pool
);
720 set_run
= catalog
->all_sets
;
721 while (set_run
!= NULL
)
723 struct message_list
*message_run
;
725 message_run
= set_run
->messages
;
726 while (message_run
!= NULL
)
728 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
730 /* Determine collision depth. */
731 while (array1
[idx
] != 0)
732 idx
+= best_size
* 3;
734 /* Store set number, message number and pointer into string
735 space, relative to the first string. */
736 array1
[idx
+ 0] = set_run
->number
;
737 array1
[idx
+ 1] = message_run
->number
;
738 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
740 /* Add current string to the continuous space containing all
742 obstack_grow0 (&string_pool
, message_run
->message
,
743 strlen (message_run
->message
));
745 message_run
= message_run
->next
;
748 set_run
= set_run
->next
;
750 strings_size
= obstack_object_size (&string_pool
);
751 strings
= obstack_finish (&string_pool
);
753 /* Compute ARRAY2 by changing the byte order. */
754 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
755 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
757 /* Now we can write out the whole data. */
758 if (strcmp (output_name
, "-") == 0
759 || strcmp (output_name
, "/dev/stdout") == 0)
763 fd
= creat (output_name
, 0666);
765 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
769 /* Write out header. */
770 write (fd
, &obj
, sizeof (obj
));
772 /* We always write out the little endian version of the index
774 #if __BYTE_ORDER == __LITTLE_ENDIAN
775 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
776 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
777 #elif __BYTE_ORDER == __BIG_ENDIAN
778 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
779 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
781 # error Cannot handle __BYTE_ORDER byte order
784 /* Finally write the strings. */
785 write (fd
, strings
, strings_size
);
787 if (fd
!= STDOUT_FILENO
)
790 /* If requested now write out the header file. */
791 if (header_name
!= NULL
)
796 /* Open output file. "-" or "/dev/stdout" means write to
798 if (strcmp (header_name
, "-") == 0
799 || strcmp (header_name
, "/dev/stdout") == 0)
803 fp
= fopen (header_name
, "w");
805 error (EXIT_FAILURE
, errno
,
806 gettext ("cannot open output file `%s'"), header_name
);
809 /* Iterate over all sets and all messages. */
810 set_run
= catalog
->all_sets
;
811 while (set_run
!= NULL
)
813 struct message_list
*message_run
;
815 /* If the current message set has a symbolic name write this
817 if (set_run
->symbol
!= NULL
)
818 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
819 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
820 set_run
->fname
, set_run
->line
);
823 message_run
= set_run
->messages
;
824 while (message_run
!= NULL
)
826 /* If the current message has a symbolic name write
827 #define out. But we have to take care for the set
828 not having a symbolic name. */
829 if (message_run
->symbol
!= NULL
)
830 if (set_run
->symbol
== NULL
)
831 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
832 set_run
->number
, message_run
->symbol
,
833 message_run
->number
, message_run
->fname
,
836 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
837 set_run
->symbol
, message_run
->symbol
,
838 message_run
->number
, message_run
->fname
,
841 message_run
= message_run
->next
;
844 set_run
= set_run
->next
;
853 static struct set_list
*
854 find_set (struct catalog
*current
, int number
)
856 struct set_list
*result
= current
->all_sets
;
858 /* We must avoid set number 0 because a set of this number signals
859 in the tables that the entry is not occupied. */
862 while (result
!= NULL
)
863 if (result
->number
== number
)
866 result
= result
->next
;
868 /* Prepare new message set. */
869 result
= (struct set_list
*) xmalloc (sizeof (*result
));
870 result
->number
= number
;
872 result
->messages
= NULL
;
873 result
->next
= current
->all_sets
;
874 current
->all_sets
= result
;
880 /* Normalize given string *in*place* by processing escape sequences
881 and quote characters. */
883 normalize_line (const char *fname
, size_t line
, char *string
, char quote_char
)
889 if (quote_char
!= '\0' && *rp
== quote_char
)
898 if (*rp
== quote_char
)
899 /* We simply end the string the first time we find an
900 unescaped quote character. */
902 else if (*rp
== '\\')
905 if (quote_char
!= '\0' && *rp
== quote_char
)
906 /* This is an extension to XPG. */
909 /* Recognize escape sequences. */
942 int number
= *rp
++ - '0';
943 while (number
<= (255 / 8) && *rp
>= '0' && *rp
<= '7')
946 number
+= *rp
++ - '0';
948 *wp
++ = (char) number
;
952 /* Simply ignore the backslash character. */
959 /* If we saw a quote character at the beginning we expect another
961 if (is_quoted
&& *rp
!= quote_char
)
962 error (0, 0, fname
, line
, gettext ("unterminated message"));
964 /* Terminate string. */
971 read_old (struct catalog
*catalog
, const char *file_name
)
973 struct catalog_info old_cat_obj
;
974 struct set_list
*set
= NULL
;
978 old_cat_obj
.status
= closed
;
979 old_cat_obj
.cat_name
= file_name
;
981 /* Try to open catalog, but don't look through the NLSPATH. */
982 __open_catalog (&old_cat_obj
, 0);
984 if (old_cat_obj
.status
!= mmapped
&& old_cat_obj
.status
!= malloced
)
986 /* No problem, the catalog simply does not exist. */
989 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
991 /* OK, we have the catalog loaded. Now read all messages and merge
992 them. When set and message number clash for any message the new
994 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
996 struct message_list
*message
, *last
;
998 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
999 /* No message in this slot. */
1002 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != (u_int32_t
) last_set
)
1004 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
1005 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
1009 message
= set
->messages
;
1010 while (message
!= NULL
)
1012 if ((u_int32_t
) message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1015 message
= message
->next
;
1019 || (u_int32_t
) message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1021 /* We have found a message which is not yet in the catalog.
1022 Insert it at the right position. */
1023 struct message_list
*newp
;
1025 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1026 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1028 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1031 newp
->symbol
= NULL
;
1032 newp
->next
= message
;
1035 set
->messages
= newp
;
1039 ++catalog
->total_messages
;