1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
41 #include "catgetsinfo.h"
45 (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
56 struct message_list
*next
;
64 struct message_list
*messages
;
71 struct set_list
*next
;
77 struct set_list
*all_sets
;
78 struct set_list
*current_set
;
79 size_t total_messages
;
83 struct obstack mem_pool
;
87 /* If non-zero force creation of new file, not using existing one. */
91 static const struct option long_options
[] =
93 { "header", required_argument
, NULL
, 'H' },
94 { "help", no_argument
, NULL
, 'h' },
95 { "new", no_argument
, &force_new
, 1 },
96 { "output", required_argument
, NULL
, 'o' },
97 { "version", no_argument
, NULL
, 'V' },
101 /* Wrapper functions with error checking for standard functions. */
102 extern void *xmalloc (size_t n
);
104 /* Prototypes for local functions. */
105 static void usage (int status
) __attribute__ ((noreturn
));
106 static void error_print (void);
107 static struct catalog
*read_input_file (struct catalog
*current
,
109 static void write_out (struct catalog
*result
, const char *output_name
,
110 const char *header_name
);
111 static struct set_list
*find_set (struct catalog
*current
, int number
);
112 static void normalize_line (const char *fname
, size_t line
, char *string
,
114 static void read_old (struct catalog
*catalog
, const char *file_name
);
118 main (int argc
, char *argv
[])
120 struct catalog
*result
;
121 const char *output_name
;
122 const char *header_name
;
127 /* Set program name for messages. */
128 error_print_progname
= error_print
;
130 /* Set locale via LC_ALL. */
131 setlocale (LC_ALL
, "");
133 /* Set the text message domain. */
134 textdomain (PACKAGE
);
136 /* Initialize local variables. */
143 while ((opt
= getopt_long (argc
, argv
, "hH:o:V", long_options
, NULL
)) != EOF
)
146 case '\0': /* Long option. */
152 header_name
= optarg
;
155 output_name
= optarg
;
161 usage (EXIT_FAILURE
);
164 /* Version information is requested. */
167 fprintf (stderr
, "%s - GNU %s %s\n", program_invocation_name
,
172 /* Help is requested. */
174 usage (EXIT_SUCCESS
);
176 /* Determine output file. */
177 if (output_name
== NULL
)
178 output_name
= optind
< argc
? argv
[optind
++] : "-";
180 /* Process all input files. */
181 setlocale (LC_CTYPE
, "C");
184 result
= read_input_file (result
, argv
[optind
]);
185 while (++optind
< argc
);
187 result
= read_input_file (NULL
, "-");
189 /* Write out the result. */
191 write_out (result
, output_name
, header_name
);
200 if (status
!= EXIT_SUCCESS
)
201 fprintf (stderr
, gettext ("Try `%s --help' for more information.\n"),
202 program_invocation_name
);
205 Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n\
206 %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n\
207 Mandatory arguments to long options are mandatory for short options too.\n\
208 -H, --header create C header file containing symbol definitions\n\
209 -h, --help display this help and exit\n\
210 --new do not use existing catalog, force new output file\n\
211 -o, --output=NAME write output to file NAME\n\
212 -V, --version output version information and exit\n\
213 If INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
214 is -, output is written to standard output.\n"),
215 program_invocation_name
, program_invocation_name
);
221 /* The address of this function will be assigned to the hook in the
226 /* We don't want the program name to be printed in messages. Emacs'
227 compile.el does not like this. */
231 static struct catalog
*
232 read_input_file (struct catalog
*current
, const char *fname
)
239 if (strcmp (fname
, "-") == 0 || strcmp (fname
, "/dev/stdin") == 0)
242 fname
= gettext ("*standard input*");
245 fp
= fopen (fname
, "r");
248 error (0, errno
, gettext ("cannot open input file `%s'"), fname
);
252 /* If we haven't seen anything yet, allocate result structure. */
255 current
= (struct catalog
*) xmalloc (sizeof (*current
));
257 current
->all_sets
= NULL
;
258 current
->total_messages
= 0;
259 current
->last_set
= 0;
260 current
->current_set
= find_set (current
, NL_SETD
);
262 #define obstack_chunk_alloc xmalloc
263 #define obstack_chunk_free free
264 obstack_init (¤t
->mem_pool
);
274 size_t start_line
= line_number
+ 1;
281 act_len
= getline (&buf
, &len
, fp
);
286 /* Is the line continued? */
287 if (buf
[act_len
- 1] == '\n')
290 continued
= buf
[act_len
- 1] == '\\';
297 /* Append to currently selected line. */
298 obstack_grow (¤t
->mem_pool
, buf
, act_len
);
302 obstack_1grow (¤t
->mem_pool
, '\0');
303 this_line
= (char *) obstack_finish (¤t
->mem_pool
);
306 if (this_line
[0] == '$')
308 if (isspace (this_line
[1]))
309 /* This is a comment line. Do nothing. */;
310 else if (strncmp (&this_line
[1], "set", 3) == 0)
312 int cnt
= sizeof ("cnt");
314 const char *symbol
= NULL
;
315 while (isspace (this_line
[cnt
]))
318 if (isdigit (this_line
[cnt
]))
320 set_number
= atol (&this_line
[cnt
]);
322 /* If the given number for the message set is
323 higher than any we used for symbolic set names
324 avoid clashing by using only higher numbers for
325 the following symbolic definitions. */
326 if (set_number
> current
->last_set
)
327 current
->last_set
= set_number
;
331 /* See whether it is a reasonable identifier. */
333 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
338 /* No correct character found. */
339 error_at_line (0, 0, fname
, start_line
,
340 gettext ("illegal set number"));
345 /* We have found something which looks like a
346 correct identifier. */
347 struct set_list
*runp
;
349 this_line
[cnt
] = '\0';
351 symbol
= &this_line
[start
];
353 /* Test whether the identifier was already used. */
354 runp
= current
->all_sets
;
356 if (runp
->symbol
!= NULL
357 && strcmp (runp
->symbol
, symbol
) == 0)
364 /* We cannot allow duplicate identifiers for
366 error_at_line (0, 0, fname
, start_line
,
367 gettext ("duplicate set definition"));
368 error_at_line (0, 0, runp
->fname
, runp
->line
,
370 this is the first definition"));
374 /* Allocate next free message set for identifier. */
375 set_number
= ++current
->last_set
;
381 /* We found a legal set number. */
382 current
->current_set
= find_set (current
, set_number
);
385 current
->current_set
->symbol
= symbol
;
386 current
->current_set
->fname
= fname
;
387 current
->current_set
->line
= start_line
;
390 else if (strncmp (&this_line
[1], "delset", 6) == 0)
392 int cnt
= sizeof ("delset");
394 while (isspace (this_line
[cnt
]))
397 if (isdigit (this_line
[cnt
]))
399 size_t set_number
= atol (&this_line
[cnt
]);
400 struct set_list
*set
;
402 /* Mark the message set with the given number as
404 set
= find_set (current
, set_number
);
409 /* See whether it is a reasonable identifier. */
411 while (isalnum (this_line
[cnt
]) || this_line
[cnt
] == '_')
416 error_at_line (0, 0, fname
, start_line
,
417 gettext ("illegal set number"));
423 struct set_list
*runp
;
425 this_line
[cnt
] = '\0';
427 symbol
= &this_line
[start
];
429 /* We have a symbolic set name. This name must
430 appear somewhere else in the catalogs read so
433 for (runp
= current
->all_sets
; runp
!= NULL
;
436 if (strcmp (runp
->symbol
, symbol
) == 0)
443 /* The name was not defined before. */
444 error_at_line (0, 0, fname
, start_line
,
445 gettext ("unknown set `%s'"), symbol
);
449 else if (strncmp (&this_line
[1], "quote", 5) == 0)
451 int cnt
= sizeof ("quote");
452 while (isspace (this_line
[cnt
]))
454 /* Yes, the quote char can be '\0'; this means no quote
456 current
->quote_char
= this_line
[cnt
];
462 while (this_line
[cnt
] != '\0' && !isspace (this_line
[cnt
]))
464 this_line
[cnt
] = '\0';
465 error_at_line (0, 0, fname
, start_line
,
466 gettext ("unknown directive `%s': line ignored"),
470 else if (isalnum (this_line
[0]) || this_line
[0] == '_')
472 const char *ident
= this_line
;
477 while (this_line
[0] != '\0' && !isspace (this_line
[0]));;
478 this_line
[0] = '\0'; /* Terminate the identifier. */
482 while (isspace (this_line
[0]));
483 /* Now we found the beginning of the message itself. */
485 if (isdigit (ident
[0]))
487 struct message_list
*runp
;
489 message_number
= atoi (ident
);
491 /* Find location to insert the new message. */
492 runp
= current
->current_set
->messages
;
494 if (runp
->number
== message_number
)
500 /* Oh, oh. There is already a message with this
501 number in the message set. */
502 error_at_line (0, 0, fname
, start_line
,
503 gettext ("duplicated message number"));
504 error_at_line (0, 0, runp
->fname
, runp
->line
,
505 gettext ("this is the first definition"));
508 ident
= NULL
; /* We don't have a symbol. */
510 if (message_number
!= 0
511 && message_number
> current
->current_set
->last_message
)
512 current
->current_set
->last_message
= message_number
;
514 else if (ident
[0] != '\0')
516 struct message_list
*runp
;
517 runp
= current
->current_set
->messages
;
519 /* Test whether the symbolic name was not used for
520 another message in this message set. */
522 if (runp
->symbol
!= NULL
&& strcmp (ident
, runp
->symbol
) == 0)
528 /* The name is already used. */
529 error_at_line (0, 0, fname
, start_line
,
530 gettext ("duplicated message identifier"));
531 error_at_line (0, 0, runp
->fname
, runp
->line
,
532 gettext ("this is the first definition"));
536 /* Give the message the next unused number. */
537 message_number
= ++current
->current_set
->last_message
;
542 if (message_number
!= 0)
544 struct message_list
*newp
;
546 used
= 1; /* Yes, we use the line. */
548 /* Strip quote characters, change escape sequences into
549 correct characters etc. */
550 normalize_line (fname
, start_line
, this_line
,
551 current
->quote_char
);
553 newp
= (struct message_list
*) xmalloc (sizeof (*newp
));
554 newp
->number
= message_number
;
555 newp
->message
= this_line
;
556 /* Remember symbolic name; it is NULL if none is given. */
557 newp
->symbol
= ident
;
558 /* Remember where we found the message. */
560 newp
->line
= start_line
;
562 /* Find the place to insert the message. We keep them in a
563 sorted single linked list. */
564 if (current
->current_set
->messages
== NULL
565 || current
->current_set
->messages
->number
> message_number
)
567 newp
->next
= current
->current_set
->messages
;
568 current
->current_set
->messages
= newp
;
572 struct message_list
*runp
;
573 runp
= current
->current_set
->messages
;
574 while (runp
->next
!= NULL
)
575 if (runp
->next
->number
> message_number
)
579 newp
->next
= runp
->next
;
583 ++current
->total_messages
;
590 /* See whether we have any non-white space character in this
592 while (this_line
[cnt
] != '\0' && isspace (this_line
[cnt
]))
595 if (this_line
[cnt
] != '\0')
596 /* Yes, some unknown characters found. */
597 error_at_line (0, 0, fname
, start_line
,
598 gettext ("malformed line ignored"));
601 /* We can save the memory for the line if it was not used. */
603 obstack_free (¤t
->mem_pool
, this_line
);
613 write_out (struct catalog
*catalog
, const char *output_name
,
614 const char *header_name
)
616 /* Computing the "optimal" size. */
617 struct set_list
*set_run
;
618 size_t best_total
, best_size
, best_depth
;
619 size_t act_size
, act_depth
;
620 struct catalog_obj obj
;
621 struct obstack string_pool
;
624 u_int32_t
*array1
, *array2
;
628 /* If not otherwise told try to read file with existing
631 read_old (catalog
, output_name
);
633 /* Initialize best_size with a very high value. */
634 best_total
= best_size
= best_depth
= UINT_MAX
;
636 /* We need some start size for testing. Let's start with
637 TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
639 act_size
= 1 + catalog
->total_messages
/ 5;
641 /* We determine the size of a hash table here. Because the message
642 numbers can be chosen arbitrarily by the programmer we cannot use
643 the simple method of accessing the array using the message
644 number. The algorithm is based on the trivial hash function
645 NUMBER % TABLE_SIZE, where collisions are stored in a second
646 dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
647 the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
648 while (act_size
<= best_total
)
650 size_t deep
[act_size
];
653 memset (deep
, '\0', act_size
* sizeof (size_t));
654 set_run
= catalog
->all_sets
;
655 while (set_run
!= NULL
)
657 struct message_list
*message_run
;
659 message_run
= set_run
->messages
;
660 while (message_run
!= NULL
)
662 size_t idx
= (message_run
->number
* set_run
->number
) % act_size
;
665 if (deep
[idx
] > act_depth
)
667 act_depth
= deep
[idx
];
668 if (act_depth
* act_size
> best_total
)
671 message_run
= message_run
->next
;
674 if (act_depth
* act_size
<= best_total
)
676 /* We have found a better solution. */
677 best_total
= act_depth
* act_size
;
678 best_size
= act_size
;
679 best_depth
= act_depth
;
681 set_run
= set_run
->next
;
687 /* Let's be prepared for an empty message file. */
688 if (best_size
== UINT_MAX
)
694 /* OK, now we have the size we will use. Fill in the header, build
695 the table and the second one with swapped byte order. */
696 obj
.magic
= CATGETS_MAGIC
;
697 obj
.plane_size
= best_size
;
698 obj
.plane_depth
= best_depth
;
700 /* Allocate room for all needed arrays. */
702 (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
703 memset (array1
, '\0', best_size
* best_depth
* sizeof (u_int32_t
) * 3);
705 = (u_int32_t
*) alloca (best_size
* best_depth
* sizeof (u_int32_t
) * 3);
706 obstack_init (&string_pool
);
708 set_run
= catalog
->all_sets
;
709 while (set_run
!= NULL
)
711 struct message_list
*message_run
;
713 message_run
= set_run
->messages
;
714 while (message_run
!= NULL
)
716 size_t idx
= (((message_run
->number
* set_run
->number
) % best_size
)
718 /* Determine collision depth. */
719 while (array1
[idx
] != 0)
720 idx
+= best_size
* 3;
722 /* Store set number, message number and pointer into string
723 space, relative to the first string. */
724 array1
[idx
+ 0] = set_run
->number
;
725 array1
[idx
+ 1] = message_run
->number
;
726 array1
[idx
+ 2] = obstack_object_size (&string_pool
);
728 /* Add current string to the continuous space containing all
730 obstack_grow0 (&string_pool
, message_run
->message
,
731 strlen (message_run
->message
));
733 message_run
= message_run
->next
;
736 set_run
= set_run
->next
;
738 strings_size
= obstack_object_size (&string_pool
);
739 strings
= obstack_finish (&string_pool
);
741 /* Compute ARRAY2 by changing the byte order. */
742 for (cnt
= 0; cnt
< best_size
* best_depth
* 3; ++cnt
)
743 array2
[cnt
] = SWAPU32 (array1
[cnt
]);
745 /* Now we can write out the whole data. */
746 if (strcmp (output_name
, "-") == 0
747 || strcmp (output_name
, "/dev/stdout") == 0)
751 fd
= creat (output_name
, 0666);
753 error (EXIT_FAILURE
, errno
, gettext ("cannot open output file `%s'"),
757 /* Write out header. */
758 write (fd
, &obj
, sizeof (obj
));
760 /* We always write out the little endian version of the index
762 #if __BYTE_ORDER == __LITTLE_ENDIAN
763 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
764 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
765 #elif __BYTE_ORDER == __BIG_ENDIAN
766 write (fd
, array2
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
767 write (fd
, array1
, best_size
* best_depth
* sizeof (u_int32_t
) * 3);
769 # error Cannot handle __BYTE_ORDER byte order
772 /* Finally write the strings. */
773 write (fd
, strings
, strings_size
);
775 if (fd
!= STDOUT_FILENO
)
778 /* If requested now write out the header file. */
779 if (header_name
!= NULL
)
784 /* Open output file. "-" or "/dev/stdout" means write to
786 if (strcmp (header_name
, "-") == 0
787 || strcmp (header_name
, "/dev/stdout") == 0)
791 fp
= fopen (header_name
, "w");
793 error (EXIT_FAILURE
, errno
,
794 gettext ("cannot open output file `%s'"), header_name
);
797 /* Iterate over all sets and all messages. */
798 set_run
= catalog
->all_sets
;
799 while (set_run
!= NULL
)
801 struct message_list
*message_run
;
803 /* If the current message set has a symbolic name write this
805 if (set_run
->symbol
!= NULL
)
806 fprintf (fp
, "%s#define %sSet %#x\t/* %s:%Zu */\n",
807 first
? "" : "\n", set_run
->symbol
, set_run
->number
- 1,
808 set_run
->fname
, set_run
->line
);
811 message_run
= set_run
->messages
;
812 while (message_run
!= NULL
)
814 /* If the current message has a symbolic name write
815 #define out. But we have to take care for the set
816 not having a symbolic name. */
817 if (message_run
->symbol
!= NULL
)
818 if (set_run
->symbol
== NULL
)
819 fprintf (fp
, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
820 set_run
->number
, message_run
->symbol
,
821 message_run
->number
, message_run
->fname
,
824 fprintf (fp
, "#define %s%s %#x\t/* %s:%Zu */\n",
825 set_run
->symbol
, message_run
->symbol
,
826 message_run
->number
, message_run
->fname
,
829 message_run
= message_run
->next
;
832 set_run
= set_run
->next
;
841 static struct set_list
*
842 find_set (struct catalog
*current
, int number
)
844 struct set_list
*result
= current
->all_sets
;
846 /* We must avoid set number 0 because a set of this number signals
847 in the tables that the entry is not occupied. */
850 while (result
!= NULL
)
851 if (result
->number
== number
)
854 result
= result
->next
;
856 /* Prepare new message set. */
857 result
= (struct set_list
*) xmalloc (sizeof (*result
));
858 result
->number
= number
;
860 result
->messages
= NULL
;
861 result
->next
= current
->all_sets
;
862 current
->all_sets
= result
;
868 /* Normalize given string *in*place* by processing escape sequences
869 and quote characters. */
871 normalize_line (const char *fname
, size_t line
, char *string
, char quote_char
)
877 if (quote_char
!= '\0' && *rp
== quote_char
)
886 if (*rp
== quote_char
)
887 /* We simply end the string the first time we find an
888 unescaped quote character. */
890 else if (*rp
== '\\')
893 if (quote_char
!= '\0' && *rp
== quote_char
)
894 /* This is an extension to XPG. */
897 /* Recognize escape sequences. */
930 int number
= *rp
++ - '0';
931 while (number
<= (255 / 8) && *rp
>= '0' && *rp
<= '7')
934 number
+= *rp
++ - '0';
936 *wp
++ = (char) number
;
940 /* Simply ignore the backslash character. */
947 /* If we saw a quote character at the beginning we expect another
949 if (is_quoted
&& *rp
!= quote_char
)
950 error (0, 0, fname
, line
, gettext ("unterminated message"));
952 /* Terminate string. */
959 read_old (struct catalog
*catalog
, const char *file_name
)
961 struct catalog_info old_cat_obj
;
962 struct set_list
*set
= NULL
;
966 old_cat_obj
.status
= closed
;
967 old_cat_obj
.cat_name
= file_name
;
969 /* Try to open catalog, but don't look through the NLSPATH. */
970 __open_catalog (&old_cat_obj
, 0);
972 if (old_cat_obj
.status
!= mmaped
&& old_cat_obj
.status
!= malloced
)
974 /* No problem, the catalog simply does not exist. */
977 error (EXIT_FAILURE
, errno
, gettext ("while opening old catalog file"));
979 /* OK, we have the catalog loaded. Now read all messages and merge
980 them. When set and message number clash for any message the new
982 for (cnt
= 0; cnt
< old_cat_obj
.plane_size
* old_cat_obj
.plane_depth
; ++cnt
)
984 struct message_list
*message
, *last
;
986 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] == 0)
987 /* No message in this slot. */
990 if (old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1 != last_set
)
992 last_set
= old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1;
993 set
= find_set (catalog
, old_cat_obj
.name_ptr
[cnt
* 3 + 0] - 1);
997 message
= set
->messages
;
998 while (message
!= NULL
)
1000 if (message
->number
>= old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1003 message
= message
->next
;
1007 || message
->number
> old_cat_obj
.name_ptr
[cnt
* 3 + 1])
1009 /* We have found a message which is not yet in the catalog.
1010 Insert it at the right position. */
1011 struct message_list
*newp
;
1013 newp
= (struct message_list
*) xmalloc (sizeof(*newp
));
1014 newp
->number
= old_cat_obj
.name_ptr
[cnt
* 3 + 1];
1016 &old_cat_obj
.strings
[old_cat_obj
.name_ptr
[cnt
* 3 + 2]];
1019 newp
->symbol
= NULL
;
1020 newp
->next
= message
;
1023 set
->messages
= newp
;
1027 ++catalog
->total_messages
;