Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / write-mo.c
blob4635bcc8f1582107bb51b5698a2c9f4ca5154be8
/* Writing binary .mo files.
   Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 #include <alloca.h>
24 /* Specification. */
25 #include "write-mo.h"
27 #include <errno.h>
28 #include <stdbool.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #if HAVE_SYS_PARAM_H
34 # include <sys/param.h>
35 #endif
37 /* These two include files describe the binary .mo format. */
38 #include "gmo.h"
39 #include "hash-string.h"
41 #include "error.h"
42 #include "hash.h"
43 #include "message.h"
44 #include "format.h"
45 #include "xalloc.h"
46 #include "xallocsa.h"
47 #include "binary-io.h"
48 #include "fwriteerror.h"
49 #include "exit.h"
50 #include "gettext.h"
52 #define _(str) gettext (str)
54 #define freea(p) /* nothing */
/* Round X up to the next multiple of Y.
   Usually defined in <sys/param.h>; this fallback is used only when that
   header did not provide it.  */
#ifndef roundup
# if defined __GNUC__ && __GNUC__ >= 2
/* Statement-expression form: each argument is evaluated exactly once.
   The underscore spelling __typeof__ is used because plain 'typeof' is not
   recognized by GCC in strict ISO modes such as -std=c99 or -std=c11.  */
#  define roundup(x, y) ({__typeof__ (x) _x = (x); __typeof__ (y) _y = (y); \
                         ((_x + _y - 1) / _y) * _y; })
# else
/* Portable form.  Note that Y is evaluated more than once here.  */
#  define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
# endif /* GNU CC2 */
#endif /* roundup */
67 /* Alignment of strings in resulting .mo file. */
68 size_t alignment;
70 /* True if no hash table in .mo is wanted. */
71 bool no_hash_table;
/* Indices into the strings contained in 'struct pre_message' and
   'struct pre_sysdep_message'.  */
enum
{
  M_ID = 0,     /* msgid - the original string */
  M_STR = 1     /* msgstr - the translated string */
};

/* An intermediate data structure representing a 'struct string_desc'.  */
struct pre_string
{
  /* Size of the string in bytes.  write_table stores strlen + 1 for a
     msgid and the msgstr_len of the message for a msgstr, i.e. the
     terminating NUL is counted.  */
  size_t length;
  /* Start of the string data.  Not owned by this structure.  */
  const char *pointer;
};

/* An intermediate data structure representing a message.  */
struct pre_message
{
  /* The msgid (index M_ID) and the msgstr (index M_STR).  */
  struct pre_string str[2];
  /* The msgid_plural, or NULL if the message has none.  */
  const char *id_plural;
  /* Size of id_plural including its terminating NUL, or 0 if there is
     no plural form.  */
  size_t id_plural_len;
};

/* Comparison function for qsort: order two 'struct pre_message' objects
   by their msgid, using strcmp.  PVAL1 and PVAL2 point to the elements
   being compared.  Returns a value less than, equal to, or greater than
   zero, like strcmp.  */
static int
compare_id (const void *pval1, const void *pval2)
{
  /* Keep the const qualifier: the elements are only read.  */
  const struct pre_message *msg1 = (const struct pre_message *) pval1;
  const struct pre_message *msg2 = (const struct pre_message *) pval2;

  return strcmp (msg1->str[M_ID].pointer, msg2->str[M_ID].pointer);
}
/* An intermediate data structure representing a 'struct sysdep_segment'.  */
struct pre_sysdep_segment
{
  /* Number of bytes at 'pointer'.  No terminating NUL is counted; a NUL
     byte is appended when the segment is written out.  */
  size_t length;
  /* Start of the segment name.  Points into a message string and is not
     owned by this structure.  */
  const char *pointer;
};
/* An intermediate data structure representing a 'struct segment_pair'.  */
struct pre_segment_pair
{
  /* Size in bytes of the static part of the string at 'segptr'.  */
  size_t segsize;
  /* Start of the static part.  Not owned by this structure.  */
  const char *segptr;
  /* Index of the system dependent segment that follows the static part,
     or SEGMENTS_END for the last pair of a string.  */
  size_t sysdepref;
};

/* An intermediate data structure representing a 'struct sysdep_string'.  */
struct pre_sysdep_string
{
  /* Number of system dependent segments in the string.  */
  unsigned int segmentcount;
  /* Actually segmentcount + 1 elements; the last one is the terminating
     pair.  The array is declared with one element on purpose: allocations
     are sized as sizeof (struct pre_sysdep_string) plus segmentcount
     further pairs, so changing this to a flexible array member would
     make those allocations one element too small.  */
  struct pre_segment_pair segments[1];
};
/* An intermediate data structure representing a message with system dependent
   strings.  */
struct pre_sysdep_message
{
  /* The segmented msgid (index M_ID) and msgstr (index M_STR).  */
  struct pre_sysdep_string *str[2];
  /* The msgid_plural, or NULL if the message has none.  */
  const char *id_plural;
  /* Size of id_plural including its terminating NUL, or 0 if there is
     no plural form.  */
  size_t id_plural_len;
};
136 /* Write the message list to the given open file. */
137 static void
138 write_table (FILE *output_file, message_list_ty *mlp)
140 size_t nstrings;
141 struct pre_message *msg_arr;
142 size_t n_sysdep_strings;
143 struct pre_sysdep_message *sysdep_msg_arr;
144 size_t n_sysdep_segments;
145 struct pre_sysdep_segment *sysdep_segments;
146 bool have_outdigits;
147 int major_revision;
148 int minor_revision;
149 bool omit_hash_table;
150 nls_uint32 hash_tab_size;
151 struct mo_file_header header; /* Header of the .mo file to be written. */
152 size_t header_size;
153 size_t offset;
154 struct string_desc *orig_tab;
155 struct string_desc *trans_tab;
156 size_t sysdep_tab_offset = 0;
157 size_t end_offset;
158 char *null;
159 size_t j, m;
161 /* First pass: Move the static string pairs into an array, for sorting,
162 and at the same time, compute the segments of the system dependent
163 strings. */
164 nstrings = 0;
165 msg_arr =
166 (struct pre_message *)
167 xmalloc (mlp->nitems * sizeof (struct pre_message));
168 n_sysdep_strings = 0;
169 sysdep_msg_arr =
170 (struct pre_sysdep_message *)
171 xmalloc (mlp->nitems * sizeof (struct pre_sysdep_message));
172 n_sysdep_segments = 0;
173 sysdep_segments = NULL;
174 have_outdigits = false;
175 for (j = 0; j < mlp->nitems; j++)
177 message_ty *mp = mlp->item[j];
178 struct interval *intervals[2];
179 size_t nintervals[2];
181 intervals[M_ID] = NULL;
182 nintervals[M_ID] = 0;
183 intervals[M_STR] = NULL;
184 nintervals[M_STR] = 0;
186 /* Test if mp contains system dependent strings and thus
187 requires the use of the .mo file minor revision 1. */
188 if (possible_format_p (mp->is_format[format_c])
189 || possible_format_p (mp->is_format[format_objc]))
191 /* Check whether msgid or msgstr contain ISO C 99 <inttypes.h>
192 format string directives. No need to check msgid_plural, because
193 it is not accessed by the [n]gettext() function family. */
194 const char *p_end;
195 const char *p;
197 get_sysdep_c_format_directives (mp->msgid, false,
198 &intervals[M_ID], &nintervals[M_ID]);
200 p_end = mp->msgstr + mp->msgstr_len;
201 for (p = mp->msgstr; p < p_end; p += strlen (p) + 1)
203 struct interval *part_intervals;
204 size_t part_nintervals;
206 get_sysdep_c_format_directives (p, true,
207 &part_intervals,
208 &part_nintervals);
209 if (part_nintervals > 0)
211 size_t d = p - mp->msgstr;
212 unsigned int i;
214 intervals[M_STR] =
215 (struct interval *)
216 xrealloc (intervals[M_STR],
217 (nintervals[M_STR] + part_nintervals)
218 * sizeof (struct interval));
219 for (i = 0; i < part_nintervals; i++)
221 intervals[M_STR][nintervals[M_STR] + i].startpos =
222 d + part_intervals[i].startpos;
223 intervals[M_STR][nintervals[M_STR] + i].endpos =
224 d + part_intervals[i].endpos;
226 nintervals[M_STR] += part_nintervals;
231 if (nintervals[M_ID] > 0 || nintervals[M_STR] > 0)
233 /* System dependent string pair. */
234 for (m = 0; m < 2; m++)
236 struct pre_sysdep_string *pre =
237 (struct pre_sysdep_string *)
238 xmalloc (sizeof (struct pre_sysdep_string)
239 + nintervals[m] * sizeof (struct pre_segment_pair));
240 const char *str;
241 size_t str_len;
242 size_t lastpos;
243 unsigned int i;
245 if (m == M_ID)
247 str = mp->msgid;
248 str_len = strlen (mp->msgid) + 1;
250 else
252 str = mp->msgstr;
253 str_len = mp->msgstr_len;
256 lastpos = 0;
257 pre->segmentcount = nintervals[m];
258 for (i = 0; i < nintervals[m]; i++)
260 size_t length;
261 const char *pointer;
262 size_t r;
264 pre->segments[i].segptr = str + lastpos;
265 pre->segments[i].segsize = intervals[m][i].startpos - lastpos;
267 length = intervals[m][i].endpos - intervals[m][i].startpos;
268 pointer = str + intervals[m][i].startpos;
269 if (length >= 2
270 && pointer[0] == '<' && pointer[length - 1] == '>')
272 /* Skip the '<' and '>' markers. */
273 length -= 2;
274 pointer += 1;
277 for (r = 0; r < n_sysdep_segments; r++)
278 if (sysdep_segments[r].length == length
279 && memcmp (sysdep_segments[r].pointer, pointer, length)
280 == 0)
281 break;
282 if (r == n_sysdep_segments)
284 n_sysdep_segments++;
285 sysdep_segments =
286 (struct pre_sysdep_segment *)
287 xrealloc (sysdep_segments,
288 n_sysdep_segments
289 * sizeof (struct pre_sysdep_segment));
290 sysdep_segments[r].length = length;
291 sysdep_segments[r].pointer = pointer;
294 pre->segments[i].sysdepref = r;
296 if (length == 1 && *pointer == 'I')
297 have_outdigits = true;
299 lastpos = intervals[m][i].endpos;
301 pre->segments[i].segptr = str + lastpos;
302 pre->segments[i].segsize = str_len - lastpos;
303 pre->segments[i].sysdepref = SEGMENTS_END;
305 sysdep_msg_arr[n_sysdep_strings].str[m] = pre;
308 sysdep_msg_arr[n_sysdep_strings].id_plural = mp->msgid_plural;
309 sysdep_msg_arr[n_sysdep_strings].id_plural_len =
310 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
311 n_sysdep_strings++;
313 else
315 /* Static string pair. */
316 msg_arr[nstrings].str[M_ID].pointer = mp->msgid;
317 msg_arr[nstrings].str[M_ID].length = strlen (mp->msgid) + 1;
318 msg_arr[nstrings].str[M_STR].pointer = mp->msgstr;
319 msg_arr[nstrings].str[M_STR].length = mp->msgstr_len;
320 msg_arr[nstrings].id_plural = mp->msgid_plural;
321 msg_arr[nstrings].id_plural_len =
322 (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0);
323 nstrings++;
326 for (m = 0; m < 2; m++)
327 if (intervals[m] != NULL)
328 free (intervals[m]);
331 /* Sort the table according to original string. */
332 if (nstrings > 0)
333 qsort (msg_arr, nstrings, sizeof (struct pre_message), compare_id);
335 /* We need major revision 1 if there are system dependent strings that use
336 "I" because older versions of gettext() crash when this occurs in a .mo
337 file. Otherwise use major revision 0. */
338 major_revision =
339 (have_outdigits ? MO_REVISION_NUMBER_WITH_SYSDEP_I : MO_REVISION_NUMBER);
341 /* We need minor revision 1 if there are system dependent strings.
342 Otherwise we choose minor revision 0 because it's supported by older
343 versions of libintl and revision 1 isn't. */
344 minor_revision = (n_sysdep_strings > 0 ? 1 : 0);
346 /* In minor revision >= 1, the hash table is obligatory. */
347 omit_hash_table = (no_hash_table && minor_revision == 0);
349 /* This should be explained:
350 Each string has an associate hashing value V, computed by a fixed
351 function. To locate the string we use open addressing with double
352 hashing. The first index will be V % M, where M is the size of the
353 hashing table. If no entry is found, iterating with a second,
354 independent hashing function takes place. This second value will
355 be 1 + V % (M - 2).
356 The approximate number of probes will be
358 for unsuccessful search: (1 - N / M) ^ -1
359 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
361 where N is the number of keys.
363 If we now choose M to be the next prime bigger than 4 / 3 * N,
364 we get the values
365 4 and 1.85 resp.
366 Because unsuccessful searches are unlikely this is a good value.
367 Formulas: [Knuth, The Art of Computer Programming, Volume 3,
368 Sorting and Searching, 1973, Addison Wesley] */
369 if (!omit_hash_table)
371 hash_tab_size = next_prime ((mlp->nitems * 4) / 3);
372 /* Ensure M > 2. */
373 if (hash_tab_size <= 2)
374 hash_tab_size = 3;
376 else
377 hash_tab_size = 0;
380 /* Second pass: Fill the structure describing the header. At the same time,
381 compute the sizes and offsets of the non-string parts of the file. */
383 /* Magic number. */
384 header.magic = _MAGIC;
385 /* Revision number of file format. */
386 header.revision = (major_revision << 16) + minor_revision;
388 header_size =
389 (minor_revision == 0
390 ? offsetof (struct mo_file_header, n_sysdep_segments)
391 : sizeof (struct mo_file_header));
392 offset = header_size;
394 /* Number of static string pairs. */
395 header.nstrings = nstrings;
397 /* Offset of table for original string offsets. */
398 header.orig_tab_offset = offset;
399 offset += nstrings * sizeof (struct string_desc);
400 orig_tab =
401 (struct string_desc *) xmalloc (nstrings * sizeof (struct string_desc));
403 /* Offset of table for translated string offsets. */
404 header.trans_tab_offset = offset;
405 offset += nstrings * sizeof (struct string_desc);
406 trans_tab =
407 (struct string_desc *) xmalloc (nstrings * sizeof (struct string_desc));
409 /* Size of hash table. */
410 header.hash_tab_size = hash_tab_size;
411 /* Offset of hash table. */
412 header.hash_tab_offset = offset;
413 offset += hash_tab_size * sizeof (nls_uint32);
415 if (minor_revision >= 1)
417 /* Size of table describing system dependent segments. */
418 header.n_sysdep_segments = n_sysdep_segments;
419 /* Offset of table describing system dependent segments. */
420 header.sysdep_segments_offset = offset;
421 offset += n_sysdep_segments * sizeof (struct sysdep_segment);
423 /* Number of system dependent string pairs. */
424 header.n_sysdep_strings = n_sysdep_strings;
426 /* Offset of table for original sysdep string offsets. */
427 header.orig_sysdep_tab_offset = offset;
428 offset += n_sysdep_strings * sizeof (nls_uint32);
430 /* Offset of table for translated sysdep string offsets. */
431 header.trans_sysdep_tab_offset = offset;
432 offset += n_sysdep_strings * sizeof (nls_uint32);
434 /* System dependent string descriptors. */
435 sysdep_tab_offset = offset;
436 for (m = 0; m < 2; m++)
437 for (j = 0; j < n_sysdep_strings; j++)
438 offset += sizeof (struct sysdep_string)
439 + sysdep_msg_arr[j].str[m]->segmentcount
440 * sizeof (struct segment_pair);
443 end_offset = offset;
446 /* Third pass: Write the non-string parts of the file. At the same time,
447 compute the offsets of each string, including the proper alignment. */
449 /* Write the header out. */
450 fwrite (&header, header_size, 1, output_file);
452 /* Table for original string offsets. */
453 /* Here output_file is at position header.orig_tab_offset. */
455 for (j = 0; j < nstrings; j++)
457 offset = roundup (offset, alignment);
458 orig_tab[j].length =
459 msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
460 orig_tab[j].offset = offset;
461 offset += orig_tab[j].length;
462 /* Subtract 1 because of the terminating NUL. */
463 orig_tab[j].length--;
465 fwrite (orig_tab, nstrings * sizeof (struct string_desc), 1, output_file);
467 /* Table for translated string offsets. */
468 /* Here output_file is at position header.trans_tab_offset. */
470 for (j = 0; j < nstrings; j++)
472 offset = roundup (offset, alignment);
473 trans_tab[j].length = msg_arr[j].str[M_STR].length;
474 trans_tab[j].offset = offset;
475 offset += trans_tab[j].length;
476 /* Subtract 1 because of the terminating NUL. */
477 trans_tab[j].length--;
479 fwrite (trans_tab, nstrings * sizeof (struct string_desc), 1, output_file);
481 /* Skip this part when no hash table is needed. */
482 if (!omit_hash_table)
484 nls_uint32 *hash_tab;
485 unsigned int j;
487 /* Here output_file is at position header.hash_tab_offset. */
489 /* Allocate room for the hashing table to be written out. */
490 hash_tab = (nls_uint32 *) xmalloc (hash_tab_size * sizeof (nls_uint32));
491 memset (hash_tab, '\0', hash_tab_size * sizeof (nls_uint32));
493 /* Insert all value in the hash table, following the algorithm described
494 above. */
495 for (j = 0; j < nstrings; j++)
497 nls_uint32 hash_val = hash_string (msg_arr[j].str[M_ID].pointer);
498 nls_uint32 idx = hash_val % hash_tab_size;
500 if (hash_tab[idx] != 0)
502 /* We need the second hashing function. */
503 nls_uint32 incr = 1 + (hash_val % (hash_tab_size - 2));
506 if (idx >= hash_tab_size - incr)
507 idx -= hash_tab_size - incr;
508 else
509 idx += incr;
510 while (hash_tab[idx] != 0);
513 hash_tab[idx] = j + 1;
516 /* Write the hash table out. */
517 fwrite (hash_tab, hash_tab_size * sizeof (nls_uint32), 1, output_file);
519 free (hash_tab);
522 if (minor_revision >= 1)
524 struct sysdep_segment *sysdep_segments_tab;
525 nls_uint32 *sysdep_tab;
526 size_t stoffset;
527 unsigned int i;
529 /* Here output_file is at position header.sysdep_segments_offset. */
531 sysdep_segments_tab =
532 (struct sysdep_segment *)
533 xmalloc (n_sysdep_segments * sizeof (struct sysdep_segment));
534 for (i = 0; i < n_sysdep_segments; i++)
536 offset = roundup (offset, alignment);
537 /* The "+ 1" accounts for the trailing NUL byte. */
538 sysdep_segments_tab[i].length = sysdep_segments[i].length + 1;
539 sysdep_segments_tab[i].offset = offset;
540 offset += sysdep_segments_tab[i].length;
543 fwrite (sysdep_segments_tab,
544 n_sysdep_segments * sizeof (struct sysdep_segment), 1,
545 output_file);
547 free (sysdep_segments_tab);
549 sysdep_tab =
550 (nls_uint32 *) xmalloc (n_sysdep_strings * sizeof (nls_uint32));
551 stoffset = sysdep_tab_offset;
553 for (m = 0; m < 2; m++)
555 /* Here output_file is at position
556 m == M_ID -> header.orig_sysdep_tab_offset,
557 m == M_STR -> header.trans_sysdep_tab_offset. */
559 for (j = 0; j < n_sysdep_strings; j++)
561 sysdep_tab[j] = stoffset;
562 stoffset += sizeof (struct sysdep_string)
563 + sysdep_msg_arr[j].str[m]->segmentcount
564 * sizeof (struct segment_pair);
566 /* Write the table for original/translated sysdep string offsets. */
567 fwrite (sysdep_tab, n_sysdep_strings * sizeof (nls_uint32), 1,
568 output_file);
571 free (sysdep_tab);
573 /* Here output_file is at position sysdep_tab_offset. */
575 for (m = 0; m < 2; m++)
576 for (j = 0; j < n_sysdep_strings; j++)
578 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
579 struct pre_sysdep_string *pre = msg->str[m];
580 struct sysdep_string *str =
581 (struct sysdep_string *)
582 xallocsa (sizeof (struct sysdep_string)
583 + pre->segmentcount * sizeof (struct segment_pair));
584 unsigned int i;
586 offset = roundup (offset, alignment);
587 str->offset = offset;
588 for (i = 0; i <= pre->segmentcount; i++)
590 str->segments[i].segsize = pre->segments[i].segsize;
591 str->segments[i].sysdepref = pre->segments[i].sysdepref;
592 offset += str->segments[i].segsize;
594 if (m == M_ID && msg->id_plural_len > 0)
596 str->segments[pre->segmentcount].segsize += msg->id_plural_len;
597 offset += msg->id_plural_len;
599 fwrite (str,
600 sizeof (struct sysdep_string)
601 + pre->segmentcount * sizeof (struct segment_pair),
602 1, output_file);
604 freesa (str);
608 /* Here output_file is at position end_offset. */
610 free (trans_tab);
611 free (orig_tab);
614 /* Fourth pass: Write the strings. */
616 offset = end_offset;
618 /* A few zero bytes for padding. */
619 null = alloca (alignment);
620 memset (null, '\0', alignment);
622 /* Now write the original strings. */
623 for (j = 0; j < nstrings; j++)
625 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
626 offset = roundup (offset, alignment);
628 fwrite (msg_arr[j].str[M_ID].pointer, msg_arr[j].str[M_ID].length, 1,
629 output_file);
630 if (msg_arr[j].id_plural_len > 0)
631 fwrite (msg_arr[j].id_plural, msg_arr[j].id_plural_len, 1,
632 output_file);
633 offset += msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len;
636 /* Now write the translated strings. */
637 for (j = 0; j < nstrings; j++)
639 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
640 offset = roundup (offset, alignment);
642 fwrite (msg_arr[j].str[M_STR].pointer, msg_arr[j].str[M_STR].length, 1,
643 output_file);
644 offset += msg_arr[j].str[M_STR].length;
647 if (minor_revision >= 1)
649 unsigned int i;
651 for (i = 0; i < n_sysdep_segments; i++)
653 fwrite (null, roundup (offset, alignment) - offset, 1, output_file);
654 offset = roundup (offset, alignment);
656 fwrite (sysdep_segments[i].pointer, sysdep_segments[i].length, 1,
657 output_file);
658 fwrite (null, 1, 1, output_file);
659 offset += sysdep_segments[i].length + 1;
662 for (m = 0; m < 2; m++)
663 for (j = 0; j < n_sysdep_strings; j++)
665 struct pre_sysdep_message *msg = &sysdep_msg_arr[j];
666 struct pre_sysdep_string *pre = msg->str[m];
668 fwrite (null, roundup (offset, alignment) - offset, 1,
669 output_file);
670 offset = roundup (offset, alignment);
672 for (i = 0; i <= pre->segmentcount; i++)
674 fwrite (pre->segments[i].segptr, pre->segments[i].segsize, 1,
675 output_file);
676 offset += pre->segments[i].segsize;
678 if (m == M_ID && msg->id_plural_len > 0)
680 fwrite (msg->id_plural, msg->id_plural_len, 1, output_file);
681 offset += msg->id_plural_len;
684 free (pre);
688 freea (null);
689 free (sysdep_msg_arr);
690 free (msg_arr);
695 msgdomain_write_mo (message_list_ty *mlp,
696 const char *domain_name,
697 const char *file_name)
699 FILE *output_file;
701 /* If no entry for this domain don't even create the file. */
702 if (mlp->nitems != 0)
704 if (strcmp (domain_name, "-") == 0)
706 output_file = stdout;
707 SET_BINARY (fileno (output_file));
709 else
711 output_file = fopen (file_name, "wb");
712 if (output_file == NULL)
714 error (0, errno, _("error while opening \"%s\" for writing"),
715 file_name);
716 return 1;
720 if (output_file != NULL)
722 write_table (output_file, mlp);
724 /* Make sure nothing went wrong. */
725 if (fwriteerror (output_file))
726 error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
727 file_name);
731 return 0;