1 /* Writing binary .mo files.
2 Copyright (C) 1995-1998, 2000-2005 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
34 # include <sys/param.h>
37 /* These two include files describe the binary .mo format. */
39 #include "hash-string.h"
47 #include "binary-io.h"
48 #include "fwriteerror.h"
52 #define _(str) gettext (str)
54 #define freea(p) /* nothing */
56 /* Usually defined in <sys/param.h>. */
58 # if defined __GNUC__ && __GNUC__ >= 2
59 # define roundup(x, y) ({typeof(x) _x = (x); typeof(y) _y = (y); \
60 ((_x + _y - 1) / _y) * _y; })
62 # define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
67 /* Alignment of strings in resulting .mo file. */
70 /* True if no hash table in .mo is wanted. */
74 /* Indices into the strings contained in 'struct pre_message' and
75 'struct pre_sysdep_message'. */
78 M_ID
= 0, /* msgid - the original string */
79 M_STR
= 1 /* msgstr - the translated string */
82 /* An intermediate data structure representing a 'struct string_desc'. */
89 /* An intermediate data structure representing a message. */
92 struct pre_string str
[2];
93 const char *id_plural
;
98 compare_id (const void *pval1
, const void *pval2
)
100 return strcmp (((struct pre_message
*) pval1
)->str
[M_ID
].pointer
,
101 ((struct pre_message
*) pval2
)->str
[M_ID
].pointer
);
105 /* An intermediate data structure representing a 'struct sysdep_segment'. */
106 struct pre_sysdep_segment
112 /* An intermediate data structure representing a 'struct segment_pair'. */
113 struct pre_segment_pair
120 /* An intermediate data structure representing a 'struct sysdep_string'. */
121 struct pre_sysdep_string
123 unsigned int segmentcount
;
124 struct pre_segment_pair segments
[1];
127 /* An intermediate data structure representing a message with system dependent
128 strings. */
129 struct pre_sysdep_message
131 struct pre_sysdep_string
*str
[2];
132 const char *id_plural
;
133 size_t id_plural_len
;
136 /* Write the message list to the given open file. */
138 write_table (FILE *output_file
, message_list_ty
*mlp
)
141 struct pre_message
*msg_arr
;
142 size_t n_sysdep_strings
;
143 struct pre_sysdep_message
*sysdep_msg_arr
;
144 size_t n_sysdep_segments
;
145 struct pre_sysdep_segment
*sysdep_segments
;
149 bool omit_hash_table
;
150 nls_uint32 hash_tab_size
;
151 struct mo_file_header header
; /* Header of the .mo file to be written. */
154 struct string_desc
*orig_tab
;
155 struct string_desc
*trans_tab
;
156 size_t sysdep_tab_offset
= 0;
161 /* First pass: Move the static string pairs into an array, for sorting,
162 and at the same time, compute the segments of the system dependent
163 strings. */
166 (struct pre_message
*)
167 xmalloc (mlp
->nitems
* sizeof (struct pre_message
));
168 n_sysdep_strings
= 0;
170 (struct pre_sysdep_message
*)
171 xmalloc (mlp
->nitems
* sizeof (struct pre_sysdep_message
));
172 n_sysdep_segments
= 0;
173 sysdep_segments
= NULL
;
174 have_outdigits
= false;
175 for (j
= 0; j
< mlp
->nitems
; j
++)
177 message_ty
*mp
= mlp
->item
[j
];
178 struct interval
*intervals
[2];
179 size_t nintervals
[2];
181 intervals
[M_ID
] = NULL
;
182 nintervals
[M_ID
] = 0;
183 intervals
[M_STR
] = NULL
;
184 nintervals
[M_STR
] = 0;
186 /* Test if mp contains system dependent strings and thus
187 requires the use of the .mo file minor revision 1. */
188 if (possible_format_p (mp
->is_format
[format_c
])
189 || possible_format_p (mp
->is_format
[format_objc
]))
191 /* Check whether msgid or msgstr contain ISO C 99 <inttypes.h>
192 format string directives. No need to check msgid_plural, because
193 it is not accessed by the [n]gettext() function family. */
197 get_sysdep_c_format_directives (mp
->msgid
, false,
198 &intervals
[M_ID
], &nintervals
[M_ID
]);
200 p_end
= mp
->msgstr
+ mp
->msgstr_len
;
201 for (p
= mp
->msgstr
; p
< p_end
; p
+= strlen (p
) + 1)
203 struct interval
*part_intervals
;
204 size_t part_nintervals
;
206 get_sysdep_c_format_directives (p
, true,
209 if (part_nintervals
> 0)
211 size_t d
= p
- mp
->msgstr
;
216 xrealloc (intervals
[M_STR
],
217 (nintervals
[M_STR
] + part_nintervals
)
218 * sizeof (struct interval
));
219 for (i
= 0; i
< part_nintervals
; i
++)
221 intervals
[M_STR
][nintervals
[M_STR
] + i
].startpos
=
222 d
+ part_intervals
[i
].startpos
;
223 intervals
[M_STR
][nintervals
[M_STR
] + i
].endpos
=
224 d
+ part_intervals
[i
].endpos
;
226 nintervals
[M_STR
] += part_nintervals
;
231 if (nintervals
[M_ID
] > 0 || nintervals
[M_STR
] > 0)
233 /* System dependent string pair. */
234 for (m
= 0; m
< 2; m
++)
236 struct pre_sysdep_string
*pre
=
237 (struct pre_sysdep_string
*)
238 xmalloc (sizeof (struct pre_sysdep_string
)
239 + nintervals
[m
] * sizeof (struct pre_segment_pair
));
248 str_len
= strlen (mp
->msgid
) + 1;
253 str_len
= mp
->msgstr_len
;
257 pre
->segmentcount
= nintervals
[m
];
258 for (i
= 0; i
< nintervals
[m
]; i
++)
264 pre
->segments
[i
].segptr
= str
+ lastpos
;
265 pre
->segments
[i
].segsize
= intervals
[m
][i
].startpos
- lastpos
;
267 length
= intervals
[m
][i
].endpos
- intervals
[m
][i
].startpos
;
268 pointer
= str
+ intervals
[m
][i
].startpos
;
270 && pointer
[0] == '<' && pointer
[length
- 1] == '>')
272 /* Skip the '<' and '>' markers. */
277 for (r
= 0; r
< n_sysdep_segments
; r
++)
278 if (sysdep_segments
[r
].length
== length
279 && memcmp (sysdep_segments
[r
].pointer
, pointer
, length
)
282 if (r
== n_sysdep_segments
)
286 (struct pre_sysdep_segment
*)
287 xrealloc (sysdep_segments
,
289 * sizeof (struct pre_sysdep_segment
));
290 sysdep_segments
[r
].length
= length
;
291 sysdep_segments
[r
].pointer
= pointer
;
294 pre
->segments
[i
].sysdepref
= r
;
296 if (length
== 1 && *pointer
== 'I')
297 have_outdigits
= true;
299 lastpos
= intervals
[m
][i
].endpos
;
301 pre
->segments
[i
].segptr
= str
+ lastpos
;
302 pre
->segments
[i
].segsize
= str_len
- lastpos
;
303 pre
->segments
[i
].sysdepref
= SEGMENTS_END
;
305 sysdep_msg_arr
[n_sysdep_strings
].str
[m
] = pre
;
308 sysdep_msg_arr
[n_sysdep_strings
].id_plural
= mp
->msgid_plural
;
309 sysdep_msg_arr
[n_sysdep_strings
].id_plural_len
=
310 (mp
->msgid_plural
!= NULL
? strlen (mp
->msgid_plural
) + 1 : 0);
315 /* Static string pair. */
316 msg_arr
[nstrings
].str
[M_ID
].pointer
= mp
->msgid
;
317 msg_arr
[nstrings
].str
[M_ID
].length
= strlen (mp
->msgid
) + 1;
318 msg_arr
[nstrings
].str
[M_STR
].pointer
= mp
->msgstr
;
319 msg_arr
[nstrings
].str
[M_STR
].length
= mp
->msgstr_len
;
320 msg_arr
[nstrings
].id_plural
= mp
->msgid_plural
;
321 msg_arr
[nstrings
].id_plural_len
=
322 (mp
->msgid_plural
!= NULL
? strlen (mp
->msgid_plural
) + 1 : 0);
326 for (m
= 0; m
< 2; m
++)
327 if (intervals
[m
] != NULL
)
331 /* Sort the table according to original string. */
333 qsort (msg_arr
, nstrings
, sizeof (struct pre_message
), compare_id
);
335 /* We need major revision 1 if there are system dependent strings that use
336 "I" because older versions of gettext() crash when this occurs in a .mo
337 file. Otherwise use major revision 0. */
339 (have_outdigits
? MO_REVISION_NUMBER_WITH_SYSDEP_I
: MO_REVISION_NUMBER
);
341 /* We need minor revision 1 if there are system dependent strings.
342 Otherwise we choose minor revision 0 because it's supported by older
343 versions of libintl and revision 1 isn't. */
344 minor_revision
= (n_sysdep_strings
> 0 ? 1 : 0);
346 /* In minor revision >= 1, the hash table is obligatory. */
347 omit_hash_table
= (no_hash_table
&& minor_revision
== 0);
349 /* This should be explained:
350 Each string has an associated hashing value V, computed by a fixed
351 function. To locate the string we use open addressing with double
352 hashing. The first index will be V % M, where M is the size of the
353 hashing table. If no entry is found, iterating with a second,
354 independent hashing function takes place. This second value will
355 be 1 + V % (M - 2).
356 The approximate number of probes will be
358 for unsuccessful search: (1 - N / M) ^ -1
359 for successful search: - (N / M) ^ -1 * ln (1 - N / M)
361 where N is the number of keys.
363 If we now choose M to be the next prime bigger than 4 / 3 * N,
364 we get the values 4 and 1.85 respectively.
366 Because unsuccessful searches are unlikely this is a good value.
367 Formulas: [Knuth, The Art of Computer Programming, Volume 3,
368 Sorting and Searching, 1973, Addison Wesley] */
369 if (!omit_hash_table
)
371 hash_tab_size
= next_prime ((mlp
->nitems
* 4) / 3);
373 if (hash_tab_size
<= 2)
380 /* Second pass: Fill the structure describing the header. At the same time,
381 compute the sizes and offsets of the non-string parts of the file. */
384 header
.magic
= _MAGIC
;
385 /* Revision number of file format. */
386 header
.revision
= (major_revision
<< 16) + minor_revision
;
390 ? offsetof (struct mo_file_header
, n_sysdep_segments
)
391 : sizeof (struct mo_file_header
));
392 offset
= header_size
;
394 /* Number of static string pairs. */
395 header
.nstrings
= nstrings
;
397 /* Offset of table for original string offsets. */
398 header
.orig_tab_offset
= offset
;
399 offset
+= nstrings
* sizeof (struct string_desc
);
401 (struct string_desc
*) xmalloc (nstrings
* sizeof (struct string_desc
));
403 /* Offset of table for translated string offsets. */
404 header
.trans_tab_offset
= offset
;
405 offset
+= nstrings
* sizeof (struct string_desc
);
407 (struct string_desc
*) xmalloc (nstrings
* sizeof (struct string_desc
));
409 /* Size of hash table. */
410 header
.hash_tab_size
= hash_tab_size
;
411 /* Offset of hash table. */
412 header
.hash_tab_offset
= offset
;
413 offset
+= hash_tab_size
* sizeof (nls_uint32
);
415 if (minor_revision
>= 1)
417 /* Size of table describing system dependent segments. */
418 header
.n_sysdep_segments
= n_sysdep_segments
;
419 /* Offset of table describing system dependent segments. */
420 header
.sysdep_segments_offset
= offset
;
421 offset
+= n_sysdep_segments
* sizeof (struct sysdep_segment
);
423 /* Number of system dependent string pairs. */
424 header
.n_sysdep_strings
= n_sysdep_strings
;
426 /* Offset of table for original sysdep string offsets. */
427 header
.orig_sysdep_tab_offset
= offset
;
428 offset
+= n_sysdep_strings
* sizeof (nls_uint32
);
430 /* Offset of table for translated sysdep string offsets. */
431 header
.trans_sysdep_tab_offset
= offset
;
432 offset
+= n_sysdep_strings
* sizeof (nls_uint32
);
434 /* System dependent string descriptors. */
435 sysdep_tab_offset
= offset
;
436 for (m
= 0; m
< 2; m
++)
437 for (j
= 0; j
< n_sysdep_strings
; j
++)
438 offset
+= sizeof (struct sysdep_string
)
439 + sysdep_msg_arr
[j
].str
[m
]->segmentcount
440 * sizeof (struct segment_pair
);
446 /* Third pass: Write the non-string parts of the file. At the same time,
447 compute the offsets of each string, including the proper alignment. */
449 /* Write the header out. */
450 fwrite (&header
, header_size
, 1, output_file
);
452 /* Table for original string offsets. */
453 /* Here output_file is at position header.orig_tab_offset. */
455 for (j
= 0; j
< nstrings
; j
++)
457 offset
= roundup (offset
, alignment
);
459 msg_arr
[j
].str
[M_ID
].length
+ msg_arr
[j
].id_plural_len
;
460 orig_tab
[j
].offset
= offset
;
461 offset
+= orig_tab
[j
].length
;
462 /* Subtract 1 because of the terminating NUL. */
463 orig_tab
[j
].length
--;
465 fwrite (orig_tab
, nstrings
* sizeof (struct string_desc
), 1, output_file
);
467 /* Table for translated string offsets. */
468 /* Here output_file is at position header.trans_tab_offset. */
470 for (j
= 0; j
< nstrings
; j
++)
472 offset
= roundup (offset
, alignment
);
473 trans_tab
[j
].length
= msg_arr
[j
].str
[M_STR
].length
;
474 trans_tab
[j
].offset
= offset
;
475 offset
+= trans_tab
[j
].length
;
476 /* Subtract 1 because of the terminating NUL. */
477 trans_tab
[j
].length
--;
479 fwrite (trans_tab
, nstrings
* sizeof (struct string_desc
), 1, output_file
);
481 /* Skip this part when no hash table is needed. */
482 if (!omit_hash_table
)
484 nls_uint32
*hash_tab
;
487 /* Here output_file is at position header.hash_tab_offset. */
489 /* Allocate room for the hashing table to be written out. */
490 hash_tab
= (nls_uint32
*) xmalloc (hash_tab_size
* sizeof (nls_uint32
));
491 memset (hash_tab
, '\0', hash_tab_size
* sizeof (nls_uint32
));
493 /* Insert all values in the hash table, following the algorithm described
494 above. */
495 for (j
= 0; j
< nstrings
; j
++)
497 nls_uint32 hash_val
= hash_string (msg_arr
[j
].str
[M_ID
].pointer
);
498 nls_uint32 idx
= hash_val
% hash_tab_size
;
500 if (hash_tab
[idx
] != 0)
502 /* We need the second hashing function. */
503 nls_uint32 incr
= 1 + (hash_val
% (hash_tab_size
- 2));
506 if (idx
>= hash_tab_size
- incr
)
507 idx
-= hash_tab_size
- incr
;
510 while (hash_tab
[idx
] != 0);
513 hash_tab
[idx
] = j
+ 1;
516 /* Write the hash table out. */
517 fwrite (hash_tab
, hash_tab_size
* sizeof (nls_uint32
), 1, output_file
);
522 if (minor_revision
>= 1)
524 struct sysdep_segment
*sysdep_segments_tab
;
525 nls_uint32
*sysdep_tab
;
529 /* Here output_file is at position header.sysdep_segments_offset. */
531 sysdep_segments_tab
=
532 (struct sysdep_segment
*)
533 xmalloc (n_sysdep_segments
* sizeof (struct sysdep_segment
));
534 for (i
= 0; i
< n_sysdep_segments
; i
++)
536 offset
= roundup (offset
, alignment
);
537 /* The "+ 1" accounts for the trailing NUL byte. */
538 sysdep_segments_tab
[i
].length
= sysdep_segments
[i
].length
+ 1;
539 sysdep_segments_tab
[i
].offset
= offset
;
540 offset
+= sysdep_segments_tab
[i
].length
;
543 fwrite (sysdep_segments_tab
,
544 n_sysdep_segments
* sizeof (struct sysdep_segment
), 1,
547 free (sysdep_segments_tab
);
550 (nls_uint32
*) xmalloc (n_sysdep_strings
* sizeof (nls_uint32
));
551 stoffset
= sysdep_tab_offset
;
553 for (m
= 0; m
< 2; m
++)
555 /* Here output_file is at position
556 m == M_ID -> header.orig_sysdep_tab_offset,
557 m == M_STR -> header.trans_sysdep_tab_offset. */
559 for (j
= 0; j
< n_sysdep_strings
; j
++)
561 sysdep_tab
[j
] = stoffset
;
562 stoffset
+= sizeof (struct sysdep_string
)
563 + sysdep_msg_arr
[j
].str
[m
]->segmentcount
564 * sizeof (struct segment_pair
);
566 /* Write the table for original/translated sysdep string offsets. */
567 fwrite (sysdep_tab
, n_sysdep_strings
* sizeof (nls_uint32
), 1,
573 /* Here output_file is at position sysdep_tab_offset. */
575 for (m
= 0; m
< 2; m
++)
576 for (j
= 0; j
< n_sysdep_strings
; j
++)
578 struct pre_sysdep_message
*msg
= &sysdep_msg_arr
[j
];
579 struct pre_sysdep_string
*pre
= msg
->str
[m
];
580 struct sysdep_string
*str
=
581 (struct sysdep_string
*)
582 xallocsa (sizeof (struct sysdep_string
)
583 + pre
->segmentcount
* sizeof (struct segment_pair
));
586 offset
= roundup (offset
, alignment
);
587 str
->offset
= offset
;
588 for (i
= 0; i
<= pre
->segmentcount
; i
++)
590 str
->segments
[i
].segsize
= pre
->segments
[i
].segsize
;
591 str
->segments
[i
].sysdepref
= pre
->segments
[i
].sysdepref
;
592 offset
+= str
->segments
[i
].segsize
;
594 if (m
== M_ID
&& msg
->id_plural_len
> 0)
596 str
->segments
[pre
->segmentcount
].segsize
+= msg
->id_plural_len
;
597 offset
+= msg
->id_plural_len
;
600 sizeof (struct sysdep_string
)
601 + pre
->segmentcount
* sizeof (struct segment_pair
),
608 /* Here output_file is at position end_offset. */
614 /* Fourth pass: Write the strings. */
618 /* A few zero bytes for padding. */
619 null
= alloca (alignment
);
620 memset (null
, '\0', alignment
);
622 /* Now write the original strings. */
623 for (j
= 0; j
< nstrings
; j
++)
625 fwrite (null
, roundup (offset
, alignment
) - offset
, 1, output_file
);
626 offset
= roundup (offset
, alignment
);
628 fwrite (msg_arr
[j
].str
[M_ID
].pointer
, msg_arr
[j
].str
[M_ID
].length
, 1,
630 if (msg_arr
[j
].id_plural_len
> 0)
631 fwrite (msg_arr
[j
].id_plural
, msg_arr
[j
].id_plural_len
, 1,
633 offset
+= msg_arr
[j
].str
[M_ID
].length
+ msg_arr
[j
].id_plural_len
;
636 /* Now write the translated strings. */
637 for (j
= 0; j
< nstrings
; j
++)
639 fwrite (null
, roundup (offset
, alignment
) - offset
, 1, output_file
);
640 offset
= roundup (offset
, alignment
);
642 fwrite (msg_arr
[j
].str
[M_STR
].pointer
, msg_arr
[j
].str
[M_STR
].length
, 1,
644 offset
+= msg_arr
[j
].str
[M_STR
].length
;
647 if (minor_revision
>= 1)
651 for (i
= 0; i
< n_sysdep_segments
; i
++)
653 fwrite (null
, roundup (offset
, alignment
) - offset
, 1, output_file
);
654 offset
= roundup (offset
, alignment
);
656 fwrite (sysdep_segments
[i
].pointer
, sysdep_segments
[i
].length
, 1,
658 fwrite (null
, 1, 1, output_file
);
659 offset
+= sysdep_segments
[i
].length
+ 1;
662 for (m
= 0; m
< 2; m
++)
663 for (j
= 0; j
< n_sysdep_strings
; j
++)
665 struct pre_sysdep_message
*msg
= &sysdep_msg_arr
[j
];
666 struct pre_sysdep_string
*pre
= msg
->str
[m
];
668 fwrite (null
, roundup (offset
, alignment
) - offset
, 1,
670 offset
= roundup (offset
, alignment
);
672 for (i
= 0; i
<= pre
->segmentcount
; i
++)
674 fwrite (pre
->segments
[i
].segptr
, pre
->segments
[i
].segsize
, 1,
676 offset
+= pre
->segments
[i
].segsize
;
678 if (m
== M_ID
&& msg
->id_plural_len
> 0)
680 fwrite (msg
->id_plural
, msg
->id_plural_len
, 1, output_file
);
681 offset
+= msg
->id_plural_len
;
689 free (sysdep_msg_arr
);
695 msgdomain_write_mo (message_list_ty
*mlp
,
696 const char *domain_name
,
697 const char *file_name
)
701 /* If no entry for this domain don't even create the file. */
702 if (mlp
->nitems
!= 0)
704 if (strcmp (domain_name
, "-") == 0)
706 output_file
= stdout
;
707 SET_BINARY (fileno (output_file
));
711 output_file
= fopen (file_name
, "wb");
712 if (output_file
== NULL
)
714 error (0, errno
, _("error while opening \"%s\" for writing"),
720 if (output_file
!= NULL
)
722 write_table (output_file
, mlp
);
724 /* Make sure nothing went wrong. */
725 if (fwriteerror (output_file
))
726 error (EXIT_FAILURE
, errno
, _("error while writing \"%s\" file"),