Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / write-qt.c
blobe28dfa9e80e39a10eb4e2d7aa542533bad409da4
1 /* Writing Qt .qm files.
2 Copyright (C) 2003, 2005 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
23 /* Specification. */
24 #include "write-qt.h"
26 #include <assert.h>
27 #include <errno.h>
28 #include <stdbool.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #include "error.h"
34 #include "xerror.h"
35 #include "message.h"
36 #include "po-charset.h"
37 #include "msgl-iconv.h"
38 #include "hash-string.h"
39 #include "utf8-ucs4.h"
40 #include "xalloc.h"
41 #include "obstack.h"
42 #include "binary-io.h"
43 #include "fwriteerror.h"
44 #include "exit.h"
45 #include "gettext.h"
47 #define _(str) gettext (str)
49 /* Qt .qm files are read by the QTranslator::load() function and written
50 by the Qt QTranslator::save() function.
52 The Qt tool 'msg2qm' uses the latter function and can convert PO files
53 to .qm files. But since 'msg2qm' is marked as an "old" tool in Qt 3.0.5's
54 i18n.html documentation and therefore likely to disappear, we provide the
55 same functionality here.
57 The format of .qm files, as reverse engineered from the functions
58 QTranslator::save(const QString& filename, SaveMode mode)
59 QTranslator::squeeze(SaveMode mode)
60 QTranslatorMessage::write(QDataStream& stream, bool strip, Prefix prefix)
61 elfHash(const char* name)
62 in qt-3.0.5, is as follows:
64 It's a binary data format. Elements are u8 (byte), u16, u32. They are
65 written in big-endian order.
67 The file starts with a magic string of 16 bytes:
68 3C B8 64 18 CA EF 9C 95 CD 21 1C BF 60 A1 BD DD
70 Then come three sections. Each of the three sections is optional. Each
71 has this structure:
72 struct {
73 u8 section_type; // 0x42 = hashes, 0x69 = messages, 0x2f = contexts
74 u32 length; // number of bytes of the data
75 u8 data[length];
78 In the first section, the hashes section, the data has the following
79 structure:
80 It's a sorted array of
81 struct {
82 u32 hashcode; // elfHash of the concatenation of msgid and
83 // disambiguating-comment
84 u32 offset; // offset within the data[] of the messages section
86 It's sorted in ascending order by hashcode as primary sorting criterion
87 and - when the hashcodes are the same - by offset as secondary criterion.
89 In the second section, the messages section, the data has the following
90 structure:
91 It's a sequence of records, each representing a message, in no
92 particular order. Each record is a sequence of subsections, each
93 introduced by a particular subsection tag. The possible subsection tags
94 are (and they usually occur in this order):
95 - 03: Translation. Followed by the msgstr in UCS-2 or UTF-16 format:
96 struct {
97 u32 length;
98 u16 chars[length/2];
100 - 08: Disambiguating-comment. Followed by the NUL-terminated,
101 ISO-8859-1 encoded, disambiguating-comment string:
102 struct {
103 u32 length; // number of bytes including the NUL at the end
104 u8 chars[length];
106 - 06: SourceText, i.e. msgid. Followed by the NUL-terminated,
107 ISO-8859-1 encoded, msgid:
108 struct {
109 u32 length; // number of bytes including the NUL at the end
110 u8 chars[length];
112 - 02: SourceText16, i.e. msgid. Encoded as UCS-2, but must actually
113 be ISO-8859-1.
114 struct {
115 u32 length;
116 u16 chars[length/2];
118 This subsection tag is obsoleted by SourceText.
119 - 07: Context. Followed by the NUL-terminated, ISO-8859-1 encoded,
120 context string (usually a C++ class name or empty):
121 struct {
122 u32 length; // number of bytes including the NUL at the end
123 u8 chars[length];
125 - 04: Context16. Encoded as UCS-2, but must actually be ISO-8859-1.
126 struct {
127 u32 length;
128 u16 chars[length/2];
130 This subsection tag is obsoleted by Context.
131 - 05: Hash. Followed by
132 struct {
133 u32 hashcode; // elfHash of the concatenation of msgid and
134 // disambiguating-comment
136 - 01: End. Designates the end of the record. No further data.
137 Usually the following subsections are written, but some of them are
138 optional:
139 - 03: Translation.
140 - 08: Disambiguating-comment (optional).
141 - 06: SourceText (optional).
142 - 07: Context (optional).
143 - 05: Hash.
144 - 01: End.
145 A subsection can be omitted if the value to be output is the same as
146 for the previous record.
148 In the third section, the contexts section, the data contains a hash
149 table. Quite complicated.
151 The elfHash function is the same as our hash_string function, except that
152 at the end it maps a hash code of 0x00000000 to 0x00000001.
154 When we convert from PO file format, all disambiguating-comments and
155 contexts are empty, and therefore the contexts section can be omitted. */
/* Write a u8 (a single byte) to the output stream.
   The result of putc is not examined here; a failure stays recorded in
   OUTPUT_FILE's error indicator for the caller to test later.  */
static inline void
write_u8 (FILE *output_file, unsigned char value)
{
  putc (value, output_file);
}
/* Write a u16 (two bytes) to the output stream, most significant byte
   first.  The result of fwrite is not examined here; a failure stays
   recorded in OUTPUT_FILE's error indicator.  */
static inline void
write_u16 (FILE *output_file, unsigned short value)
{
  const unsigned char bytes[2] =
    {
      (value >> 8) & 0xff,
      value & 0xff
    };

  fwrite (bytes, sizeof bytes, 1, output_file);
}
/* Write a u32 (four bytes) to the output stream, most significant byte
   first.  The result of fwrite is not examined here; a failure stays
   recorded in OUTPUT_FILE's error indicator.  */
static inline void
write_u32 (FILE *output_file, unsigned int value)
{
  const unsigned char bytes[4] =
    {
      (value >> 24) & 0xff,
      (value >> 16) & 0xff,
      (value >> 8) & 0xff,
      value & 0xff
    };

  fwrite (bytes, sizeof bytes, 1, output_file);
}
/* Allocation hooks that the obstack macros expand to.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* Add a u8 (a single byte) to the object being grown in MEMPOOL.  */
static void
append_u8 (struct obstack *mempool, unsigned char value)
{
  obstack_grow (mempool, &value, 1);
}
/* Add a u16 (two bytes, most significant first) to the object being
   grown in MEMPOOL.  */
static void
append_u16 (struct obstack *mempool, unsigned short value)
{
  const unsigned char bytes[2] =
    {
      (value >> 8) & 0xff,
      value & 0xff
    };

  obstack_grow (mempool, bytes, 2);
}
/* Add a u32 (four bytes, most significant first) to the object being
   grown in MEMPOOL.  */
static void
append_u32 (struct obstack *mempool, unsigned int value)
{
  const unsigned char bytes[4] =
    {
      (value >> 24) & 0xff,
      (value >> 16) & 0xff,
      (value >> 8) & 0xff,
      value & 0xff
    };

  obstack_grow (mempool, bytes, 4);
}
/* Add an ISO-8859-1 encoded string to an obstack.
   The string is stored as a u32 byte count followed by the bytes; both
   the count and the stored bytes include the terminating NUL.  */
static void
append_base_string (struct obstack *mempool, const char *string)
{
  size_t size_with_nul = strlen (string) + 1;

  append_u32 (mempool, size_with_nul);
  obstack_grow (mempool, string, size_with_nul);
}
/* Add an UTF-16 encoded string to an obstack.
   STRING points to LENGTH 16-bit code units.  The string is stored as a
   u32 byte count (LENGTH * 2) followed by each unit as a big-endian
   u16.  No terminator is written.  */
static void
append_unicode_string (struct obstack *mempool, const unsigned short *string,
                       size_t length)
{
  size_t i;

  append_u32 (mempool, length * 2);
  for (i = 0; i < length; i++)
    append_u16 (mempool, string[i]);
}
/* Retrieve a 4-byte big-endian integer from memory.
   Each byte is widened to unsigned int before shifting: an unsigned char
   otherwise promotes to (signed) int, and shifting a byte >= 0x80 left
   by 24 would overflow it.  */
static inline unsigned int
peek_u32 (const unsigned char *p)
{
  return ((unsigned int) p[0] << 24)
         | ((unsigned int) p[1] << 16)
         | ((unsigned int) p[2] << 8)
         | (unsigned int) p[3];
}
/* Convert an UTF-8 string to ISO-8859-1, without error checking.
   Returns a freshly allocated NUL-terminated string that the caller
   must free.  Aborts if a decoded character does not fit in one byte.  */
static char *
conv_to_iso_8859_1 (const char *string)
{
  size_t length = strlen (string);
  const char *str = string;
  const char *str_limit = string + length;
  /* Conversion to ISO-8859-1 can only reduce the number of bytes.  */
  char *result = (char *) xmalloc (length + 1);
  char *q = result;

  while (str < str_limit)
    {
      unsigned int uc;

      str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
      /* It has already been verified that the string fits in
         ISO-8859-1.  */
      if (uc >= 0x100)
        abort ();
      /* Store as ISO-8859-1.  */
      *q++ = (unsigned char) uc;
    }
  *q = '\0';
  /* q never moves backwards, so the difference is non-negative and the
     cast keeps the comparison unsigned on both sides.  */
  assert ((size_t) (q - result) <= length);

  return result;
}
/* Convert an UTF-8 string to UTF-16, returning its size (number of UTF-16
   code units) in *SIZEP.
   Returns a freshly allocated array that the caller must free; it is not
   NUL-terminated.  */
static unsigned short *
conv_to_utf16 (const char *string, size_t *sizep)
{
  size_t length = strlen (string);
  const char *str = string;
  const char *str_limit = string + length;
  /* LENGTH code units are enough: a character below 0x10000 yields one
     unit and a larger one yields two.  This assumes u8_mbtouc consumes
     at least one input byte per character and four for characters
     >= 0x10000 -- confirm against utf8-ucs4.h.  */
  unsigned short *result =
    (unsigned short *) xmalloc (length * sizeof (unsigned short));
  unsigned short *q = result;

  while (str < str_limit)
    {
      unsigned int uc;

      str += u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
      if (uc < 0x10000)
        /* UCS-2 character.  */
        *q++ = (unsigned short) uc;
      else
        {
          /* UTF-16 surrogate pair: high surrogate, then low surrogate.  */
          *q++ = 0xd800 + ((uc - 0x10000) >> 10);
          *q++ = 0xdc00 + ((uc - 0x10000) & 0x3ff);
        }
    }
  /* Compare element counts with element counts.  */
  assert ((size_t) (q - result) <= length);

  *sizep = q - result;
  return result;
}
/* Return the Qt hash code of a string: the value of hash_string, except
   that a result of 0 is replaced by 1.  */
static unsigned int
string_hashcode (const char *str)
{
  unsigned int h = hash_string (str);

  return h != 0 ? h : 1;
}
/* Compare two entries of the hashes section.
   Each entry is 8 bytes: a big-endian u32 hashcode followed by a
   big-endian u32 offset.  Because both fields are stored most
   significant byte first, comparing the 8 bytes lexicographically as
   unsigned chars orders entries by hashcode first and by offset second.
   Returns -1, 0 or 1, suitable for qsort.  */
static int
cmp_hashes (const void *va, const void *vb)
{
  int order = memcmp (va, vb, 8);

  /* memcmp only promises a sign; normalize to -1 / 0 / 1.  */
  return (order > 0) - (order < 0);
}
/* Write a section to the output stream: the one-byte TAG, the u32 byte
   count SIZE, then SIZE bytes taken from DATA.
   Nothing at all is written when SIZE is 0, because an empty section
   can be omitted.  */
static void
write_section (FILE *output_file, unsigned char tag, void *data, size_t size)
{
  if (size == 0)
    return;

  write_u8 (output_file, tag);
  write_u32 (output_file, size);
  fwrite (data, size, 1, output_file);
}
367 /* Write an entire .qm file. */
368 static void
369 write_qm (FILE *output_file, message_list_ty *mlp)
371 static unsigned char magic[16] =
373 0x3C, 0xB8, 0x64, 0x18, 0xCA, 0xEF, 0x9C, 0x95,
374 0xCD, 0x21, 0x1C, 0xBF, 0x60, 0xA1, 0xBD, 0xDD
376 struct obstack hashes_pool;
377 struct obstack messages_pool;
378 size_t j;
380 obstack_init (&hashes_pool);
381 obstack_init (&messages_pool);
383 /* Prepare the hashes section and the messages section. */
384 for (j = 0; j < mlp->nitems; j++)
386 message_ty *mp = mlp->item[j];
388 /* No need to emit the header entry, it's not needed at runtime. */
389 if (mp->msgid[0] != '\0')
391 char *msgid_as_iso_8859_1 = conv_to_iso_8859_1 (mp->msgid);
392 size_t msgstr_len;
393 unsigned short *msgstr_as_utf16 =
394 conv_to_utf16 (mp->msgstr, &msgstr_len);
395 unsigned int hashcode = string_hashcode (msgid_as_iso_8859_1);
396 unsigned int offset = obstack_object_size (&messages_pool);
398 /* Add a record to the hashes section. */
399 append_u32 (&hashes_pool, hashcode);
400 append_u32 (&hashes_pool, offset);
402 /* Add a record to the messages section. */
404 append_u8 (&messages_pool, 0x03);
405 append_unicode_string (&messages_pool, msgstr_as_utf16, msgstr_len);
407 append_u8 (&messages_pool, 0x08);
408 append_base_string (&messages_pool, "");
410 append_u8 (&messages_pool, 0x06);
411 append_base_string (&messages_pool, msgid_as_iso_8859_1);
413 append_u8 (&messages_pool, 0x07);
414 append_base_string (&messages_pool, "");
416 append_u8 (&messages_pool, 0x05);
417 append_u32 (&messages_pool, hashcode);
419 append_u8 (&messages_pool, 0x01);
421 free (msgstr_as_utf16);
422 free (msgid_as_iso_8859_1);
426 /* Sort the hashes section. */
428 size_t nstrings = obstack_object_size (&hashes_pool) / 8;
429 if (nstrings > 0)
430 qsort (obstack_base (&hashes_pool), nstrings, 8, cmp_hashes);
433 /* Write the magic number. */
434 fwrite (magic, sizeof (magic), 1, output_file);
436 /* Write the hashes section. */
437 write_section (output_file, 0x42, obstack_base (&hashes_pool),
438 obstack_object_size (&hashes_pool));
440 /* Write the messages section. */
441 write_section (output_file, 0x69, obstack_base (&messages_pool),
442 obstack_object_size (&messages_pool));
444 /* Omit the contexts section. */
445 #if 0
446 write_section (output_file, 0x2f, ...);
447 #endif
449 obstack_free (&messages_pool, NULL);
450 obstack_free (&hashes_pool, NULL);
455 msgdomain_write_qt (message_list_ty *mlp, const char *canon_encoding,
456 const char *domain_name, const char *file_name)
458 FILE *output_file;
460 /* If no entry for this domain don't even create the file. */
461 if (mlp->nitems != 0)
463 /* Determine whether mlp has plural entries. */
465 bool has_plural;
466 size_t j;
468 has_plural = false;
469 for (j = 0; j < mlp->nitems; j++)
470 if (mlp->item[j]->msgid_plural != NULL)
471 has_plural = true;
472 if (has_plural)
474 multiline_error (xstrdup (""),
475 xstrdup (_("\
476 message catalog has plural form translations\n\
477 but the Qt message catalog format doesn't support plural handling\n")));
478 return 1;
482 /* Convert the messages to Unicode. */
483 iconv_message_list (mlp, canon_encoding, po_charset_utf8, NULL);
485 /* Determine whether mlp has non-ISO-8859-1 msgid entries. */
487 size_t j;
489 for (j = 0; j < mlp->nitems; j++)
491 const char *string = mlp->item[j]->msgid;
493 /* An UTF-8 encoded string fits in ISO-8859-1 if and only if all
494 its bytes are < 0xc4. */
495 for (; *string; string++)
496 if ((unsigned char) *string >= 0xc4)
498 multiline_error (xstrdup (""),
499 xstrdup (_("\
500 message catalog has msgid strings containing characters outside ISO-8859-1\n\
501 but the Qt message catalog format supports Unicode only in the translated\n\
502 strings, not in the untranslated strings\n")));
503 return 1;
508 if (strcmp (domain_name, "-") == 0)
510 output_file = stdout;
511 SET_BINARY (fileno (output_file));
513 else
515 output_file = fopen (file_name, "wb");
516 if (output_file == NULL)
518 error (0, errno, _("error while opening \"%s\" for writing"),
519 file_name);
520 return 1;
524 if (output_file != NULL)
526 write_qm (output_file, mlp);
528 /* Make sure nothing went wrong. */
529 if (fwriteerror (output_file))
530 error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
531 file_name);
535 return 0;