Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / src / msgl-iconv.c
blobd5f93b7efb7c87bc23c7f1e53ef80290be9a8d19
1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23 #include <alloca.h>
25 /* Specification. */
26 #include "msgl-iconv.h"
28 #include <errno.h>
29 #include <stdbool.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #if HAVE_ICONV
34 # include <iconv.h>
35 #endif
37 #include "error.h"
38 #include "progname.h"
39 #include "basename.h"
40 #include "message.h"
41 #include "po-charset.h"
42 #include "msgl-ascii.h"
43 #include "xalloc.h"
44 #include "xallocsa.h"
45 #include "strstr.h"
46 #include "exit.h"
47 #include "gettext.h"
49 #define _(str) gettext (str)
52 #if HAVE_ICONV
54 /* Converts an entire string from one encoding to another, using iconv.
55 Return value: 0 if successful, otherwise -1 and errno set. */
56 static int
57 iconv_string (iconv_t cd, const char *start, const char *end,
58 char **resultp, size_t *lengthp)
60 #define tmpbufsize 4096
61 size_t length;
62 char *result;
64 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
65 # if defined _LIBICONV_VERSION \
66 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
67 /* Set to the initial state. */
68 iconv (cd, NULL, NULL, NULL, NULL);
69 # endif
71 /* Determine the length we need. */
73 size_t count = 0;
74 char tmpbuf[tmpbufsize];
75 const char *inptr = start;
76 size_t insize = end - start;
78 while (insize > 0)
80 char *outptr = tmpbuf;
81 size_t outsize = tmpbufsize;
82 size_t res = iconv (cd,
83 (ICONV_CONST char **) &inptr, &insize,
84 &outptr, &outsize);
86 if (res == (size_t)(-1))
88 if (errno == E2BIG)
90 else if (errno == EINVAL)
91 break;
92 else
93 return -1;
95 # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
96 /* Irix iconv() inserts a NUL byte if it cannot convert. */
97 else if (res > 0)
98 return -1;
99 # endif
100 count += outptr - tmpbuf;
102 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
103 # if defined _LIBICONV_VERSION \
104 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
106 char *outptr = tmpbuf;
107 size_t outsize = tmpbufsize;
108 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
110 if (res == (size_t)(-1))
111 return -1;
112 count += outptr - tmpbuf;
114 # endif
115 length = count;
118 *lengthp = length;
119 *resultp = result = xrealloc (*resultp, length);
120 if (length == 0)
121 return 0;
123 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
124 # if defined _LIBICONV_VERSION \
125 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
126 /* Return to the initial state. */
127 iconv (cd, NULL, NULL, NULL, NULL);
128 # endif
130 /* Do the conversion for real. */
132 const char *inptr = start;
133 size_t insize = end - start;
134 char *outptr = result;
135 size_t outsize = length;
137 while (insize > 0)
139 size_t res = iconv (cd,
140 (ICONV_CONST char **) &inptr, &insize,
141 &outptr, &outsize);
143 if (res == (size_t)(-1))
145 if (errno == EINVAL)
146 break;
147 else
148 return -1;
150 # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
151 /* Irix iconv() inserts a NUL byte if it cannot convert. */
152 else if (res > 0)
153 return -1;
154 # endif
156 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
157 # if defined _LIBICONV_VERSION \
158 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
160 size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
162 if (res == (size_t)(-1))
163 return -1;
165 # endif
166 if (outsize != 0)
167 abort ();
170 return 0;
171 #undef tmpbufsize
/* Converts the NUL-terminated STRING, including its terminating NUL byte,
   through the conversion descriptor CD and returns the converted copy.
   STRING itself is neither modified nor freed.
   If iconv_string fails, or if the result does not consist of exactly one
   NUL byte located at its very end, "conversion failure" is reported
   through error (EXIT_FAILURE, ...).  */
char *
convert_string (iconv_t cd, const char *string)
{
  /* One past the terminating NUL, so that the NUL is converted as well.  */
  const char *input_end = string + strlen (string) + 1;
  char *converted = NULL;
  size_t converted_len;

  if (iconv_string (cd, string, input_end, &converted, &converted_len) == 0
      /* The last byte must be a NUL ...  */
      && converted_len > 0
      && converted[converted_len - 1] == '\0'
      /* ... and it must be the only one.  */
      && strlen (converted) == converted_len - 1)
    return converted;

  error (EXIT_FAILURE, 0, _("conversion failure"));
  /* NOTREACHED */
  return NULL;
}
192 static void
193 convert_string_list (iconv_t cd, string_list_ty *slp)
195 size_t i;
197 if (slp != NULL)
198 for (i = 0; i < slp->nitems; i++)
199 slp->item[i] = convert_string (cd, slp->item[i]);
202 static void
203 convert_msgid (iconv_t cd, message_ty *mp)
205 mp->msgid = convert_string (cd, mp->msgid);
206 if (mp->msgid_plural != NULL)
207 mp->msgid_plural = convert_string (cd, mp->msgid_plural);
210 static void
211 convert_msgstr (iconv_t cd, message_ty *mp)
213 char *result = NULL;
214 size_t resultlen;
216 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
217 abort ();
219 if (iconv_string (cd, mp->msgstr, mp->msgstr + mp->msgstr_len,
220 &result, &resultlen) == 0)
221 /* Verify the result has a NUL byte at the end. */
222 if (resultlen > 0 && result[resultlen - 1] == '\0')
223 /* Verify the result has the same number of NUL bytes. */
225 const char *p;
226 const char *pend;
227 int nulcount1;
228 int nulcount2;
230 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
231 p < pend;
232 p += strlen (p) + 1, nulcount1++);
233 for (p = result, pend = p + resultlen, nulcount2 = 0;
234 p < pend;
235 p += strlen (p) + 1, nulcount2++);
237 if (nulcount1 == nulcount2)
239 mp->msgstr = result;
240 mp->msgstr_len = resultlen;
241 return;
245 error (EXIT_FAILURE, 0, _("conversion failure"));
248 #endif
251 void
252 iconv_message_list (message_list_ty *mlp,
253 const char *canon_from_code, const char *canon_to_code,
254 const char *from_filename)
256 bool canon_from_code_overridden = (canon_from_code != NULL);
257 size_t j;
259 /* If the list is empty, nothing to do. */
260 if (mlp->nitems == 0)
261 return;
263 /* Search the header entry, and extract and replace the charset name. */
264 for (j = 0; j < mlp->nitems; j++)
265 if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
267 const char *header = mlp->item[j]->msgstr;
269 if (header != NULL)
271 const char *charsetstr = strstr (header, "charset=");
273 if (charsetstr != NULL)
275 size_t len;
276 char *charset;
277 const char *canon_charset;
278 size_t len1, len2, len3;
279 char *new_header;
281 charsetstr += strlen ("charset=");
282 len = strcspn (charsetstr, " \t\n");
283 charset = (char *) xallocsa (len + 1);
284 memcpy (charset, charsetstr, len);
285 charset[len] = '\0';
287 canon_charset = po_charset_canonicalize (charset);
288 if (canon_charset == NULL)
290 if (!canon_from_code_overridden)
292 /* Don't give an error for POT files, because POT
293 files usually contain only ASCII msgids. */
294 const char *filename = from_filename;
295 size_t filenamelen;
297 if (filename != NULL
298 && (filenamelen = strlen (filename)) >= 4
299 && memcmp (filename + filenamelen - 4, ".pot", 4)
300 == 0
301 && strcmp (charset, "CHARSET") == 0)
302 canon_charset = po_charset_ascii;
303 else
304 error (EXIT_FAILURE, 0,
305 _("\
306 present charset \"%s\" is not a portable encoding name"),
307 charset);
310 else
312 if (canon_from_code == NULL)
313 canon_from_code = canon_charset;
314 else if (canon_from_code != canon_charset)
315 error (EXIT_FAILURE, 0,
316 _("\
317 two different charsets \"%s\" and \"%s\" in input file"),
318 canon_from_code, canon_charset);
320 freesa (charset);
322 len1 = charsetstr - header;
323 len2 = strlen (canon_to_code);
324 len3 = (header + strlen (header)) - (charsetstr + len);
325 new_header = (char *) xmalloc (len1 + len2 + len3 + 1);
326 memcpy (new_header, header, len1);
327 memcpy (new_header + len1, canon_to_code, len2);
328 memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
329 mlp->item[j]->msgstr = new_header;
330 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
334 if (canon_from_code == NULL)
336 if (is_ascii_message_list (mlp))
337 canon_from_code = po_charset_ascii;
338 else
339 error (EXIT_FAILURE, 0, _("\
340 input file doesn't contain a header entry with a charset specification"));
343 /* If the two encodings are the same, nothing to do. */
344 if (canon_from_code != canon_to_code)
346 #if HAVE_ICONV
347 iconv_t cd;
348 bool msgids_changed;
350 /* Avoid glibc-2.1 bug with EUC-KR. */
351 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
352 if (strcmp (canon_from_code, "EUC-KR") == 0)
353 cd = (iconv_t)(-1);
354 else
355 # endif
356 cd = iconv_open (canon_to_code, canon_from_code);
357 if (cd == (iconv_t)(-1))
358 error (EXIT_FAILURE, 0, _("\
359 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
360 and iconv() does not support this conversion."),
361 canon_from_code, canon_to_code, basename (program_name));
363 msgids_changed = false;
364 for (j = 0; j < mlp->nitems; j++)
366 message_ty *mp = mlp->item[j];
368 if (!is_ascii_string (mp->msgid))
369 msgids_changed = true;
370 convert_string_list (cd, mp->comment);
371 convert_string_list (cd, mp->comment_dot);
372 convert_msgid (cd, mp);
373 convert_msgstr (cd, mp);
376 iconv_close (cd);
378 if (msgids_changed)
379 if (message_list_msgids_changed (mlp))
380 error (EXIT_FAILURE, 0, _("\
381 Conversion from \"%s\" to \"%s\" introduces duplicates: \
382 some different msgids become equal."),
383 canon_from_code, canon_to_code);
384 #else
385 error (EXIT_FAILURE, 0, _("\
386 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
387 This version was built without iconv()."),
388 canon_from_code, canon_to_code, basename (program_name));
389 #endif
393 msgdomain_list_ty *
394 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
395 const char *to_code,
396 const char *from_filename)
398 const char *canon_to_code;
399 size_t k;
401 /* Canonicalize target encoding. */
402 canon_to_code = po_charset_canonicalize (to_code);
403 if (canon_to_code == NULL)
404 error (EXIT_FAILURE, 0,
405 _("target charset \"%s\" is not a portable encoding name."),
406 to_code);
408 for (k = 0; k < mdlp->nitems; k++)
409 iconv_message_list (mdlp->item[k]->messages, mdlp->encoding, canon_to_code,
410 from_filename);
412 mdlp->encoding = canon_to_code;
413 return mdlp;