1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 #include "msgl-iconv.h"
41 #include "po-charset.h"
42 #include "msgl-ascii.h"
/* Translation-marker shorthand: _(str) expands to gettext (str).  */
49 #define _(str) gettext (str)
54 /* Converts an entire string from one encoding to another, using iconv.
55 Return value: 0 if successful, otherwise -1 and errno set. */
/* Parameters: CD is the iconv conversion descriptor, START and END delimit
   the input bytes (END - START of them), RESULTP points to the buffer
   pointer that gets resized and filled.  LENGTHP presumably receives the
   result length; its use is not visible in this copy -- TODO confirm.
   The visible code makes two passes over the same input: the first
   converts into a scratch buffer only to add up how many output bytes are
   produced, then *RESULTP is resized with xrealloc, and the second pass
   converts straight into it.
   NOTE(review): several lines of this function are not present in this
   copy, so the comments below describe only what is shown.  */
57 iconv_string (iconv_t cd
, const char *start
, const char *end
,
58 char **resultp
, size_t *lengthp
)
/* Size in bytes of the scratch buffer (tmpbuf) used by the counting pass.  */
60 #define tmpbufsize 4096
64 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
65 # if defined _LIBICONV_VERSION \
66 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
67 /* Set to the initial state. */
68 iconv (cd
, NULL
, NULL
, NULL
, NULL
);
71 /* Determine the length we need. */
74 char tmpbuf
[tmpbufsize
];
75 const char *inptr
= start
;
76 size_t insize
= end
- start
;
/* Every iconv call of the counting pass writes from the start of tmpbuf
   again; only the number of bytes produced is accumulated further down.  */
80 char *outptr
= tmpbuf
;
81 size_t outsize
= tmpbufsize
;
82 size_t res
= iconv (cd
,
83 (ICONV_CONST
char **) &inptr
, &insize
,
/* (size_t)(-1) is the failure return of iconv; errno is inspected to
   tell the failure kinds apart.  */
86 if (res
== (size_t)(-1))
90 else if (errno
== EINVAL
)
95 # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
96 /* Irix iconv() inserts a NUL byte if it cannot convert. */
/* outptr - tmpbuf is the number of bytes the call above produced.  */
100 count
+= outptr
- tmpbuf
;
102 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
103 # if defined _LIBICONV_VERSION \
104 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
/* A call with NULL input but a real output buffer: whatever iconv writes
   here (presumably a final shift sequence -- TODO confirm against the
   iconv specification) is counted as well.  */
106 char *outptr
= tmpbuf
;
107 size_t outsize
= tmpbufsize
;
108 size_t res
= iconv (cd
, NULL
, NULL
, &outptr
, &outsize
);
110 if (res
== (size_t)(-1))
112 count
+= outptr
- tmpbuf
;
/* Resize the buffer owned by the caller to LENGTH bytes; RESULT is a
   local alias for the new pointer.  */
119 *resultp
= result
= xrealloc (*resultp
, length
);
123 /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
124 # if defined _LIBICONV_VERSION \
125 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
126 /* Return to the initial state. */
127 iconv (cd
, NULL
, NULL
, NULL
, NULL
);
130 /* Do the conversion for real. */
/* Second pass: same input range as before, but the output now goes
   directly into RESULT, with LENGTH bytes of room.  */
132 const char *inptr
= start
;
133 size_t insize
= end
- start
;
134 char *outptr
= result
;
135 size_t outsize
= length
;
139 size_t res
= iconv (cd
,
140 (ICONV_CONST
char **) &inptr
, &insize
,
143 if (res
== (size_t)(-1))
150 # if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
151 /* Irix iconv() inserts a NUL byte if it cannot convert. */
156 /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
157 # if defined _LIBICONV_VERSION \
158 || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
/* Same NULL-input call as in the counting pass, this time writing into
   the real output buffer.  */
160 size_t res
= iconv (cd
, NULL
, NULL
, &outptr
, &outsize
);
162 if (res
== (size_t)(-1))
/* Converts the NUL-terminated STRING through CD by calling iconv_string.
   The terminating NUL is part of the converted range (len is strlen + 1),
   and the converted bytes are checked for exactly one NUL, at the end.
   The visible code also contains an error (EXIT_FAILURE, ...) call with
   the conversion-failure message.
   NOTE(review): the lines that return the result and that select between
   the success and failure paths are not present in this copy.  */
175 convert_string (iconv_t cd
, const char *string
)
/* + 1 so that the terminating NUL byte is converted too.  */
177 size_t len
= strlen (string
) + 1;
181 if (iconv_string (cd
, string
, string
+ len
, &result
, &resultlen
) == 0)
182 /* Verify the result has exactly one NUL byte, at the end. */
/* strlen stops at the first NUL, so it equals resultlen - 1 only when no
   NUL occurs before the last byte.  */
183 if (resultlen
> 0 && result
[resultlen
- 1] == '\0'
184 && strlen (result
) == resultlen
- 1)
187 error (EXIT_FAILURE
, 0, _("conversion failure"));
/* Replaces each of the nitems entries of SLP->item, in place, with the
   result of convert_string (CD, entry).
   NOTE(review): the declaration of i and any guard around the loop are
   not present in this copy.  */
193 convert_string_list (iconv_t cd
, string_list_ty
*slp
)
198 for (i
= 0; i
< slp
->nitems
; i
++)
199 slp
->item
[i
] = convert_string (cd
, slp
->item
[i
]);
/* Converts the msgid of MP through CD and stores the result back into
   MP->msgid.  The msgid_plural field is converted the same way, but only
   when it is non-NULL.  */
203 convert_msgid (iconv_t cd
, message_ty
*mp
)
205 mp
->msgid
= convert_string (cd
, mp
->msgid
);
/* msgid_plural is optional; it is left alone when it is NULL.  */
206 if (mp
->msgid_plural
!= NULL
)
207 mp
->msgid_plural
= convert_string (cd
, mp
->msgid_plural
);
/* Converts the msgstr buffer of MP (msgstr_len bytes) through CD.
   The buffer is handled as a sequence of NUL-terminated strings: the
   converted bytes must end in a NUL and must contain as many strings as
   the original before MP->msgstr_len is updated to the new length.
   The visible code also contains an error (EXIT_FAILURE, ...) call with
   the conversion-failure message.
   NOTE(review): loop conditions, the assignment of the new buffer and the
   branch structure are partly missing from this copy.  */
211 convert_msgstr (iconv_t cd
, message_ty
*mp
)
/* Sanity condition on the input: non-empty and ending in a NUL byte.
   What is done when it does not hold is on a line not shown here.  */
216 if (!(mp
->msgstr_len
> 0 && mp
->msgstr
[mp
->msgstr_len
- 1] == '\0'))
219 if (iconv_string (cd
, mp
->msgstr
, mp
->msgstr
+ mp
->msgstr_len
,
220 &result
, &resultlen
) == 0)
221 /* Verify the result has a NUL byte at the end. */
222 if (resultlen
> 0 && result
[resultlen
- 1] == '\0')
223 /* Verify the result has the same number of NUL bytes. */
/* Walk the original buffer one string at a time: each step skips a
   string plus its NUL and bumps nulcount1.  */
230 for (p
= mp
->msgstr
, pend
= p
+ mp
->msgstr_len
, nulcount1
= 0;
232 p
+= strlen (p
) + 1, nulcount1
++);
/* The same walk over the converted buffer, counting into nulcount2.  */
233 for (p
= result
, pend
= p
+ resultlen
, nulcount2
= 0;
235 p
+= strlen (p
) + 1, nulcount2
++);
237 if (nulcount1
== nulcount2
)
/* Counts agree: record the converted length in the message.  */
240 mp
->msgstr_len
= resultlen
;
245 error (EXIT_FAILURE
, 0, _("conversion failure"));
/* Converts the message list MLP to the encoding CANON_TO_CODE.
   CANON_FROM_CODE, when non-NULL, gives the source encoding; otherwise it
   is taken from the charset= field found in the header entry.
   FROM_FILENAME is read only for the POT-file exception further down.
   Encoding names are compared with == and != rather than strcmp, so they
   are presumably expected to be canonical pointers as returned by
   po_charset_canonicalize -- TODO confirm against the callers.
   Every failure visible here is reported with error (EXIT_FAILURE, ...).
   NOTE(review): many lines (declarations, braces, preprocessor lines,
   some statements) are not present in this copy; the comments describe
   only what is shown.  */
252 iconv_message_list (message_list_ty
*mlp
,
253 const char *canon_from_code
, const char *canon_to_code
,
254 const char *from_filename
)
/* Remember whether the caller supplied the source encoding, because
   canon_from_code itself may be assigned later in this function.  */
256 bool canon_from_code_overridden
= (canon_from_code
!= NULL
);
259 /* If the list is empty, nothing to do. */
260 if (mlp
->nitems
== 0)
263 /* Search the header entry, and extract and replace the charset name. */
264 for (j
= 0; j
< mlp
->nitems
; j
++)
/* A header entry is a message with an empty msgid that is not marked
   obsolete.  */
265 if (mlp
->item
[j
]->msgid
[0] == '\0' && !mlp
->item
[j
]->obsolete
)
267 const char *header
= mlp
->item
[j
]->msgstr
;
271 const char *charsetstr
= strstr (header
, "charset=");
273 if (charsetstr
!= NULL
)
277 const char *canon_charset
;
278 size_t len1
, len2
, len3
;
/* Step past the key so that charsetstr points at the value; the value
   extends up to the next space, tab or newline.  */
281 charsetstr
+= strlen ("charset=");
282 len
= strcspn (charsetstr
, " \t\n");
/* Copy the value into a scratch buffer of len + 1 bytes.  The store of
   the terminating NUL is presumably on a line not shown here.  */
283 charset
= (char *) xallocsa (len
+ 1);
284 memcpy (charset
, charsetstr
, len
);
287 canon_charset
= po_charset_canonicalize (charset
);
/* NULL means the name in the header was not recognized.  The handling
   below applies only when the caller did not supply the source
   encoding.  */
288 if (canon_charset
== NULL
)
290 if (!canon_from_code_overridden
)
292 /* Don't give an error for POT files, because POT
293 files usually contain only ASCII msgids. */
294 const char *filename
= from_filename
;
298 && (filenamelen
= strlen (filename
)) >= 4
299 && memcmp (filename
+ filenamelen
- 4, ".pot", 4)
301 && strcmp (charset
, "CHARSET") == 0)
302 canon_charset
= po_charset_ascii
;
304 error (EXIT_FAILURE
, 0,
306 present charset \"%s\" is not a portable encoding name"),
/* Adopt the charset of the header as source encoding if none is known
   yet; otherwise it has to be the very same pointer as the known one.  */
312 if (canon_from_code
== NULL
)
313 canon_from_code
= canon_charset
;
314 else if (canon_from_code
!= canon_charset
)
315 error (EXIT_FAILURE
, 0,
317 two different charsets \"%s\" and \"%s\" in input file"),
318 canon_from_code
, canon_charset
);
/* Build the new header from three pieces: the len1 bytes before the
   charset value, the len2 bytes of canon_to_code, and the len3 bytes
   after the old value.  The last memcpy copies len3 + 1 bytes so that
   the terminating NUL of the header comes along.  */
322 len1
= charsetstr
- header
;
323 len2
= strlen (canon_to_code
);
324 len3
= (header
+ strlen (header
)) - (charsetstr
+ len
);
325 new_header
= (char *) xmalloc (len1
+ len2
+ len3
+ 1);
326 memcpy (new_header
, header
, len1
);
327 memcpy (new_header
+ len1
, canon_to_code
, len2
);
328 memcpy (new_header
+ len1
+ len2
, charsetstr
+ len
, len3
+ 1);
329 mlp
->item
[j
]->msgstr
= new_header
;
330 mlp
->item
[j
]->msgstr_len
= len1
+ len2
+ len3
+ 1;
/* Neither the caller nor a header provided a source encoding: ASCII is
   assumed when the whole list passes is_ascii_message_list.  */
334 if (canon_from_code
== NULL
)
336 if (is_ascii_message_list (mlp
))
337 canon_from_code
= po_charset_ascii
;
339 error (EXIT_FAILURE
, 0, _("\
340 input file doesn't contain a header entry with a charset specification"));
343 /* If the two encodings are the same, nothing to do. */
344 if (canon_from_code
!= canon_to_code
)
350 /* Avoid glibc-2.1 bug with EUC-KR. */
351 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
352 if (strcmp (canon_from_code
, "EUC-KR") == 0)
/* iconv_open takes the target encoding first and the source second.  */
356 cd
= iconv_open (canon_to_code
, canon_from_code
);
357 if (cd
== (iconv_t
)(-1))
358 error (EXIT_FAILURE
, 0, _("\
359 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
360 and iconv() does not support this conversion."),
361 canon_from_code
, canon_to_code
, basename (program_name
));
/* msgids_changed becomes true as soon as one msgid is not pure ASCII.  */
363 msgids_changed
= false;
364 for (j
= 0; j
< mlp
->nitems
; j
++)
366 message_ty
*mp
= mlp
->item
[j
];
368 if (!is_ascii_string (mp
->msgid
))
369 msgids_changed
= true;
/* Convert every text part of the message: both comment lists, the msgid
   (and plural), and the msgstr.  */
370 convert_string_list (cd
, mp
->comment
);
371 convert_string_list (cd
, mp
->comment_dot
);
372 convert_msgid (cd
, mp
);
373 convert_msgstr (cd
, mp
);
379 if (message_list_msgids_changed (mlp
))
380 error (EXIT_FAILURE
, 0, _("\
381 Conversion from \"%s\" to \"%s\" introduces duplicates: \
382 some different msgids become equal."),
383 canon_from_code
, canon_to_code
);
/* Judging by its message, the following call presumably belongs to the
   build without iconv; the preprocessor lines that select it are not
   visible here -- TODO confirm.  */
385 error (EXIT_FAILURE
, 0, _("\
386 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
387 This version was built without iconv()."),
388 canon_from_code
, canon_to_code
, basename (program_name
));
/* Converts every message list contained in MDLP to the canonical form of
   to_code.  The current MDLP->encoding is passed to iconv_message_list as
   the source encoding for each domain, and afterwards MDLP->encoding is
   set to the canonical target name.  A target name that
   po_charset_canonicalize does not recognize is reported with
   error (EXIT_FAILURE, ...).
   NOTE(review): the parameter line declaring to_code and the end of the
   function are not present in this copy.  */
394 iconv_msgdomain_list (msgdomain_list_ty
*mdlp
,
396 const char *from_filename
)
398 const char *canon_to_code
;
401 /* Canonicalize target encoding. */
402 canon_to_code
= po_charset_canonicalize (to_code
);
403 if (canon_to_code
== NULL
)
404 error (EXIT_FAILURE
, 0,
405 _("target charset \"%s\" is not a portable encoding name."),
/* One iconv_message_list call per domain, all with the same source and
   target encodings.  */
408 for (k
= 0; k
< mdlp
->nitems
; k
++)
409 iconv_message_list (mdlp
->item
[k
]->messages
, mdlp
->encoding
, canon_to_code
,
/* Record the new encoding only after all domains have been processed.  */
412 mdlp
->encoding
= canon_to_code
;