Prepare for version 1.15.
[libiconv.git] / src / iconv.c
blob0684c82d1d435b0749aba68392b07d9302ac599e
1 /* Copyright (C) 2000-2009, 2011-2012, 2016 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #ifndef ICONV_CONST
19 # define ICONV_CONST
20 #endif
22 #include <limits.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <iconv.h>
28 #include <errno.h>
29 #include <locale.h>
30 #include <fcntl.h>
32 /* Ensure that iconv_no_i18n does not depend on libintl. */
33 #ifdef NO_I18N
34 # undef ENABLE_NLS
35 # undef ENABLE_RELOCATABLE
36 #endif
38 #include "binary-io.h"
39 #include "progname.h"
40 #include "relocatable.h"
41 #include "safe-read.h"
42 #include "xalloc.h"
43 #include "uniwidth.h"
44 #include "uniwidth/cjk.h"
46 /* Ensure that iconv_no_i18n does not depend on libintl. */
47 #ifdef NO_I18N
48 #include <stdarg.h>
49 static void
50 error (int status, int errnum, const char *message, ...)
52 va_list args;
54 fflush(stdout);
55 fprintf(stderr,"%s: ",program_name);
56 va_start(args,message);
57 vfprintf(stderr,message,args);
58 va_end(args);
59 if (errnum) {
60 const char *s = strerror(errnum);
61 if (s == NULL)
62 s = "Unknown system error";
64 putc('\n',stderr);
65 fflush(stderr);
66 if (status)
67 exit(status);
69 #else
70 # include "error.h"
71 #endif
73 #include "gettext.h"
75 #define _(str) gettext(str)
77 /* Ensure that iconv_no_i18n does not depend on libintl. */
78 #ifdef NO_I18N
79 # define xmalloc malloc
80 # define xalloc_die abort
81 #endif
83 /* Locale independent test for a decimal digit.
84 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
85 <ctype.h> isdigit must be an 'unsigned char'.) */
86 #undef isdigit
87 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
89 /* Locale independent test for a printable character.
90 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
91 <ctype.h> isprint must be an 'unsigned char'.) */
92 #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
94 /* ========================================================================= */
96 static int discard_unconvertible = 0;
97 static int silent = 0;
/* Print the usage message and terminate the program with EXITCODE.
   A nonzero EXITCODE prints the two-line short synopsis and a pointer to
   --help on stderr; a zero EXITCODE prints the full option summary on
   stdout.  Either way the function ends in exit(exitcode), so it does not
   return to its caller. */
99 static void usage (int exitcode)
101 if (exitcode != 0) {
102 const char* helpstring1 =
103 /* TRANSLATORS: The first line of the short usage message. */
104 _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
105 const char* helpstring2 =
106 /* TRANSLATORS: The second line of the short usage message.
107 Align it correctly against the first line. */
108 _("or: iconv -l");
109 fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
110 fprintf(stderr, _("Try '%s --help' for more information.\n"), program_name);
111 } else {
112 /* xgettext: no-wrap */
113 /* TRANSLATORS: The first line of the long usage message.
114 The %s placeholder expands to the program name. */
115 printf(_("\
116 Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
117 program_name);
118 /* xgettext: no-wrap */
119 /* TRANSLATORS: The second line of the long usage message.
120 Align it correctly against the first line.
121 The %s placeholder expands to the program name. */
122 printf(_("\
123 or: %s -l\n"),
124 program_name);
125 printf("\n");
126 /* xgettext: no-wrap */
127 /* TRANSLATORS: Description of the iconv program. */
128 printf(_("\
129 Converts text from one encoding to another encoding.\n"));
130 printf("\n");
131 /* xgettext: no-wrap */
132 printf(_("\
133 Options controlling the input and output format:\n"));
134 /* xgettext: no-wrap */
135 printf(_("\
136 -f ENCODING, --from-code=ENCODING\n\
137 the encoding of the input\n"));
138 /* xgettext: no-wrap */
139 printf(_("\
140 -t ENCODING, --to-code=ENCODING\n\
141 the encoding of the output\n"));
142 printf("\n");
143 /* xgettext: no-wrap */
144 printf(_("\
145 Options controlling conversion problems:\n"));
146 /* xgettext: no-wrap */
147 printf(_("\
148 -c discard unconvertible characters\n"));
149 /* xgettext: no-wrap */
150 printf(_("\
151 --unicode-subst=FORMATSTRING\n\
152 substitution for unconvertible Unicode characters\n"));
153 /* xgettext: no-wrap */
154 printf(_("\
155 --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
156 /* xgettext: no-wrap */
157 printf(_("\
158 --widechar-subst=FORMATSTRING\n\
159 substitution for unconvertible wide characters\n"));
160 printf("\n");
161 /* xgettext: no-wrap */
162 printf(_("\
163 Options controlling error output:\n"));
164 /* xgettext: no-wrap */
165 printf(_("\
166 -s, --silent suppress error messages about conversion problems\n"));
167 printf("\n");
168 /* xgettext: no-wrap */
169 printf(_("\
170 Informative output:\n"));
171 /* xgettext: no-wrap */
172 printf(_("\
173 -l, --list list the supported encodings\n"));
174 /* xgettext: no-wrap */
175 printf(_("\
176 --help display this help and exit\n"));
177 /* xgettext: no-wrap */
178 printf(_("\
179 --version output version information and exit\n"));
180 printf("\n");
181 /* TRANSLATORS: The placeholder indicates the bug-reporting address
182 for this package. Please add _another line_ saying
183 "Report translation bugs to <...>\n" with the address for translation
184 bugs (typically your translation team's web or email address). */
185 fputs(_("Report bugs to <bug-gnu-libiconv@gnu.org>.\n"),stdout);
187 exit(exitcode);
190 static void print_version (void)
192 printf("iconv (GNU libiconv %d.%d)\n",
193 _libiconv_version >> 8, _libiconv_version & 0xff);
194 printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2016");
195 /* xgettext: no-wrap */
196 fputs (_("\
197 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
198 This is free software: you are free to change and redistribute it.\n\
199 There is NO WARRANTY, to the extent permitted by law.\n\
200 "),stdout);
201 /* TRANSLATORS: The %s placeholder expands to an author's name. */
202 printf(_("Written by %s.\n"),"Bruno Haible");
203 exit(EXIT_SUCCESS);
/* Callback for iconvlist(): print the NAMESCOUNT names in NAMES on one line
   of stdout, separated by single spaces and terminated by a newline.
   DATA is unused.  Always returns 0. */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  const char* separator = "";
  unsigned int idx = 0;

  (void)data;
  while (idx < namescount) {
    /* Empty before the first name, a blank before every later one. */
    fputs(separator,stdout);
    fputs(names[idx],stdout);
    separator = " ";
    idx++;
  }
  putc('\n',stdout);
  return 0;
}
220 /* ========================================================================= */
/* Line number and column position. */
static unsigned int line;
static unsigned int column;
/* Encoding name handed to uc_width() when measuring a character. */
static const char* cjkcode;

/* Update the line number and column position after a character was
   successfully converted.
   A line feed starts a new line at column 0.  Any other character advances
   the column by its uc_width(); if that width is negative, only a tab has an
   effect, moving the column to the next multiple of 8.  DATA is unused. */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }
  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
242 /* ========================================================================= */
244 /* Production of placeholder strings as fallback for unconvertible
245 characters. */
/* Parse the nonempty decimal digit sequence that *FORMATP points at and
   advance *FORMATP past it.  Returns the parsed value.
   Values above INT_MAX are rejected with a fatal error attributed to
   PARAM_NAME: printf-style functions take width and precision as 'int', and
   an unchecked accumulation would wrap around and under-report the size. */
static unsigned int parse_subst_number (const char **formatp, const char *param_name)
{
  const char *p = *formatp;
  unsigned int value = 0;

  do {
    unsigned int digit = (unsigned int) (*p - '0');
    if (value > ((unsigned int) INT_MAX - digit) / 10)
      error(EXIT_FAILURE,0,
            /* TRANSLATORS: An error message.
               The %s placeholder expands to a command-line option. */
            _("%s argument: A format directive with a width or precision this large is not allowed here."),
            param_name);
    value = 10*value + digit;
    p++;
  } while (isdigit (*p));
  *formatp = p;
  return value;
}

/* Check that the argument is a format string taking either no argument
   or exactly one unsigned integer argument.  Returns the maximum output
   size of the format string.
   FORMAT is the format string to check; PARAM_NAME is the command-line
   option it came from and is only used in error messages.  Every violation
   is reported through error() with EXIT_FAILURE. */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
           - '%', that needs no argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  /* The callers multiply the returned size by small factors (4,
     sizeof (unsigned int), sizeof (wchar_t)) and add 1 when allocating
     buffers.  Keeping the total below this limit keeps those products from
     wrapping around. */
  const size_t maxsize_limit = (size_t)(-1) / 16;
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  for (; *format != '\0';) {
    if (*format++ == '%') {
      /* A directive. */
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;
      /* Parse flags. */
      while (*format == ' ' || *format == '+' || *format == '-'
             || *format == '#' || *format == '0' || *format == '\'')
        format++;
      /* Parse width. */
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option. */
              _("%s argument: A format directive with a variable width is not allowed here."),
              param_name);
      if (isdigit (*format))
        width = parse_subst_number(&format, param_name);
      /* Parse precision. */
      if (*format == '.') {
        format++;
        if (*format == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option. */
                _("%s argument: A format directive with a variable precision is not allowed here."),
                param_name);
        if (isdigit (*format))
          precision = parse_subst_number(&format, param_name);
      }
      /* Parse size. */
      switch (*format) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option. */
                _("%s argument: A format directive with a size is not allowed here."),
                param_name);
      }
      /* Parse end of directive. */
      switch (*format) {
        case '%':
          length = 1;
          break;
        case 'u': case 'o': case 'x': case 'X':
          if (*format == 'u') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.30103 /* binary -> decimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length *= 2; /* estimate for FLAG_GROUP */
            length += 1; /* account for leading sign */
          } else if (*format == 'o') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.333334 /* binary -> octal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 1; /* account for leading sign */
          } else { /* 'x', 'X' */
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.25 /* binary -> hexadecimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 2; /* account for leading sign or alternate form */
          }
          unnumbered_arg_count++;
          break;
        default:
          if (*format == '\0')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option. */
                  _("%s argument: The string ends in the middle of a directive."),
                  param_name);
          else if (c_isprint(*format))
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %c placeholder expands to an unknown format directive. */
                  _("%s argument: The character '%c' is not a valid conversion specifier."),
                  param_name,*format);
          else
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option. */
                  _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                  param_name);
          abort(); /*NOTREACHED*/
      }
      format++;
      if (length < width)
        length = width;
      /* maxsize <= maxsize_limit holds here, so the subtraction is safe. */
      if (length > maxsize_limit - maxsize)
        xalloc_die();
      maxsize += length;
    } else {
      if (maxsize >= maxsize_limit)
        xalloc_die();
      maxsize++;
    }
  }
  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string. */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
405 /* Format strings, taken from the --byte-subst, --widechar-subst and
   --unicode-subst options.  NULL when the option was not given. */
406 static const char* ilseq_byte_subst;
407 static const char* ilseq_wchar_subst;
408 static const char* ilseq_unicode_subst;
410 /* Maximum result size for each format string, as returned by
   check_subst_formatstring(). */
411 static size_t ilseq_byte_subst_size;
412 static size_t ilseq_wchar_subst_size;
413 static size_t ilseq_unicode_subst_size;
415 /* Buffer of size ilseq_byte_subst_size+1.  Receives the sprintf() output
   of ilseq_byte_subst. */
416 static char* ilseq_byte_subst_buffer;
417 #if HAVE_WCHAR_T
418 /* Buffer of size ilseq_wchar_subst_size+1.  Receives the sprintf() output
   of ilseq_wchar_subst. */
419 static char* ilseq_wchar_subst_buffer;
420 #endif
421 /* Buffer of size ilseq_unicode_subst_size+1.  Receives the sprintf()
   output of ilseq_unicode_subst. */
422 static char* ilseq_unicode_subst_buffer;
424 /* Auxiliary variables for subst_mb_to_uc_fallback. */
425 /* Converter from locale encoding to UCS-4. */
426 static iconv_t subst_mb_to_uc_cd;
427 /* Buffer of size ilseq_byte_subst_size. */
428 static unsigned int* subst_mb_to_uc_temp_buffer;
430 static void subst_mb_to_uc_fallback
431 (const char* inbuf, size_t inbufsize,
432 void (*write_replacement) (const unsigned int *buf, size_t buflen,
433 void* callback_arg),
434 void* callback_arg,
435 void* data)
437 for (; inbufsize > 0; inbuf++, inbufsize--) {
438 const char* inptr;
439 size_t inbytesleft;
440 char* outptr;
441 size_t outbytesleft;
442 sprintf(ilseq_byte_subst_buffer,
443 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
444 inptr = ilseq_byte_subst_buffer;
445 inbytesleft = strlen(ilseq_byte_subst_buffer);
446 outptr = (char*)subst_mb_to_uc_temp_buffer;
447 outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
448 iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
449 if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
450 == (size_t)(-1)
451 || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
452 == (size_t)(-1))
453 error(EXIT_FAILURE,0,
454 /* TRANSLATORS: An error message.
455 The %s placeholder expands to a piece of text, specified through --byte-subst. */
456 _("cannot convert byte substitution to Unicode: %s"),
457 ilseq_byte_subst_buffer);
458 if (!(outbytesleft%sizeof(unsigned int) == 0))
459 abort();
460 write_replacement(subst_mb_to_uc_temp_buffer,
461 ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
462 callback_arg);
466 /* Auxiliary variables for subst_uc_to_mb_fallback. */
467 /* Converter from locale encoding to target encoding. */
468 static iconv_t subst_uc_to_mb_cd;
469 /* Buffer of size ilseq_unicode_subst_size*4. */
470 static char* subst_uc_to_mb_temp_buffer;
472 static void subst_uc_to_mb_fallback
473 (unsigned int code,
474 void (*write_replacement) (const char *buf, size_t buflen,
475 void* callback_arg),
476 void* callback_arg,
477 void* data)
479 const char* inptr;
480 size_t inbytesleft;
481 char* outptr;
482 size_t outbytesleft;
483 sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
484 inptr = ilseq_unicode_subst_buffer;
485 inbytesleft = strlen(ilseq_unicode_subst_buffer);
486 outptr = subst_uc_to_mb_temp_buffer;
487 outbytesleft = ilseq_unicode_subst_size*4;
488 iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
489 if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
490 == (size_t)(-1)
491 || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
492 == (size_t)(-1))
493 error(EXIT_FAILURE,0,
494 /* TRANSLATORS: An error message.
495 The %s placeholder expands to a piece of text, specified through --unicode-subst. */
496 _("cannot convert unicode substitution to target encoding: %s"),
497 ilseq_unicode_subst_buffer);
498 write_replacement(subst_uc_to_mb_temp_buffer,
499 ilseq_unicode_subst_size*4-outbytesleft,
500 callback_arg);
503 #if HAVE_WCHAR_T
505 /* Auxiliary variables for subst_mb_to_wc_fallback. */
506 /* Converter from locale encoding to wchar_t. */
507 static iconv_t subst_mb_to_wc_cd;
508 /* Buffer of size ilseq_byte_subst_size. */
509 static wchar_t* subst_mb_to_wc_temp_buffer;
511 static void subst_mb_to_wc_fallback
512 (const char* inbuf, size_t inbufsize,
513 void (*write_replacement) (const wchar_t *buf, size_t buflen,
514 void* callback_arg),
515 void* callback_arg,
516 void* data)
518 for (; inbufsize > 0; inbuf++, inbufsize--) {
519 const char* inptr;
520 size_t inbytesleft;
521 char* outptr;
522 size_t outbytesleft;
523 sprintf(ilseq_byte_subst_buffer,
524 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
525 inptr = ilseq_byte_subst_buffer;
526 inbytesleft = strlen(ilseq_byte_subst_buffer);
527 outptr = (char*)subst_mb_to_wc_temp_buffer;
528 outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
529 iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
530 if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
531 == (size_t)(-1)
532 || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
533 == (size_t)(-1))
534 error(EXIT_FAILURE,0,
535 /* TRANSLATORS: An error message.
536 The %s placeholder expands to a piece of text, specified through --byte-subst. */
537 _("cannot convert byte substitution to wide string: %s"),
538 ilseq_byte_subst_buffer);
539 if (!(outbytesleft%sizeof(wchar_t) == 0))
540 abort();
541 write_replacement(subst_mb_to_wc_temp_buffer,
542 ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
543 callback_arg);
547 /* Auxiliary variables for subst_wc_to_mb_fallback. */
548 /* Converter from locale encoding to target encoding. */
549 static iconv_t subst_wc_to_mb_cd;
550 /* Buffer of size ilseq_wchar_subst_size*4.
551 Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
552 static char* subst_wc_to_mb_temp_buffer;
554 static void subst_wc_to_mb_fallback
555 (wchar_t code,
556 void (*write_replacement) (const char *buf, size_t buflen,
557 void* callback_arg),
558 void* callback_arg,
559 void* data)
561 const char* inptr;
562 size_t inbytesleft;
563 char* outptr;
564 size_t outbytesleft;
565 sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
566 inptr = ilseq_wchar_subst_buffer;
567 inbytesleft = strlen(ilseq_wchar_subst_buffer);
568 outptr = subst_wc_to_mb_temp_buffer;
569 outbytesleft = ilseq_wchar_subst_size*4;
570 iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
571 if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
572 == (size_t)(-1)
573 || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
574 == (size_t)(-1))
575 error(EXIT_FAILURE,0,
576 /* TRANSLATORS: An error message.
577 The %s placeholder expands to a piece of text, specified through --widechar-subst. */
578 _("cannot convert widechar substitution to target encoding: %s"),
579 ilseq_wchar_subst_buffer);
580 write_replacement(subst_wc_to_mb_temp_buffer,
581 ilseq_wchar_subst_size*4-outbytesleft,
582 callback_arg);
585 #else
587 #define subst_mb_to_wc_fallback NULL
588 #define subst_wc_to_mb_fallback NULL
590 #endif
592 /* Auxiliary variables for subst_mb_to_mb_fallback. */
593 /* Converter from locale encoding to target encoding. */
594 static iconv_t subst_mb_to_mb_cd;
595 /* Buffer of size ilseq_byte_subst_size*4. */
596 static char* subst_mb_to_mb_temp_buffer;
598 static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
600 for (; inbufsize > 0; inbuf++, inbufsize--) {
601 const char* inptr;
602 size_t inbytesleft;
603 char* outptr;
604 size_t outbytesleft;
605 sprintf(ilseq_byte_subst_buffer,
606 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
607 inptr = ilseq_byte_subst_buffer;
608 inbytesleft = strlen(ilseq_byte_subst_buffer);
609 outptr = subst_mb_to_mb_temp_buffer;
610 outbytesleft = ilseq_byte_subst_size*4;
611 iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
612 if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
613 == (size_t)(-1)
614 || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
615 == (size_t)(-1))
616 error(EXIT_FAILURE,0,
617 /* TRANSLATORS: An error message.
618 The %s placeholder expands to a piece of text, specified through --byte-subst. */
619 _("cannot convert byte substitution to target encoding: %s"),
620 ilseq_byte_subst_buffer);
621 fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
622 stdout);
626 /* ========================================================================= */
628 /* Error messages during conversion. */
630 static void conversion_error_EILSEQ (const char* infilename)
632 fflush(stdout);
633 if (column > 0)
634 putc('\n',stderr);
635 error(0,0,
636 /* TRANSLATORS: An error message.
637 The placeholders expand to the input file name, a line number, and a column number. */
638 _("%s:%u:%u: cannot convert"),
639 infilename,line,column);
642 static void conversion_error_EINVAL (const char* infilename)
644 fflush(stdout);
645 if (column > 0)
646 putc('\n',stderr);
647 error(0,0,
648 /* TRANSLATORS: An error message.
649 The placeholders expand to the input file name, a line number, and a column number.
650 A "shift sequence" is a sequence of bytes that changes the state of the converter;
651 this concept exists only for "stateful" encodings like ISO-2022-JP. */
652 _("%s:%u:%u: incomplete character or shift sequence"),
653 infilename,line,column);
656 static void conversion_error_other (int errnum, const char* infilename)
658 fflush(stdout);
659 if (column > 0)
660 putc('\n',stderr);
661 error(0,errnum,
662 /* TRANSLATORS: The first part of an error message.
663 It is followed by a colon and a detail message.
664 The placeholders expand to the input file name, a line number, and a column number. */
665 _("%s:%u:%u"),
666 infilename,line,column);
669 /* Convert the input given in infile. */
671 static int convert (iconv_t cd, int infile, const char* infilename)
673 char inbuf[4096+4096];
674 size_t inbufrest = 0;
675 int infile_error = 0;
676 char initial_outbuf[4096];
677 char *outbuf = initial_outbuf;
678 size_t outbufsize = sizeof(initial_outbuf);
679 int status = 0;
681 #if O_BINARY
682 SET_BINARY(infile);
683 #endif
684 line = 1; column = 0;
685 iconv(cd,NULL,NULL,NULL,NULL);
686 for (;;) {
687 size_t inbufsize;
688 /* Transfer the accumulated output to its destination, in case the
689 safe_read() call will block. */
690 fflush(stdout);
691 inbufsize = safe_read(infile,inbuf+4096,4096);
692 if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) {
693 infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0);
694 if (inbufrest == 0)
695 break;
696 else {
697 if (ilseq_byte_subst != NULL)
698 subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
699 if (!silent)
700 conversion_error_EINVAL(infilename);
701 status = 1;
702 goto done;
704 } else {
705 const char* inptr = inbuf+4096-inbufrest;
706 size_t insize = inbufrest+inbufsize;
707 inbufrest = 0;
708 while (insize > 0) {
709 char* outptr = outbuf;
710 size_t outsize = outbufsize;
711 size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
712 if (outptr != outbuf) {
713 int saved_errno = errno;
714 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
715 status = 1;
716 goto done;
718 errno = saved_errno;
720 if (res == (size_t)(-1)) {
721 if (errno == EILSEQ) {
722 if (discard_unconvertible == 1) {
723 int one = 1;
724 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
725 discard_unconvertible = 2;
726 status = 1;
727 } else {
728 if (!silent)
729 conversion_error_EILSEQ(infilename);
730 status = 1;
731 goto done;
733 } else if (errno == EINVAL) {
734 if (inbufsize == 0 || insize > 4096) {
735 if (!silent)
736 conversion_error_EINVAL(infilename);
737 status = 1;
738 goto done;
739 } else {
740 inbufrest = insize;
741 if (insize > 0) {
742 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
743 we cannot use memcpy here, because source and destination
744 regions may overlap. */
745 char* restptr = inbuf+4096-insize;
746 do { *restptr++ = *inptr++; } while (--insize > 0);
748 break;
750 } else if (errno == E2BIG) {
751 if (outptr==outbuf) {
752 /* outbuf is too small. Double its size. */
753 if (outbuf != initial_outbuf)
754 free(outbuf);
755 outbufsize = 2*outbufsize;
756 if (outbufsize==0) /* integer overflow? */
757 xalloc_die();
758 outbuf = (char*)xmalloc(outbufsize);
760 } else {
761 if (!silent)
762 conversion_error_other(errno,infilename);
763 status = 1;
764 goto done;
770 for (;;) {
771 char* outptr = outbuf;
772 size_t outsize = outbufsize;
773 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
774 if (outptr != outbuf) {
775 int saved_errno = errno;
776 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
777 status = 1;
778 goto done;
780 errno = saved_errno;
782 if (res == (size_t)(-1)) {
783 if (errno == EILSEQ) {
784 if (discard_unconvertible == 1) {
785 int one = 1;
786 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
787 discard_unconvertible = 2;
788 status = 1;
789 } else {
790 if (!silent)
791 conversion_error_EILSEQ(infilename);
792 status = 1;
793 goto done;
795 } else if (errno == EINVAL) {
796 if (!silent)
797 conversion_error_EINVAL(infilename);
798 status = 1;
799 goto done;
800 } else if (errno == E2BIG) {
801 if (outptr==outbuf) {
802 /* outbuf is too small. Double its size. */
803 if (outbuf != initial_outbuf)
804 free(outbuf);
805 outbufsize = 2*outbufsize;
806 if (outbufsize==0) /* integer overflow? */
807 xalloc_die();
808 outbuf = (char*)xmalloc(outbufsize);
810 } else {
811 if (!silent)
812 conversion_error_other(errno,infilename);
813 status = 1;
814 goto done;
816 } else
817 break;
819 if (infile_error) {
820 fflush(stdout);
821 if (column > 0)
822 putc('\n',stderr);
823 error(0,infile_error,
824 /* TRANSLATORS: An error message.
825 The placeholder expands to the input file name. */
826 _("%s: I/O error"),
827 infilename);
828 status = 1;
829 goto done;
831 done:
832 if (outbuf != initial_outbuf)
833 free(outbuf);
834 return status;
837 /* ========================================================================= */
839 int main (int argc, char* argv[])
841 const char* fromcode = NULL;
842 const char* tocode = NULL;
843 int do_list = 0;
844 iconv_t cd;
845 struct iconv_fallbacks fallbacks;
846 struct iconv_hooks hooks;
847 int i;
848 int status;
850 set_program_name (argv[0]);
851 #if HAVE_SETLOCALE
852 /* Needed for the locale dependent encodings, "char" and "wchar_t",
853 and for gettext. */
854 setlocale(LC_CTYPE,"");
855 #if ENABLE_NLS
856 /* Needed for gettext. */
857 setlocale(LC_MESSAGES,"");
858 #endif
859 #endif
860 #if ENABLE_NLS
861 bindtextdomain("libiconv",relocate(LOCALEDIR));
862 #endif
863 textdomain("libiconv");
864 for (i = 1; i < argc;) {
865 size_t len = strlen(argv[i]);
866 if (!strcmp(argv[i],"--")) {
867 i++;
868 break;
870 if (!strcmp(argv[i],"-f")
871 /* --f ... --from-code */
872 || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
873 /* --from-code=... */
874 || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
875 if (len < 12)
876 if (i == argc-1) usage(1);
877 if (fromcode != NULL) usage(1);
878 if (len < 12) {
879 fromcode = argv[i+1];
880 i += 2;
881 } else {
882 fromcode = argv[i]+12;
883 i++;
885 continue;
887 if (!strcmp(argv[i],"-t")
888 /* --t ... --to-code */
889 || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
890 /* --from-code=... */
891 || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
892 if (len < 10)
893 if (i == argc-1) usage(1);
894 if (tocode != NULL) usage(1);
895 if (len < 10) {
896 tocode = argv[i+1];
897 i += 2;
898 } else {
899 tocode = argv[i]+10;
900 i++;
902 continue;
904 if (!strcmp(argv[i],"-l")
905 /* --l ... --list */
906 || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
907 do_list = 1;
908 i++;
909 continue;
911 if (/* --by ... --byte-subst */
912 (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
913 /* --byte-subst=... */
914 || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
915 if (len < 13) {
916 if (i == argc-1) usage(1);
917 ilseq_byte_subst = argv[i+1];
918 i += 2;
919 } else {
920 ilseq_byte_subst = argv[i]+13;
921 i++;
923 ilseq_byte_subst_size =
924 check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
925 continue;
927 if (/* --w ... --widechar-subst */
928 (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
929 /* --widechar-subst=... */
930 || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
931 if (len < 17) {
932 if (i == argc-1) usage(1);
933 ilseq_wchar_subst = argv[i+1];
934 i += 2;
935 } else {
936 ilseq_wchar_subst = argv[i]+17;
937 i++;
939 ilseq_wchar_subst_size =
940 check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
941 continue;
943 if (/* --u ... --unicode-subst */
944 (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
945 /* --unicode-subst=... */
946 || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
947 if (len < 16) {
948 if (i == argc-1) usage(1);
949 ilseq_unicode_subst = argv[i+1];
950 i += 2;
951 } else {
952 ilseq_unicode_subst = argv[i]+16;
953 i++;
955 ilseq_unicode_subst_size =
956 check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
957 continue;
959 if /* --s ... --silent */
960 (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
961 silent = 1;
962 i++;
963 continue;
965 if /* --h ... --help */
966 (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
967 usage(0);
969 if /* --v ... --version */
970 (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
971 print_version();
973 #if O_BINARY
974 /* Backward compatibility with iconv <= 1.9.1. */
975 if /* --bi ... --binary */
976 (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
977 i++;
978 continue;
980 #endif
981 if (argv[i][0] == '-') {
982 const char *option = argv[i] + 1;
983 if (*option == '\0')
984 usage(1);
985 for (; *option; option++)
986 switch (*option) {
987 case 'c': discard_unconvertible = 1; break;
988 case 's': silent = 1; break;
989 default: usage(1);
991 i++;
992 continue;
994 break;
996 if (do_list) {
997 if (i != 2 || i != argc)
998 usage(1);
999 iconvlist(print_one,NULL);
1000 status = 0;
1001 } else {
1002 #if O_BINARY
1003 SET_BINARY(fileno(stdout));
1004 #endif
1005 if (fromcode == NULL)
1006 fromcode = "char";
1007 if (tocode == NULL)
1008 tocode = "char";
1009 cd = iconv_open(tocode,fromcode);
1010 if (cd == (iconv_t)(-1)) {
1011 if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
1012 error(0,0,
1013 /* TRANSLATORS: An error message.
1014 The placeholder expands to the encoding name, specified through --from-code. */
1015 _("conversion from %s unsupported"),
1016 fromcode);
1017 else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
1018 error(0,0,
1019 /* TRANSLATORS: An error message.
1020 The placeholder expands to the encoding name, specified through --to-code. */
1021 _("conversion to %s unsupported"),
1022 tocode);
1023 else
1024 error(0,0,
1025 /* TRANSLATORS: An error message.
1026 The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
1027 _("conversion from %s to %s unsupported"),
1028 fromcode,tocode);
1029 error(EXIT_FAILURE,0,
1030 /* TRANSLATORS: Additional advice after an error message.
1031 The %s placeholder expands to the program name. */
1032 _("try '%s -l' to get the list of supported encodings"),
1033 program_name);
1035 /* Look at fromcode and tocode, to determine whether character widths
1036 should be determined according to legacy CJK conventions. */
1037 cjkcode = iconv_canonicalize(tocode);
1038 if (!is_cjk_encoding(cjkcode))
1039 cjkcode = iconv_canonicalize(fromcode);
1040 /* Set up fallback routines for handling impossible conversions. */
1041 if (ilseq_byte_subst != NULL)
1042 ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
1043 if (!discard_unconvertible) {
1044 #if HAVE_WCHAR_T
1045 if (ilseq_wchar_subst != NULL)
1046 ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
1047 #endif
1048 if (ilseq_unicode_subst != NULL)
1049 ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
1050 if (ilseq_byte_subst != NULL) {
1051 subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
1052 subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
1053 #if HAVE_WCHAR_T
1054 subst_mb_to_wc_cd = iconv_open("wchar_t","char");
1055 subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
1056 #endif
1057 subst_mb_to_mb_cd = iconv_open(tocode,"char");
1058 subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
1060 #if HAVE_WCHAR_T
1061 if (ilseq_wchar_subst != NULL) {
1062 subst_wc_to_mb_cd = iconv_open(tocode,"char");
1063 subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
1065 #endif
1066 if (ilseq_unicode_subst != NULL) {
1067 subst_uc_to_mb_cd = iconv_open(tocode,"char");
1068 subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
1070 fallbacks.mb_to_uc_fallback =
1071 (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
1072 fallbacks.uc_to_mb_fallback =
1073 (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
1074 fallbacks.mb_to_wc_fallback =
1075 (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
1076 fallbacks.wc_to_mb_fallback =
1077 (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
1078 fallbacks.data = NULL;
1079 iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
1081 /* Set up hooks for updating the line and column position. */
1082 hooks.uc_hook = update_line_column;
1083 hooks.wc_hook = NULL;
1084 hooks.data = NULL;
1085 iconvctl(cd, ICONV_SET_HOOKS, &hooks);
1086 if (i == argc)
1087 status = convert(cd,fileno(stdin),
1088 /* TRANSLATORS: A filename substitute denoting standard input. */
1089 _("(stdin)"));
1090 else {
1091 status = 0;
1092 for (; i < argc; i++) {
1093 const char* infilename = argv[i];
1094 FILE* infile = fopen(infilename,"r");
1095 if (infile == NULL) {
1096 int saved_errno = errno;
1097 error(0,saved_errno,
1098 /* TRANSLATORS: The first part of an error message.
1099 It is followed by a colon and a detail message.
1100 The %s placeholder expands to the input file name. */
1101 _("%s"),
1102 infilename);
1103 status = 1;
1104 } else {
1105 status |= convert(cd,fileno(infile),infilename);
1106 fclose(infile);
1110 iconv_close(cd);
1112 if (ferror(stdout) || fclose(stdout)) {
1113 error(0,0,
1114 /* TRANSLATORS: An error message. */
1115 _("I/O error"));
1116 status = 1;
1118 exit(status);