Prefer #include <...> for system headers.
[libiconv.git] / src / iconv.c
blobcb579da27695fb770b31fca87c650e33599eb03a
1 /* Copyright (C) 2000-2024 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #ifndef ICONV_CONST
19 # define ICONV_CONST
20 #endif
22 #include <limits.h>
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <iconv.h>
28 #include <errno.h>
29 #include <locale.h>
30 #include <fcntl.h>
32 /* Ensure that iconv_no_i18n does not depend on libintl. */
33 #ifdef NO_I18N
34 # undef ENABLE_NLS
35 # undef ENABLE_RELOCATABLE
36 #endif
38 #include "binary-io.h"
39 #include "progname.h"
40 #include "relocatable.h"
41 #include "safe-read.h"
42 #include "xalloc.h"
43 #include "uniwidth.h"
44 #include "uniwidth/cjk.h"
46 #ifdef __MVS__
47 #include "zos-tag.h"
48 #endif
50 /* Ensure that iconv_no_i18n does not depend on libintl. */
51 #ifdef NO_I18N
52 #include <stdarg.h>
53 static void
54 error (int status, int errnum, const char *message, ...)
56 va_list args;
58 fflush(stdout);
59 fprintf(stderr,"%s: ",program_name);
60 va_start(args,message);
61 vfprintf(stderr,message,args);
62 va_end(args);
63 if (errnum) {
64 const char *s = strerror(errnum);
65 if (s == NULL)
66 s = "Unknown system error";
68 putc('\n',stderr);
69 fflush(stderr);
70 if (status)
71 exit(status);
73 #else
74 # include <error.h>
75 #endif
77 #include "gettext.h"
79 #define _(str) gettext(str)
81 /* Ensure that iconv_no_i18n does not depend on libintl. */
82 #ifdef NO_I18N
83 # define xmalloc malloc
84 # define xalloc_die abort
85 #endif
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)

/* Locale independent test for a printable ASCII character (' ' .. '~').
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isprint must be an 'unsigned char'.)
   The argument is evaluated twice, so it must not have side effects.  */
#define c_isprint(c) ((c) >= ' ' && (c) <= '~')
98 /* ========================================================================= */
100 static int discard_unconvertible = 0;
101 static int silent = 0;
103 static void usage (int exitcode)
105 if (exitcode != 0) {
106 const char* helpstring1 =
107 /* TRANSLATORS: The first line of the short usage message. */
108 _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
109 const char* helpstring2 =
110 /* TRANSLATORS: The second line of the short usage message.
111 Align it correctly against the first line. */
112 _("or: iconv -l");
113 fprintf(stderr, "%s\n%s\n", helpstring1, helpstring2);
114 fprintf(stderr, _("Try '%s --help' for more information.\n"), program_name);
115 } else {
116 /* xgettext: no-wrap */
117 /* TRANSLATORS: The first line of the long usage message.
118 The %s placeholder expands to the program name. */
119 printf(_("\
120 Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
121 program_name);
122 /* xgettext: no-wrap */
123 /* TRANSLATORS: The second line of the long usage message.
124 Align it correctly against the first line.
125 The %s placeholder expands to the program name. */
126 printf(_("\
127 or: %s -l\n"),
128 program_name);
129 printf("\n");
130 /* xgettext: no-wrap */
131 /* TRANSLATORS: Description of the iconv program. */
132 printf(_("\
133 Converts text from one encoding to another encoding.\n"));
134 printf("\n");
135 /* xgettext: no-wrap */
136 printf(_("\
137 Options controlling the input and output format:\n"));
138 /* xgettext: no-wrap */
139 printf(_("\
140 -f ENCODING, --from-code=ENCODING\n\
141 the encoding of the input\n"));
142 /* xgettext: no-wrap */
143 printf(_("\
144 -t ENCODING, --to-code=ENCODING\n\
145 the encoding of the output\n"));
146 printf("\n");
147 /* xgettext: no-wrap */
148 printf(_("\
149 Options controlling conversion problems:\n"));
150 /* xgettext: no-wrap */
151 printf(_("\
152 -c discard unconvertible characters\n"));
153 /* xgettext: no-wrap */
154 printf(_("\
155 --unicode-subst=FORMATSTRING\n\
156 substitution for unconvertible Unicode characters\n"));
157 /* xgettext: no-wrap */
158 printf(_("\
159 --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
160 /* xgettext: no-wrap */
161 printf(_("\
162 --widechar-subst=FORMATSTRING\n\
163 substitution for unconvertible wide characters\n"));
164 printf("\n");
165 /* xgettext: no-wrap */
166 printf(_("\
167 Options controlling error output:\n"));
168 /* xgettext: no-wrap */
169 printf(_("\
170 -s, --silent suppress error messages about conversion problems\n"));
171 printf("\n");
172 /* xgettext: no-wrap */
173 printf(_("\
174 Informative output:\n"));
175 /* xgettext: no-wrap */
176 printf(_("\
177 -l, --list list the supported encodings\n"));
178 /* xgettext: no-wrap */
179 printf(_("\
180 --help display this help and exit\n"));
181 /* xgettext: no-wrap */
182 printf(_("\
183 --version output version information and exit\n"));
184 printf("\n");
185 /* TRANSLATORS: The first placeholder is the web address of the Savannah
186 project of this package. The second placeholder is the bug-reporting
187 email address for this package. Please add _another line_ saying
188 "Report translation bugs to <...>\n" with the address for translation
189 bugs (typically your translation team's web or email address). */
190 printf(_("\
191 Report bugs in the bug tracker at <%s>\n\
192 or by email to <%s>.\n"),
193 "https://savannah.gnu.org/projects/libiconv",
194 "bug-gnu-libiconv@gnu.org");
196 exit(exitcode);
199 static void print_version (void)
201 printf("iconv (GNU libiconv %d.%d)\n",
202 _libiconv_version >> 8, _libiconv_version & 0xff);
203 printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2022");
204 /* xgettext: no-wrap */
205 /* TRANSLATORS: The %s placeholder is the web address of the GPL license. */
206 printf (_("\
207 License GPLv3+: GNU GPL version 3 or later <%s>\n\
208 This is free software: you are free to change and redistribute it.\n\
209 There is NO WARRANTY, to the extent permitted by law.\n"),
210 "https://gnu.org/licenses/gpl.html");
211 /* TRANSLATORS: The %s placeholder expands to an author's name. */
212 printf(_("Written by %s.\n"),"Bruno Haible");
213 exit(EXIT_SUCCESS);
/* Callback handed to iconvlist(): writes the NAMESCOUNT strings in NAMES to
   stdout on one line, separated by single spaces and terminated by a
   newline.  DATA is ignored.  Always returns 0.  */
static int print_one (unsigned int namescount, const char * const * names,
                      void* data)
{
  unsigned int idx = 0;

  (void)data;
  while (idx < namescount) {
    /* A separator goes before every name except the first.  */
    if (idx != 0)
      putc(' ',stdout);
    fputs(names[idx],stdout);
    idx++;
  }
  putc('\n',stdout);
  return 0;
}
230 /* ========================================================================= */
/* Current input position, maintained for error messages:
   line number and column position.  */
static unsigned int line;
static unsigned int column;
/* Encoding name passed to uc_width() when measuring character widths.  */
static const char* cjkcode;

/* Update the line number and column position after the character UC was
   successfully converted.  DATA is not used.
   A line feed starts a new line at column 0.  Any other character advances
   the column by its uc_width(); when that width is negative, only a
   horizontal tab has an effect: it advances to the next multiple of 8.  */
static void update_line_column (unsigned int uc, void* data)
{
  int width;

  (void)data;
  if (uc == 0x000A) {
    line++;
    column = 0;
    return;
  }
  width = uc_width(uc, cjkcode);
  if (width >= 0) {
    column += width;
    return;
  }
  if (uc == 0x0009)
    column += 8 - (column % 8);
}
252 /* ========================================================================= */
254 /* Production of placeholder strings as fallback for unconvertible
255 characters. */
/* Check that the argument is a format string taking either no argument
   or exactly one unsigned integer argument.  Returns the maximum output
   size of the format string (not counting the terminating NUL).
   FORMAT is the format string to validate; PARAM_NAME is the command-line
   option it came from and is only used in error messages.
   Any violation is reported through error() with EXIT_FAILURE, so this
   function returns only for acceptable format strings.  */
static size_t check_subst_formatstring (const char *format, const char *param_name)
{
  /* C format strings are described in POSIX (IEEE P1003.1 2001), section
     XSH 3 fprintf().  See also Linux fprintf(3) manual page.
     For simplicity, we don't accept
       - the '%m$' reordering syntax,
       - the 'I' flag,
       - width specifications referring to an argument,
       - precision specifications referring to an argument,
       - size specifiers,
       - format specifiers other than 'o', 'u', 'x', 'X'.
     What remains?
     A directive
       - starts with '%',
       - is optionally followed by any of the characters '#', '0', '-', ' ',
         '+', "'", each of which acts as a flag,
       - is optionally followed by a width specification: a nonempty digit
         sequence,
       - is optionally followed by '.' and a precision specification: a
         nonempty digit sequence,
       - is finished by a specifier
           - '%', that needs no argument,
           - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
   */
  size_t maxsize = 0;
  unsigned int unnumbered_arg_count = 0;

  for (; *format != '\0';) {
    if (*format++ == '%') {
      /* A directive.  */
      unsigned int width = 0;
      unsigned int precision = 0;
      unsigned int length;
      /* Parse flags.  */
      for (;;) {
        if (*format == ' ' || *format == '+' || *format == '-'
            || *format == '#' || *format == '0' || *format == '\'')
          format++;
        else
          break;
      }
      /* Parse width.  */
      if (*format == '*')
        error(EXIT_FAILURE,0,
              /* TRANSLATORS: An error message.
                 The %s placeholder expands to a command-line option.  */
              _("%s argument: A format directive with a variable width is not allowed here."),
              param_name);
      if (isdigit (*format)) {
        do {
          /* Refuse values that would wrap around: a wrapped width would make
             the size estimate smaller than what printf actually produces.  */
          if (width > (INT_MAX - 9) / 10)
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The width of a format directive is too large."),
                  param_name);
          width = 10*width + (*format - '0');
          format++;
        } while (isdigit (*format));
      }
      /* Parse precision.  */
      if (*format == '.') {
        format++;
        if (*format == '*')
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a variable precision is not allowed here."),
                param_name);
        if (isdigit (*format)) {
          do {
            /* Same wrap-around protection as for the width.  */
            if (precision > (INT_MAX - 9) / 10)
              error(EXIT_FAILURE,0,
                    /* TRANSLATORS: An error message.
                       The %s placeholder expands to a command-line option.  */
                    _("%s argument: The precision of a format directive is too large."),
                    param_name);
            precision = 10*precision + (*format - '0');
            format++;
          } while (isdigit (*format));
        }
      }
      /* Parse size.  */
      switch (*format) {
        case 'h': case 'l': case 'L': case 'q':
        case 'j': case 'z': case 'Z': case 't':
          error(EXIT_FAILURE,0,
                /* TRANSLATORS: An error message.
                   The %s placeholder expands to a command-line option.  */
                _("%s argument: A format directive with a size is not allowed here."),
                param_name);
      }
      /* Parse end of directive.  */
      switch (*format) {
        case '%':
          length = 1;
          break;
        case 'u': case 'o': case 'x': case 'X':
          if (*format == 'u') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.30103 /* binary -> decimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length *= 2; /* estimate for FLAG_GROUP */
            length += 1; /* account for leading sign */
          } else if (*format == 'o') {
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.333334 /* binary -> octal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 1; /* account for leading sign */
          } else { /* 'x', 'X' */
            length = (unsigned int) (sizeof (unsigned int) * CHAR_BIT
                                     * 0.25 /* binary -> hexadecimal */
                                    )
                     + 1; /* turn floor into ceil */
            if (length < precision)
              length = precision;
            length += 2; /* account for leading sign or alternate form */
          }
          unnumbered_arg_count++;
          break;
        default:
          if (*format == '\0')
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The string ends in the middle of a directive."),
                  param_name);
          else if (c_isprint(*format))
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.
                     The %c placeholder expands to an unknown format directive.  */
                  _("%s argument: The character '%c' is not a valid conversion specifier."),
                  param_name,*format);
          else
            error(EXIT_FAILURE,0,
                  /* TRANSLATORS: An error message.
                     The %s placeholder expands to a command-line option.  */
                  _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
                  param_name);
          abort(); /*NOTREACHED*/
      }
      format++;
      /* The width is a minimum field width, so it is a lower bound on the
         output size of the directive.  */
      if (length < width)
        length = width;
      maxsize += length;
    } else
      /* An ordinary character is copied to the output as is.  */
      maxsize++;
  }
  if (unnumbered_arg_count > 1)
    error(EXIT_FAILURE,0,
          /* TRANSLATORS: An error message.
             The %s placeholder expands to a command-line option.
             The %u placeholder expands to the number of arguments consumed by the format string.  */
          ngettext("%s argument: The format string consumes more than one argument: %u argument.",
                   "%s argument: The format string consumes more than one argument: %u arguments.",
                   unnumbered_arg_count),
          param_name,unnumbered_arg_count);
  return maxsize;
}
/* State of the three substitution options.  For each kind there is
     - the format string itself,
     - the maximum result size of that format string,
     - a buffer of that size + 1, into which the format string is expanded.  */

/* Substitution for unconvertible bytes.  */
static const char* ilseq_byte_subst;
static size_t ilseq_byte_subst_size;
static char* ilseq_byte_subst_buffer;     /* size ilseq_byte_subst_size+1 */

/* Substitution for unconvertible wide characters.  */
static const char* ilseq_wchar_subst;
static size_t ilseq_wchar_subst_size;
static char* ilseq_wchar_subst_buffer;    /* size ilseq_wchar_subst_size+1 */

/* Substitution for unconvertible Unicode characters.  */
static const char* ilseq_unicode_subst;
static size_t ilseq_unicode_subst_size;
static char* ilseq_unicode_subst_buffer;  /* size ilseq_unicode_subst_size+1 */
432 /* Auxiliary variables for subst_mb_to_uc_fallback. */
433 /* Converter from locale encoding to UCS-4. */
434 static iconv_t subst_mb_to_uc_cd;
435 /* Buffer of size ilseq_byte_subst_size. */
436 static unsigned int* subst_mb_to_uc_temp_buffer;
438 static void subst_mb_to_uc_fallback
439 (const char* inbuf, size_t inbufsize,
440 void (*write_replacement) (const unsigned int *buf, size_t buflen,
441 void* callback_arg),
442 void* callback_arg,
443 void* data)
445 for (; inbufsize > 0; inbuf++, inbufsize--) {
446 const char* inptr;
447 size_t inbytesleft;
448 char* outptr;
449 size_t outbytesleft;
450 sprintf(ilseq_byte_subst_buffer,
451 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
452 inptr = ilseq_byte_subst_buffer;
453 inbytesleft = strlen(ilseq_byte_subst_buffer);
454 outptr = (char*)subst_mb_to_uc_temp_buffer;
455 outbytesleft = ilseq_byte_subst_size*sizeof(unsigned int);
456 iconv(subst_mb_to_uc_cd,NULL,NULL,NULL,NULL);
457 if (iconv(subst_mb_to_uc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
458 == (size_t)(-1)
459 || iconv(subst_mb_to_uc_cd, NULL,NULL, &outptr,&outbytesleft)
460 == (size_t)(-1))
461 error(EXIT_FAILURE,0,
462 /* TRANSLATORS: An error message.
463 The %s placeholder expands to a piece of text, specified through --byte-subst. */
464 _("cannot convert byte substitution to Unicode: %s"),
465 ilseq_byte_subst_buffer);
466 if (!(outbytesleft%sizeof(unsigned int) == 0))
467 abort();
468 write_replacement(subst_mb_to_uc_temp_buffer,
469 ilseq_byte_subst_size-(outbytesleft/sizeof(unsigned int)),
470 callback_arg);
474 /* Auxiliary variables for subst_uc_to_mb_fallback. */
475 /* Converter from locale encoding to target encoding. */
476 static iconv_t subst_uc_to_mb_cd;
477 /* Buffer of size ilseq_unicode_subst_size*4. */
478 static char* subst_uc_to_mb_temp_buffer;
480 static void subst_uc_to_mb_fallback
481 (unsigned int code,
482 void (*write_replacement) (const char *buf, size_t buflen,
483 void* callback_arg),
484 void* callback_arg,
485 void* data)
487 const char* inptr;
488 size_t inbytesleft;
489 char* outptr;
490 size_t outbytesleft;
491 sprintf(ilseq_unicode_subst_buffer, ilseq_unicode_subst, code);
492 inptr = ilseq_unicode_subst_buffer;
493 inbytesleft = strlen(ilseq_unicode_subst_buffer);
494 outptr = subst_uc_to_mb_temp_buffer;
495 outbytesleft = ilseq_unicode_subst_size*4;
496 iconv(subst_uc_to_mb_cd,NULL,NULL,NULL,NULL);
497 if (iconv(subst_uc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
498 == (size_t)(-1)
499 || iconv(subst_uc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
500 == (size_t)(-1))
501 error(EXIT_FAILURE,0,
502 /* TRANSLATORS: An error message.
503 The %s placeholder expands to a piece of text, specified through --unicode-subst. */
504 _("cannot convert unicode substitution to target encoding: %s"),
505 ilseq_unicode_subst_buffer);
506 write_replacement(subst_uc_to_mb_temp_buffer,
507 ilseq_unicode_subst_size*4-outbytesleft,
508 callback_arg);
511 /* Auxiliary variables for subst_mb_to_wc_fallback. */
512 /* Converter from locale encoding to wchar_t. */
513 static iconv_t subst_mb_to_wc_cd;
514 /* Buffer of size ilseq_byte_subst_size. */
515 static wchar_t* subst_mb_to_wc_temp_buffer;
517 static void subst_mb_to_wc_fallback
518 (const char* inbuf, size_t inbufsize,
519 void (*write_replacement) (const wchar_t *buf, size_t buflen,
520 void* callback_arg),
521 void* callback_arg,
522 void* data)
524 for (; inbufsize > 0; inbuf++, inbufsize--) {
525 const char* inptr;
526 size_t inbytesleft;
527 char* outptr;
528 size_t outbytesleft;
529 sprintf(ilseq_byte_subst_buffer,
530 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
531 inptr = ilseq_byte_subst_buffer;
532 inbytesleft = strlen(ilseq_byte_subst_buffer);
533 outptr = (char*)subst_mb_to_wc_temp_buffer;
534 outbytesleft = ilseq_byte_subst_size*sizeof(wchar_t);
535 iconv(subst_mb_to_wc_cd,NULL,NULL,NULL,NULL);
536 if (iconv(subst_mb_to_wc_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
537 == (size_t)(-1)
538 || iconv(subst_mb_to_wc_cd, NULL,NULL, &outptr,&outbytesleft)
539 == (size_t)(-1))
540 error(EXIT_FAILURE,0,
541 /* TRANSLATORS: An error message.
542 The %s placeholder expands to a piece of text, specified through --byte-subst. */
543 _("cannot convert byte substitution to wide string: %s"),
544 ilseq_byte_subst_buffer);
545 if (!(outbytesleft%sizeof(wchar_t) == 0))
546 abort();
547 write_replacement(subst_mb_to_wc_temp_buffer,
548 ilseq_byte_subst_size-(outbytesleft/sizeof(wchar_t)),
549 callback_arg);
553 /* Auxiliary variables for subst_wc_to_mb_fallback. */
554 /* Converter from locale encoding to target encoding. */
555 static iconv_t subst_wc_to_mb_cd;
556 /* Buffer of size ilseq_wchar_subst_size*4.
557 Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
558 static char* subst_wc_to_mb_temp_buffer;
560 static void subst_wc_to_mb_fallback
561 (wchar_t code,
562 void (*write_replacement) (const char *buf, size_t buflen,
563 void* callback_arg),
564 void* callback_arg,
565 void* data)
567 const char* inptr;
568 size_t inbytesleft;
569 char* outptr;
570 size_t outbytesleft;
571 sprintf(ilseq_wchar_subst_buffer, ilseq_wchar_subst, (unsigned int) code);
572 inptr = ilseq_wchar_subst_buffer;
573 inbytesleft = strlen(ilseq_wchar_subst_buffer);
574 outptr = subst_wc_to_mb_temp_buffer;
575 outbytesleft = ilseq_wchar_subst_size*4;
576 iconv(subst_wc_to_mb_cd,NULL,NULL,NULL,NULL);
577 if (iconv(subst_wc_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
578 == (size_t)(-1)
579 || iconv(subst_wc_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
580 == (size_t)(-1))
581 error(EXIT_FAILURE,0,
582 /* TRANSLATORS: An error message.
583 The %s placeholder expands to a piece of text, specified through --widechar-subst. */
584 _("cannot convert widechar substitution to target encoding: %s"),
585 ilseq_wchar_subst_buffer);
586 write_replacement(subst_wc_to_mb_temp_buffer,
587 ilseq_wchar_subst_size*4-outbytesleft,
588 callback_arg);
591 /* Auxiliary variables for subst_mb_to_mb_fallback. */
592 /* Converter from locale encoding to target encoding. */
593 static iconv_t subst_mb_to_mb_cd;
594 /* Buffer of size ilseq_byte_subst_size*4. */
595 static char* subst_mb_to_mb_temp_buffer;
597 static void subst_mb_to_mb_fallback (const char* inbuf, size_t inbufsize)
599 for (; inbufsize > 0; inbuf++, inbufsize--) {
600 const char* inptr;
601 size_t inbytesleft;
602 char* outptr;
603 size_t outbytesleft;
604 sprintf(ilseq_byte_subst_buffer,
605 ilseq_byte_subst, (unsigned int)(unsigned char)*inbuf);
606 inptr = ilseq_byte_subst_buffer;
607 inbytesleft = strlen(ilseq_byte_subst_buffer);
608 outptr = subst_mb_to_mb_temp_buffer;
609 outbytesleft = ilseq_byte_subst_size*4;
610 iconv(subst_mb_to_mb_cd,NULL,NULL,NULL,NULL);
611 if (iconv(subst_mb_to_mb_cd, (ICONV_CONST char**)&inptr,&inbytesleft, &outptr,&outbytesleft)
612 == (size_t)(-1)
613 || iconv(subst_mb_to_mb_cd, NULL,NULL, &outptr,&outbytesleft)
614 == (size_t)(-1))
615 error(EXIT_FAILURE,0,
616 /* TRANSLATORS: An error message.
617 The %s placeholder expands to a piece of text, specified through --byte-subst. */
618 _("cannot convert byte substitution to target encoding: %s"),
619 ilseq_byte_subst_buffer);
620 fwrite(subst_mb_to_mb_temp_buffer,1,ilseq_byte_subst_size*4-outbytesleft,
621 stdout);
625 /* ========================================================================= */
627 /* Error messages during conversion. */
629 static void conversion_error_EILSEQ (const char* infilename)
631 fflush(stdout);
632 if (column > 0)
633 putc('\n',stderr);
634 error(0,0,
635 /* TRANSLATORS: An error message.
636 The placeholders expand to the input file name, a line number, and a column number. */
637 _("%s:%u:%u: cannot convert"),
638 infilename,line,column);
641 static void conversion_error_EINVAL (const char* infilename)
643 fflush(stdout);
644 if (column > 0)
645 putc('\n',stderr);
646 error(0,0,
647 /* TRANSLATORS: An error message.
648 The placeholders expand to the input file name, a line number, and a column number.
649 A "shift sequence" is a sequence of bytes that changes the state of the converter;
650 this concept exists only for "stateful" encodings like ISO-2022-JP. */
651 _("%s:%u:%u: incomplete character or shift sequence"),
652 infilename,line,column);
655 static void conversion_error_other (int errnum, const char* infilename)
657 fflush(stdout);
658 if (column > 0)
659 putc('\n',stderr);
660 error(0,errnum,
661 /* TRANSLATORS: The first part of an error message.
662 It is followed by a colon and a detail message.
663 The placeholders expand to the input file name, a line number, and a column number. */
664 _("%s:%u:%u"),
665 infilename,line,column);
668 /* Convert the input given in infile. */
670 static int convert (iconv_t cd, int infile, const char* infilename, _GL_UNUSED const char* tocode)
672 char inbuf[4096+4096];
673 size_t inbufrest = 0;
674 int infile_error = 0;
675 char initial_outbuf[4096];
676 char *outbuf = initial_outbuf;
677 size_t outbufsize = sizeof(initial_outbuf);
678 int status = 0;
680 #if O_BINARY
681 SET_BINARY(infile);
682 #endif
683 #ifdef __MVS__
684 /* Turn off z/OS auto-conversion. */
685 struct f_cnvrt req = {SETCVTOFF, 0, 0};
686 fcntl(infile, F_CONTROL_CVT, &req);
687 #endif
688 line = 1; column = 0;
689 iconv(cd,NULL,NULL,NULL,NULL);
690 for (;;) {
691 size_t inbufsize;
692 /* Transfer the accumulated output to its destination, in case the
693 safe_read() call will block. */
694 fflush(stdout);
695 inbufsize = safe_read(infile,inbuf+4096,4096);
696 if (inbufsize == 0 || inbufsize == SAFE_READ_ERROR) {
697 infile_error = (inbufsize == SAFE_READ_ERROR ? errno : 0);
698 if (inbufrest == 0)
699 break;
700 else {
701 if (ilseq_byte_subst != NULL)
702 subst_mb_to_mb_fallback(inbuf+4096-inbufrest, inbufrest);
703 if (!silent)
704 conversion_error_EINVAL(infilename);
705 status = 1;
706 goto done;
708 } else {
709 const char* inptr = inbuf+4096-inbufrest;
710 size_t insize = inbufrest+inbufsize;
711 inbufrest = 0;
712 while (insize > 0) {
713 char* outptr = outbuf;
714 size_t outsize = outbufsize;
715 size_t res = iconv(cd,(ICONV_CONST char**)&inptr,&insize,&outptr,&outsize);
716 if (outptr != outbuf) {
717 int saved_errno = errno;
718 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
719 status = 1;
720 goto done;
722 errno = saved_errno;
724 if (res == (size_t)(-1)) {
725 if (errno == EILSEQ) {
726 if (discard_unconvertible == 1) {
727 int one = 1;
728 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
729 discard_unconvertible = 2;
730 status = 1;
731 } else {
732 if (!silent)
733 conversion_error_EILSEQ(infilename);
734 status = 1;
735 goto done;
737 } else if (errno == EINVAL) {
738 if (inbufsize == 0 || insize > 4096) {
739 if (!silent)
740 conversion_error_EINVAL(infilename);
741 status = 1;
742 goto done;
743 } else {
744 inbufrest = insize;
745 if (insize > 0) {
746 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
747 we cannot use memcpy here, because source and destination
748 regions may overlap. */
749 char* restptr = inbuf+4096-insize;
750 do { *restptr++ = *inptr++; } while (--insize > 0);
752 break;
754 } else if (errno == E2BIG) {
755 if (outptr==outbuf) {
756 /* outbuf is too small. Double its size. */
757 if (outbuf != initial_outbuf)
758 free(outbuf);
759 outbufsize = 2*outbufsize;
760 if (outbufsize==0) /* integer overflow? */
761 xalloc_die();
762 outbuf = (char*)xmalloc(outbufsize);
764 } else {
765 if (!silent)
766 conversion_error_other(errno,infilename);
767 status = 1;
768 goto done;
774 for (;;) {
775 char* outptr = outbuf;
776 size_t outsize = outbufsize;
777 size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
778 if (outptr != outbuf) {
779 int saved_errno = errno;
780 if (fwrite(outbuf,1,outptr-outbuf,stdout) < outptr-outbuf) {
781 status = 1;
782 goto done;
784 errno = saved_errno;
786 if (res == (size_t)(-1)) {
787 if (errno == EILSEQ) {
788 if (discard_unconvertible == 1) {
789 int one = 1;
790 iconvctl(cd,ICONV_SET_DISCARD_ILSEQ,&one);
791 discard_unconvertible = 2;
792 status = 1;
793 } else {
794 if (!silent)
795 conversion_error_EILSEQ(infilename);
796 status = 1;
797 goto done;
799 } else if (errno == EINVAL) {
800 if (!silent)
801 conversion_error_EINVAL(infilename);
802 status = 1;
803 goto done;
804 } else if (errno == E2BIG) {
805 if (outptr==outbuf) {
806 /* outbuf is too small. Double its size. */
807 if (outbuf != initial_outbuf)
808 free(outbuf);
809 outbufsize = 2*outbufsize;
810 if (outbufsize==0) /* integer overflow? */
811 xalloc_die();
812 outbuf = (char*)xmalloc(outbufsize);
814 } else {
815 if (!silent)
816 conversion_error_other(errno,infilename);
817 status = 1;
818 goto done;
820 } else
821 break;
823 if (infile_error) {
824 fflush(stdout);
825 if (column > 0)
826 putc('\n',stderr);
827 error(0,infile_error,
828 /* TRANSLATORS: An error message.
829 The placeholder expands to the input file name. */
830 _("%s: I/O error"),
831 infilename);
832 status = 1;
833 goto done;
835 done:
836 #ifdef __MVS__
837 if (!status) {
838 status = tagfile(fileno(stdout), tocode);
840 #endif
841 if (outbuf != initial_outbuf)
842 free(outbuf);
843 return status;
846 /* ========================================================================= */
848 int main (int argc, char* argv[])
850 const char* fromcode = NULL;
851 const char* tocode = NULL;
852 int do_list = 0;
853 iconv_t cd;
854 struct iconv_fallbacks fallbacks;
855 struct iconv_hooks hooks;
856 int i;
857 int status;
859 set_program_name (argv[0]);
860 #if HAVE_SETLOCALE
861 /* Needed for the locale dependent encodings, "char" and "wchar_t",
862 and for gettext. */
863 setlocale(LC_CTYPE,"");
864 #if ENABLE_NLS
865 /* Needed for gettext. */
866 setlocale(LC_MESSAGES,"");
867 #endif
868 #endif
869 #if ENABLE_NLS
870 bindtextdomain("libiconv",relocate(LOCALEDIR));
871 #endif
872 textdomain("libiconv");
873 /* No need to invoke the gnulib function stdopen() here, because
874 (1) the only file descriptor allocations done by this program are
875 fopen(...,"r"),
876 (2) when such fopen() calls occur, stdin is not used,
877 hence
878 - when an fopen() call happens to open fd 0, it is harmless, by (2),
879 - when an fopen() call happens to open fd 1 or 2, writing to
880 stdout or stderr will produce an error, by (1). */
882 for (i = 1; i < argc;) {
883 size_t len = strlen(argv[i]);
884 if (!strcmp(argv[i],"--")) {
885 i++;
886 break;
888 if (!strcmp(argv[i],"-f")
889 /* --f ... --from-code */
890 || (len >= 3 && len <= 11 && !strncmp(argv[i],"--from-code",len))
891 /* --from-code=... */
892 || (len >= 12 && !strncmp(argv[i],"--from-code=",12))) {
893 if (len < 12)
894 if (i == argc-1) usage(1);
895 if (fromcode != NULL) usage(1);
896 if (len < 12) {
897 fromcode = argv[i+1];
898 i += 2;
899 } else {
900 fromcode = argv[i]+12;
901 i++;
903 continue;
905 if (!strcmp(argv[i],"-t")
906 /* --t ... --to-code */
907 || (len >= 3 && len <= 9 && !strncmp(argv[i],"--to-code",len))
908 /* --from-code=... */
909 || (len >= 10 && !strncmp(argv[i],"--to-code=",10))) {
910 if (len < 10)
911 if (i == argc-1) usage(1);
912 if (tocode != NULL) usage(1);
913 if (len < 10) {
914 tocode = argv[i+1];
915 i += 2;
916 } else {
917 tocode = argv[i]+10;
918 i++;
920 continue;
922 if (!strcmp(argv[i],"-l")
923 /* --l ... --list */
924 || (len >= 3 && len <= 6 && !strncmp(argv[i],"--list",len))) {
925 do_list = 1;
926 i++;
927 continue;
929 if (/* --by ... --byte-subst */
930 (len >= 4 && len <= 12 && !strncmp(argv[i],"--byte-subst",len))
931 /* --byte-subst=... */
932 || (len >= 13 && !strncmp(argv[i],"--byte-subst=",13))) {
933 if (len < 13) {
934 if (i == argc-1) usage(1);
935 ilseq_byte_subst = argv[i+1];
936 i += 2;
937 } else {
938 ilseq_byte_subst = argv[i]+13;
939 i++;
941 ilseq_byte_subst_size =
942 check_subst_formatstring(ilseq_byte_subst, "--byte-subst");
943 continue;
945 if (/* --w ... --widechar-subst */
946 (len >= 3 && len <= 16 && !strncmp(argv[i],"--widechar-subst",len))
947 /* --widechar-subst=... */
948 || (len >= 17 && !strncmp(argv[i],"--widechar-subst=",17))) {
949 if (len < 17) {
950 if (i == argc-1) usage(1);
951 ilseq_wchar_subst = argv[i+1];
952 i += 2;
953 } else {
954 ilseq_wchar_subst = argv[i]+17;
955 i++;
957 ilseq_wchar_subst_size =
958 check_subst_formatstring(ilseq_wchar_subst, "--widechar-subst");
959 continue;
961 if (/* --u ... --unicode-subst */
962 (len >= 3 && len <= 15 && !strncmp(argv[i],"--unicode-subst",len))
963 /* --unicode-subst=... */
964 || (len >= 16 && !strncmp(argv[i],"--unicode-subst=",16))) {
965 if (len < 16) {
966 if (i == argc-1) usage(1);
967 ilseq_unicode_subst = argv[i+1];
968 i += 2;
969 } else {
970 ilseq_unicode_subst = argv[i]+16;
971 i++;
973 ilseq_unicode_subst_size =
974 check_subst_formatstring(ilseq_unicode_subst, "--unicode-subst");
975 continue;
977 if /* --s ... --silent */
978 (len >= 3 && len <= 8 && !strncmp(argv[i],"--silent",len)) {
979 silent = 1;
980 i++;
981 continue;
983 if /* --h ... --help */
984 (len >= 3 && len <= 6 && !strncmp(argv[i],"--help",len)) {
985 usage(0);
987 if /* --v ... --version */
988 (len >= 3 && len <= 9 && !strncmp(argv[i],"--version",len)) {
989 print_version();
991 #if O_BINARY
992 /* Backward compatibility with iconv <= 1.9.1. */
993 if /* --bi ... --binary */
994 (len >= 4 && len <= 8 && !strncmp(argv[i],"--binary",len)) {
995 i++;
996 continue;
998 #endif
999 if (argv[i][0] == '-' && argv[i][1] != '\0') {
1000 const char *option = argv[i] + 1;
1001 if (*option == '\0')
1002 usage(1);
1003 for (; *option; option++)
1004 switch (*option) {
1005 case 'c': discard_unconvertible = 1; break;
1006 case 's': silent = 1; break;
1007 default: usage(1);
1009 i++;
1010 continue;
1012 break;
1014 if (do_list) {
1015 if (i != 2 || i != argc)
1016 usage(1);
1017 iconvlist(print_one,NULL);
1018 status = 0;
1019 } else {
1020 #if O_BINARY
1021 SET_BINARY(fileno(stdout));
1022 #endif
1023 if (fromcode == NULL)
1024 fromcode = "char";
1025 if (tocode == NULL)
1026 tocode = "char";
1027 cd = iconv_open(tocode,fromcode);
1028 if (cd == (iconv_t)(-1)) {
1029 if (iconv_open("UCS-4",fromcode) == (iconv_t)(-1))
1030 error(0,0,
1031 /* TRANSLATORS: An error message.
1032 The placeholder expands to the encoding name, specified through --from-code. */
1033 _("conversion from %s unsupported"),
1034 fromcode);
1035 else if (iconv_open(tocode,"UCS-4") == (iconv_t)(-1))
1036 error(0,0,
1037 /* TRANSLATORS: An error message.
1038 The placeholder expands to the encoding name, specified through --to-code. */
1039 _("conversion to %s unsupported"),
1040 tocode);
1041 else
1042 error(0,0,
1043 /* TRANSLATORS: An error message.
1044 The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
1045 _("conversion from %s to %s unsupported"),
1046 fromcode,tocode);
1047 error(EXIT_FAILURE,0,
1048 /* TRANSLATORS: Additional advice after an error message.
1049 The %s placeholder expands to the program name. */
1050 _("try '%s -l' to get the list of supported encodings"),
1051 program_name);
1053 /* For EBCDIC encodings, determine how to map 0x15 (which encodes the
1054 "newline function", see the Unicode standard, chapter 5). */
1055 const char *envvar_value = getenv("ICONV_EBCDIC_ZOS_UNIX");
1056 if (envvar_value != NULL && envvar_value[0] != '\0') {
1057 unsigned int surface;
1058 iconvctl(cd, ICONV_GET_FROM_SURFACE, &surface);
1059 surface |= ICONV_SURFACE_EBCDIC_ZOS_UNIX;
1060 iconvctl(cd, ICONV_SET_FROM_SURFACE, &surface);
1061 iconvctl(cd, ICONV_GET_TO_SURFACE, &surface);
1062 surface |= ICONV_SURFACE_EBCDIC_ZOS_UNIX;
1063 iconvctl(cd, ICONV_SET_TO_SURFACE, &surface);
1065 /* Look at fromcode and tocode, to determine whether character widths
1066 should be determined according to legacy CJK conventions. */
1067 cjkcode = iconv_canonicalize(tocode);
1068 if (!is_cjk_encoding(cjkcode))
1069 cjkcode = iconv_canonicalize(fromcode);
1070 /* Set up fallback routines for handling impossible conversions. */
1071 if (ilseq_byte_subst != NULL)
1072 ilseq_byte_subst_buffer = (char*)xmalloc((ilseq_byte_subst_size+1)*sizeof(char));
1073 if (!discard_unconvertible) {
1074 if (ilseq_wchar_subst != NULL)
1075 ilseq_wchar_subst_buffer = (char*)xmalloc((ilseq_wchar_subst_size+1)*sizeof(char));
1076 if (ilseq_unicode_subst != NULL)
1077 ilseq_unicode_subst_buffer = (char*)xmalloc((ilseq_unicode_subst_size+1)*sizeof(char));
1078 if (ilseq_byte_subst != NULL) {
1079 subst_mb_to_uc_cd = iconv_open("UCS-4-INTERNAL","char");
1080 subst_mb_to_uc_temp_buffer = (unsigned int*)xmalloc(ilseq_byte_subst_size*sizeof(unsigned int));
1081 subst_mb_to_wc_cd = iconv_open("wchar_t","char");
1082 subst_mb_to_wc_temp_buffer = (wchar_t*)xmalloc(ilseq_byte_subst_size*sizeof(wchar_t));
1083 subst_mb_to_mb_cd = iconv_open(tocode,"char");
1084 subst_mb_to_mb_temp_buffer = (char*)xmalloc(ilseq_byte_subst_size*4);
1086 if (ilseq_wchar_subst != NULL) {
1087 subst_wc_to_mb_cd = iconv_open(tocode,"char");
1088 subst_wc_to_mb_temp_buffer = (char*)xmalloc(ilseq_wchar_subst_size*4);
1090 if (ilseq_unicode_subst != NULL) {
1091 subst_uc_to_mb_cd = iconv_open(tocode,"char");
1092 subst_uc_to_mb_temp_buffer = (char*)xmalloc(ilseq_unicode_subst_size*4);
1094 fallbacks.mb_to_uc_fallback =
1095 (ilseq_byte_subst != NULL ? subst_mb_to_uc_fallback : NULL);
1096 fallbacks.uc_to_mb_fallback =
1097 (ilseq_unicode_subst != NULL ? subst_uc_to_mb_fallback : NULL);
1098 fallbacks.mb_to_wc_fallback =
1099 (ilseq_byte_subst != NULL ? subst_mb_to_wc_fallback : NULL);
1100 fallbacks.wc_to_mb_fallback =
1101 (ilseq_wchar_subst != NULL ? subst_wc_to_mb_fallback : NULL);
1102 fallbacks.data = NULL;
1103 iconvctl(cd, ICONV_SET_FALLBACKS, &fallbacks);
1105 /* Set up hooks for updating the line and column position. */
1106 hooks.uc_hook = update_line_column;
1107 hooks.wc_hook = NULL;
1108 hooks.data = NULL;
1109 iconvctl(cd, ICONV_SET_HOOKS, &hooks);
1110 if (i == argc)
1111 status = convert(cd,fileno(stdin),
1112 /* TRANSLATORS: A filename substitute denoting standard input. */
1113 _("(stdin)"),
1114 tocode);
1115 else {
1116 status = 0;
1117 for (; i < argc; i++) {
1118 const char* infilename = argv[i];
1119 if (strcmp(infilename,"-") == 0) {
1120 status |= convert(cd,fileno(stdin),
1121 /* TRANSLATORS: A filename substitute denoting standard input. */
1122 _("(stdin)"),
1123 tocode);
1124 } else {
1125 FILE* infile = fopen(infilename,"r");
1126 if (infile == NULL) {
1127 int saved_errno = errno;
1128 error(0,saved_errno,
1129 /* TRANSLATORS: The first part of an error message.
1130 It is followed by a colon and a detail message.
1131 The %s placeholder expands to the input file name. */
1132 _("%s"),
1133 infilename);
1134 status = 1;
1135 } else {
1136 status |= convert(cd,fileno(infile),infilename,tocode);
1137 fclose(infile);
1142 iconv_close(cd);
1144 if (ferror(stdout) || fclose(stdout)) {
1145 error(0,0,
1146 /* TRANSLATORS: An error message. */
1147 _("I/O error"));
1148 status = 1;
1150 exit(status);