1 /* Copyright (C) 2000-2024 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
32 /* Ensure that iconv_no_i18n does not depend on libintl. */
35 # undef ENABLE_RELOCATABLE
38 #include "binary-io.h"
40 #include "relocatable.h"
41 #include "safe-read.h"
44 #include "uniwidth/cjk.h"
50 /* Ensure that iconv_no_i18n does not depend on libintl. */
54 error (int status
, int errnum
, const char *message
, ...)
59 fprintf(stderr
,"%s: ",program_name
);
60 va_start(args
,message
);
61 vfprintf(stderr
,message
,args
);
64 const char *s
= strerror(errnum
);
66 s
= "Unknown system error";
79 #define _(str) gettext(str)
81 /* Ensure that iconv_no_i18n does not depend on libintl. */
83 # define xmalloc malloc
84 # define xalloc_die abort
87 /* Locale independent test for a decimal digit.
88 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
89 <ctype.h> isdigit must be an 'unsigned char'.) */
91 #define isdigit(c) ((unsigned int) ((c) - '0') < 10)
93 /* Locale independent test for a printable character.
94 Argument can be 'char' or 'unsigned char'. (Whereas the argument of
95 <ctype.h> isprint must be an 'unsigned char'.) */
96 #define c_isprint(c) ((c) >= ' ' && (c) <= '~')
98 /* ========================================================================= */
100 static int discard_unconvertible
= 0;
101 static int silent
= 0;
103 static void usage (int exitcode
)
106 const char* helpstring1
=
107 /* TRANSLATORS: The first line of the short usage message. */
108 _("Usage: iconv [-c] [-s] [-f fromcode] [-t tocode] [file ...]");
109 const char* helpstring2
=
110 /* TRANSLATORS: The second line of the short usage message.
111 Align it correctly against the first line. */
113 fprintf(stderr
, "%s\n%s\n", helpstring1
, helpstring2
);
114 fprintf(stderr
, _("Try '%s --help' for more information.\n"), program_name
);
116 /* xgettext: no-wrap */
117 /* TRANSLATORS: The first line of the long usage message.
118 The %s placeholder expands to the program name. */
120 Usage: %s [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]\n"),
122 /* xgettext: no-wrap */
123 /* TRANSLATORS: The second line of the long usage message.
124 Align it correctly against the first line.
125 The %s placeholder expands to the program name. */
130 /* xgettext: no-wrap */
131 /* TRANSLATORS: Description of the iconv program. */
133 Converts text from one encoding to another encoding.\n"));
135 /* xgettext: no-wrap */
137 Options controlling the input and output format:\n"));
138 /* xgettext: no-wrap */
140 -f ENCODING, --from-code=ENCODING\n\
141 the encoding of the input\n"));
142 /* xgettext: no-wrap */
144 -t ENCODING, --to-code=ENCODING\n\
145 the encoding of the output\n"));
147 /* xgettext: no-wrap */
149 Options controlling conversion problems:\n"));
150 /* xgettext: no-wrap */
152 -c discard unconvertible characters\n"));
153 /* xgettext: no-wrap */
155 --unicode-subst=FORMATSTRING\n\
156 substitution for unconvertible Unicode characters\n"));
157 /* xgettext: no-wrap */
159 --byte-subst=FORMATSTRING substitution for unconvertible bytes\n"));
160 /* xgettext: no-wrap */
162 --widechar-subst=FORMATSTRING\n\
163 substitution for unconvertible wide characters\n"));
165 /* xgettext: no-wrap */
167 Options controlling error output:\n"));
168 /* xgettext: no-wrap */
170 -s, --silent suppress error messages about conversion problems\n"));
172 /* xgettext: no-wrap */
174 Informative output:\n"));
175 /* xgettext: no-wrap */
177 -l, --list list the supported encodings\n"));
178 /* xgettext: no-wrap */
180 --help display this help and exit\n"));
181 /* xgettext: no-wrap */
183 --version output version information and exit\n"));
185 /* TRANSLATORS: The first placeholder is the web address of the Savannah
186 project of this package. The second placeholder is the bug-reporting
187 email address for this package. Please add _another line_ saying
188 "Report translation bugs to <...>\n" with the address for translation
189 bugs (typically your translation team's web or email address). */
191 Report bugs in the bug tracker at <%s>\n\
192 or by email to <%s>.\n"),
193 "https://savannah.gnu.org/projects/libiconv",
194 "bug-gnu-libiconv@gnu.org");
199 static void print_version (void)
201 printf("iconv (GNU libiconv %d.%d)\n",
202 _libiconv_version
>> 8, _libiconv_version
& 0xff);
203 printf("Copyright (C) %s Free Software Foundation, Inc.\n", "2000-2022");
204 /* xgettext: no-wrap */
205 /* TRANSLATORS: The %s placeholder is the web address of the GPL license. */
207 License GPLv3+: GNU GPL version 3 or later <%s>\n\
208 This is free software: you are free to change and redistribute it.\n\
209 There is NO WARRANTY, to the extent permitted by law.\n"),
210 "https://gnu.org/licenses/gpl.html");
211 /* TRANSLATORS: The %s placeholder expands to an author's name. */
212 printf(_("Written by %s.\n"),"Bruno Haible");
216 static int print_one (unsigned int namescount
, const char * const * names
,
221 for (i
= 0; i
< namescount
; i
++) {
224 fputs(names
[i
],stdout
);
230 /* ========================================================================= */
232 /* Line number and column position. */
233 static unsigned int line
;
234 static unsigned int column
;
235 static const char* cjkcode
;
236 /* Update the line number and column position after a character was
237 successfully converted. */
238 static void update_line_column (unsigned int uc
, void* data
)
244 int width
= uc_width(uc
, cjkcode
);
247 else if (uc
== 0x0009)
248 column
+= 8 - (column
% 8);
252 /* ========================================================================= */
254 /* Production of placeholder strings as fallback for unconvertible
257 /* Check that the argument is a format string taking either no argument
258 or exactly one unsigned integer argument. Returns the maximum output
259 size of the format string. */
260 static size_t check_subst_formatstring (const char *format
, const char *param_name
)
262 /* C format strings are described in POSIX (IEEE P1003.1 2001), section
263 XSH 3 fprintf(). See also Linux fprintf(3) manual page.
264 For simplicity, we don't accept
265 - the '%m$' reordering syntax,
267 - width specifications referring to an argument,
268 - precision specifications referring to an argument,
270 - format specifiers other than 'o', 'u', 'x', 'X'.
274 - is optionally followed by any of the characters '#', '0', '-', ' ',
275 '+', "'", each of which acts as a flag,
276 - is optionally followed by a width specification: a nonempty digit
278 - is optionally followed by '.' and a precision specification: a
279 nonempty digit sequence,
280 - is finished by a specifier
281 - '%', that needs no argument,
282 - 'o', 'u', 'x', 'X', that need an unsigned integer argument.
285 unsigned int unnumbered_arg_count
= 0;
287 for (; *format
!= '\0';) {
288 if (*format
++ == '%') {
290 unsigned int width
= 0;
291 unsigned int precision
= 0;
295 if (*format
== ' ' || *format
== '+' || *format
== '-'
296 || *format
== '#' || *format
== '0' || *format
== '\'')
303 error(EXIT_FAILURE
,0,
304 /* TRANSLATORS: An error message.
305 The %s placeholder expands to a command-line option. */
306 _("%s argument: A format directive with a variable width is not allowed here."),
308 if (isdigit (*format
)) {
310 width
= 10*width
+ (*format
- '0');
312 } while (isdigit (*format
));
314 /* Parse precision. */
315 if (*format
== '.') {
318 error(EXIT_FAILURE
,0,
319 /* TRANSLATORS: An error message.
320 The %s placeholder expands to a command-line option. */
321 _("%s argument: A format directive with a variable precision is not allowed here."),
323 if (isdigit (*format
)) {
325 precision
= 10*precision
+ (*format
- '0');
327 } while (isdigit (*format
));
332 case 'h': case 'l': case 'L': case 'q':
333 case 'j': case 'z': case 'Z': case 't':
334 error(EXIT_FAILURE
,0,
335 /* TRANSLATORS: An error message.
336 The %s placeholder expands to a command-line option. */
337 _("%s argument: A format directive with a size is not allowed here."),
340 /* Parse end of directive. */
345 case 'u': case 'o': case 'x': case 'X':
346 if (*format
== 'u') {
347 length
= (unsigned int) (sizeof (unsigned int) * CHAR_BIT
348 * 0.30103 /* binary -> decimal */
350 + 1; /* turn floor into ceil */
351 if (length
< precision
)
353 length
*= 2; /* estimate for FLAG_GROUP */
354 length
+= 1; /* account for leading sign */
355 } else if (*format
== 'o') {
356 length
= (unsigned int) (sizeof (unsigned int) * CHAR_BIT
357 * 0.333334 /* binary -> octal */
359 + 1; /* turn floor into ceil */
360 if (length
< precision
)
362 length
+= 1; /* account for leading sign */
363 } else { /* 'x', 'X' */
364 length
= (unsigned int) (sizeof (unsigned int) * CHAR_BIT
365 * 0.25 /* binary -> hexadecimal */
367 + 1; /* turn floor into ceil */
368 if (length
< precision
)
370 length
+= 2; /* account for leading sign or alternate form */
372 unnumbered_arg_count
++;
376 error(EXIT_FAILURE
,0,
377 /* TRANSLATORS: An error message.
378 The %s placeholder expands to a command-line option. */
379 _("%s argument: The string ends in the middle of a directive."),
381 else if (c_isprint(*format
))
382 error(EXIT_FAILURE
,0,
383 /* TRANSLATORS: An error message.
384 The %s placeholder expands to a command-line option.
385 The %c placeholder expands to an unknown format directive. */
386 _("%s argument: The character '%c' is not a valid conversion specifier."),
389 error(EXIT_FAILURE
,0,
390 /* TRANSLATORS: An error message.
391 The %s placeholder expands to a command-line option. */
392 _("%s argument: The character that terminates the format directive is not a valid conversion specifier."),
394 abort(); /*NOTREACHED*/
403 if (unnumbered_arg_count
> 1)
404 error(EXIT_FAILURE
,0,
405 /* TRANSLATORS: An error message.
406 The %s placeholder expands to a command-line option.
407 The %u placeholder expands to the number of arguments consumed by the format string. */
408 ngettext("%s argument: The format string consumes more than one argument: %u argument.",
409 "%s argument: The format string consumes more than one argument: %u arguments.",
410 unnumbered_arg_count
),
411 param_name
,unnumbered_arg_count
);
415 /* Format strings. */
416 static const char* ilseq_byte_subst
;
417 static const char* ilseq_wchar_subst
;
418 static const char* ilseq_unicode_subst
;
420 /* Maximum result size for each format string. */
421 static size_t ilseq_byte_subst_size
;
422 static size_t ilseq_wchar_subst_size
;
423 static size_t ilseq_unicode_subst_size
;
425 /* Buffer of size ilseq_byte_subst_size+1. */
426 static char* ilseq_byte_subst_buffer
;
427 /* Buffer of size ilseq_wchar_subst_size+1. */
428 static char* ilseq_wchar_subst_buffer
;
429 /* Buffer of size ilseq_unicode_subst_size+1. */
430 static char* ilseq_unicode_subst_buffer
;
432 /* Auxiliary variables for subst_mb_to_uc_fallback. */
433 /* Converter from locale encoding to UCS-4. */
434 static iconv_t subst_mb_to_uc_cd
;
435 /* Buffer of size ilseq_byte_subst_size. */
436 static unsigned int* subst_mb_to_uc_temp_buffer
;
438 static void subst_mb_to_uc_fallback
439 (const char* inbuf
, size_t inbufsize
,
440 void (*write_replacement
) (const unsigned int *buf
, size_t buflen
,
445 for (; inbufsize
> 0; inbuf
++, inbufsize
--) {
450 sprintf(ilseq_byte_subst_buffer
,
451 ilseq_byte_subst
, (unsigned int)(unsigned char)*inbuf
);
452 inptr
= ilseq_byte_subst_buffer
;
453 inbytesleft
= strlen(ilseq_byte_subst_buffer
);
454 outptr
= (char*)subst_mb_to_uc_temp_buffer
;
455 outbytesleft
= ilseq_byte_subst_size
*sizeof(unsigned int);
456 iconv(subst_mb_to_uc_cd
,NULL
,NULL
,NULL
,NULL
);
457 if (iconv(subst_mb_to_uc_cd
, (ICONV_CONST
char**)&inptr
,&inbytesleft
, &outptr
,&outbytesleft
)
459 || iconv(subst_mb_to_uc_cd
, NULL
,NULL
, &outptr
,&outbytesleft
)
461 error(EXIT_FAILURE
,0,
462 /* TRANSLATORS: An error message.
463 The %s placeholder expands to a piece of text, specified through --byte-subst. */
464 _("cannot convert byte substitution to Unicode: %s"),
465 ilseq_byte_subst_buffer
);
466 if (!(outbytesleft
%sizeof(unsigned int) == 0))
468 write_replacement(subst_mb_to_uc_temp_buffer
,
469 ilseq_byte_subst_size
-(outbytesleft
/sizeof(unsigned int)),
474 /* Auxiliary variables for subst_uc_to_mb_fallback. */
475 /* Converter from locale encoding to target encoding. */
476 static iconv_t subst_uc_to_mb_cd
;
477 /* Buffer of size ilseq_unicode_subst_size*4. */
478 static char* subst_uc_to_mb_temp_buffer
;
480 static void subst_uc_to_mb_fallback
482 void (*write_replacement
) (const char *buf
, size_t buflen
,
491 sprintf(ilseq_unicode_subst_buffer
, ilseq_unicode_subst
, code
);
492 inptr
= ilseq_unicode_subst_buffer
;
493 inbytesleft
= strlen(ilseq_unicode_subst_buffer
);
494 outptr
= subst_uc_to_mb_temp_buffer
;
495 outbytesleft
= ilseq_unicode_subst_size
*4;
496 iconv(subst_uc_to_mb_cd
,NULL
,NULL
,NULL
,NULL
);
497 if (iconv(subst_uc_to_mb_cd
, (ICONV_CONST
char**)&inptr
,&inbytesleft
, &outptr
,&outbytesleft
)
499 || iconv(subst_uc_to_mb_cd
, NULL
,NULL
, &outptr
,&outbytesleft
)
501 error(EXIT_FAILURE
,0,
502 /* TRANSLATORS: An error message.
503 The %s placeholder expands to a piece of text, specified through --unicode-subst. */
504 _("cannot convert unicode substitution to target encoding: %s"),
505 ilseq_unicode_subst_buffer
);
506 write_replacement(subst_uc_to_mb_temp_buffer
,
507 ilseq_unicode_subst_size
*4-outbytesleft
,
511 /* Auxiliary variables for subst_mb_to_wc_fallback. */
512 /* Converter from locale encoding to wchar_t. */
513 static iconv_t subst_mb_to_wc_cd
;
514 /* Buffer of size ilseq_byte_subst_size. */
515 static wchar_t* subst_mb_to_wc_temp_buffer
;
517 static void subst_mb_to_wc_fallback
518 (const char* inbuf
, size_t inbufsize
,
519 void (*write_replacement
) (const wchar_t *buf
, size_t buflen
,
524 for (; inbufsize
> 0; inbuf
++, inbufsize
--) {
529 sprintf(ilseq_byte_subst_buffer
,
530 ilseq_byte_subst
, (unsigned int)(unsigned char)*inbuf
);
531 inptr
= ilseq_byte_subst_buffer
;
532 inbytesleft
= strlen(ilseq_byte_subst_buffer
);
533 outptr
= (char*)subst_mb_to_wc_temp_buffer
;
534 outbytesleft
= ilseq_byte_subst_size
*sizeof(wchar_t);
535 iconv(subst_mb_to_wc_cd
,NULL
,NULL
,NULL
,NULL
);
536 if (iconv(subst_mb_to_wc_cd
, (ICONV_CONST
char**)&inptr
,&inbytesleft
, &outptr
,&outbytesleft
)
538 || iconv(subst_mb_to_wc_cd
, NULL
,NULL
, &outptr
,&outbytesleft
)
540 error(EXIT_FAILURE
,0,
541 /* TRANSLATORS: An error message.
542 The %s placeholder expands to a piece of text, specified through --byte-subst. */
543 _("cannot convert byte substitution to wide string: %s"),
544 ilseq_byte_subst_buffer
);
545 if (!(outbytesleft
%sizeof(wchar_t) == 0))
547 write_replacement(subst_mb_to_wc_temp_buffer
,
548 ilseq_byte_subst_size
-(outbytesleft
/sizeof(wchar_t)),
553 /* Auxiliary variables for subst_wc_to_mb_fallback. */
554 /* Converter from locale encoding to target encoding. */
555 static iconv_t subst_wc_to_mb_cd
;
556 /* Buffer of size ilseq_wchar_subst_size*4.
557 Hardcode factor 4, because MB_LEN_MAX is not reliable on some platforms. */
558 static char* subst_wc_to_mb_temp_buffer
;
560 static void subst_wc_to_mb_fallback
562 void (*write_replacement
) (const char *buf
, size_t buflen
,
571 sprintf(ilseq_wchar_subst_buffer
, ilseq_wchar_subst
, (unsigned int) code
);
572 inptr
= ilseq_wchar_subst_buffer
;
573 inbytesleft
= strlen(ilseq_wchar_subst_buffer
);
574 outptr
= subst_wc_to_mb_temp_buffer
;
575 outbytesleft
= ilseq_wchar_subst_size
*4;
576 iconv(subst_wc_to_mb_cd
,NULL
,NULL
,NULL
,NULL
);
577 if (iconv(subst_wc_to_mb_cd
, (ICONV_CONST
char**)&inptr
,&inbytesleft
, &outptr
,&outbytesleft
)
579 || iconv(subst_wc_to_mb_cd
, NULL
,NULL
, &outptr
,&outbytesleft
)
581 error(EXIT_FAILURE
,0,
582 /* TRANSLATORS: An error message.
583 The %s placeholder expands to a piece of text, specified through --widechar-subst. */
584 _("cannot convert widechar substitution to target encoding: %s"),
585 ilseq_wchar_subst_buffer
);
586 write_replacement(subst_wc_to_mb_temp_buffer
,
587 ilseq_wchar_subst_size
*4-outbytesleft
,
591 /* Auxiliary variables for subst_mb_to_mb_fallback. */
592 /* Converter from locale encoding to target encoding. */
593 static iconv_t subst_mb_to_mb_cd
;
594 /* Buffer of size ilseq_byte_subst_size*4. */
595 static char* subst_mb_to_mb_temp_buffer
;
597 static void subst_mb_to_mb_fallback (const char* inbuf
, size_t inbufsize
)
599 for (; inbufsize
> 0; inbuf
++, inbufsize
--) {
604 sprintf(ilseq_byte_subst_buffer
,
605 ilseq_byte_subst
, (unsigned int)(unsigned char)*inbuf
);
606 inptr
= ilseq_byte_subst_buffer
;
607 inbytesleft
= strlen(ilseq_byte_subst_buffer
);
608 outptr
= subst_mb_to_mb_temp_buffer
;
609 outbytesleft
= ilseq_byte_subst_size
*4;
610 iconv(subst_mb_to_mb_cd
,NULL
,NULL
,NULL
,NULL
);
611 if (iconv(subst_mb_to_mb_cd
, (ICONV_CONST
char**)&inptr
,&inbytesleft
, &outptr
,&outbytesleft
)
613 || iconv(subst_mb_to_mb_cd
, NULL
,NULL
, &outptr
,&outbytesleft
)
615 error(EXIT_FAILURE
,0,
616 /* TRANSLATORS: An error message.
617 The %s placeholder expands to a piece of text, specified through --byte-subst. */
618 _("cannot convert byte substitution to target encoding: %s"),
619 ilseq_byte_subst_buffer
);
620 fwrite(subst_mb_to_mb_temp_buffer
,1,ilseq_byte_subst_size
*4-outbytesleft
,
625 /* ========================================================================= */
627 /* Error messages during conversion. */
629 static void conversion_error_EILSEQ (const char* infilename
)
635 /* TRANSLATORS: An error message.
636 The placeholders expand to the input file name, a line number, and a column number. */
637 _("%s:%u:%u: cannot convert"),
638 infilename
,line
,column
);
641 static void conversion_error_EINVAL (const char* infilename
)
647 /* TRANSLATORS: An error message.
648 The placeholders expand to the input file name, a line number, and a column number.
649 A "shift sequence" is a sequence of bytes that changes the state of the converter;
650 this concept exists only for "stateful" encodings like ISO-2022-JP. */
651 _("%s:%u:%u: incomplete character or shift sequence"),
652 infilename
,line
,column
);
655 static void conversion_error_other (int errnum
, const char* infilename
)
661 /* TRANSLATORS: The first part of an error message.
662 It is followed by a colon and a detail message.
663 The placeholders expand to the input file name, a line number, and a column number. */
665 infilename
,line
,column
);
668 /* Convert the input given in infile. */
670 static int convert (iconv_t cd
, int infile
, const char* infilename
, _GL_UNUSED
const char* tocode
)
672 char inbuf
[4096+4096];
673 size_t inbufrest
= 0;
674 int infile_error
= 0;
675 char initial_outbuf
[4096];
676 char *outbuf
= initial_outbuf
;
677 size_t outbufsize
= sizeof(initial_outbuf
);
684 /* Turn off z/OS auto-conversion. */
685 struct f_cnvrt req
= {SETCVTOFF
, 0, 0};
686 fcntl(infile
, F_CONTROL_CVT
, &req
);
688 line
= 1; column
= 0;
689 iconv(cd
,NULL
,NULL
,NULL
,NULL
);
692 /* Transfer the accumulated output to its destination, in case the
693 safe_read() call will block. */
695 inbufsize
= safe_read(infile
,inbuf
+4096,4096);
696 if (inbufsize
== 0 || inbufsize
== SAFE_READ_ERROR
) {
697 infile_error
= (inbufsize
== SAFE_READ_ERROR
? errno
: 0);
701 if (ilseq_byte_subst
!= NULL
)
702 subst_mb_to_mb_fallback(inbuf
+4096-inbufrest
, inbufrest
);
704 conversion_error_EINVAL(infilename
);
709 const char* inptr
= inbuf
+4096-inbufrest
;
710 size_t insize
= inbufrest
+inbufsize
;
713 char* outptr
= outbuf
;
714 size_t outsize
= outbufsize
;
715 size_t res
= iconv(cd
,(ICONV_CONST
char**)&inptr
,&insize
,&outptr
,&outsize
);
716 if (outptr
!= outbuf
) {
717 int saved_errno
= errno
;
718 if (fwrite(outbuf
,1,outptr
-outbuf
,stdout
) < outptr
-outbuf
) {
724 if (res
== (size_t)(-1)) {
725 if (errno
== EILSEQ
) {
726 if (discard_unconvertible
== 1) {
728 iconvctl(cd
,ICONV_SET_DISCARD_ILSEQ
,&one
);
729 discard_unconvertible
= 2;
733 conversion_error_EILSEQ(infilename
);
737 } else if (errno
== EINVAL
) {
738 if (inbufsize
== 0 || insize
> 4096) {
740 conversion_error_EINVAL(infilename
);
746 /* Like memcpy(inbuf+4096-insize,inptr,insize), except that
747 we cannot use memcpy here, because source and destination
748 regions may overlap. */
749 char* restptr
= inbuf
+4096-insize
;
750 do { *restptr
++ = *inptr
++; } while (--insize
> 0);
754 } else if (errno
== E2BIG
) {
755 if (outptr
==outbuf
) {
756 /* outbuf is too small. Double its size. */
757 if (outbuf
!= initial_outbuf
)
759 outbufsize
= 2*outbufsize
;
760 if (outbufsize
==0) /* integer overflow? */
762 outbuf
= (char*)xmalloc(outbufsize
);
766 conversion_error_other(errno
,infilename
);
775 char* outptr
= outbuf
;
776 size_t outsize
= outbufsize
;
777 size_t res
= iconv(cd
,NULL
,NULL
,&outptr
,&outsize
);
778 if (outptr
!= outbuf
) {
779 int saved_errno
= errno
;
780 if (fwrite(outbuf
,1,outptr
-outbuf
,stdout
) < outptr
-outbuf
) {
786 if (res
== (size_t)(-1)) {
787 if (errno
== EILSEQ
) {
788 if (discard_unconvertible
== 1) {
790 iconvctl(cd
,ICONV_SET_DISCARD_ILSEQ
,&one
);
791 discard_unconvertible
= 2;
795 conversion_error_EILSEQ(infilename
);
799 } else if (errno
== EINVAL
) {
801 conversion_error_EINVAL(infilename
);
804 } else if (errno
== E2BIG
) {
805 if (outptr
==outbuf
) {
806 /* outbuf is too small. Double its size. */
807 if (outbuf
!= initial_outbuf
)
809 outbufsize
= 2*outbufsize
;
810 if (outbufsize
==0) /* integer overflow? */
812 outbuf
= (char*)xmalloc(outbufsize
);
816 conversion_error_other(errno
,infilename
);
827 error(0,infile_error
,
828 /* TRANSLATORS: An error message.
829 The placeholder expands to the input file name. */
838 status
= tagfile(fileno(stdout
), tocode
);
841 if (outbuf
!= initial_outbuf
)
846 /* ========================================================================= */
848 int main (int argc
, char* argv
[])
850 const char* fromcode
= NULL
;
851 const char* tocode
= NULL
;
854 struct iconv_fallbacks fallbacks
;
855 struct iconv_hooks hooks
;
859 set_program_name (argv
[0]);
861 /* Needed for the locale dependent encodings, "char" and "wchar_t",
863 setlocale(LC_CTYPE
,"");
865 /* Needed for gettext. */
866 setlocale(LC_MESSAGES
,"");
870 bindtextdomain("libiconv",relocate(LOCALEDIR
));
872 textdomain("libiconv");
873 /* No need to invoke the gnulib function stdopen() here, because
874 (1) the only file descriptor allocations done by this program are
876 (2) when such fopen() calls occur, stdin is not used,
878 - when an fopen() call happens to open fd 0, it is harmless, by (2),
879 - when an fopen() call happens to open fd 1 or 2, writing to
880 stdout or stderr will produce an error, by (1). */
882 for (i
= 1; i
< argc
;) {
883 size_t len
= strlen(argv
[i
]);
884 if (!strcmp(argv
[i
],"--")) {
888 if (!strcmp(argv
[i
],"-f")
889 /* --f ... --from-code */
890 || (len
>= 3 && len
<= 11 && !strncmp(argv
[i
],"--from-code",len
))
891 /* --from-code=... */
892 || (len
>= 12 && !strncmp(argv
[i
],"--from-code=",12))) {
894 if (i
== argc
-1) usage(1);
895 if (fromcode
!= NULL
) usage(1);
897 fromcode
= argv
[i
+1];
900 fromcode
= argv
[i
]+12;
905 if (!strcmp(argv
[i
],"-t")
906 /* --t ... --to-code */
907 || (len
>= 3 && len
<= 9 && !strncmp(argv
[i
],"--to-code",len
))
908 /* --to-code=... */
909 || (len
>= 10 && !strncmp(argv
[i
],"--to-code=",10))) {
911 if (i
== argc
-1) usage(1);
912 if (tocode
!= NULL
) usage(1);
922 if (!strcmp(argv
[i
],"-l")
924 || (len
>= 3 && len
<= 6 && !strncmp(argv
[i
],"--list",len
))) {
929 if (/* --by ... --byte-subst */
930 (len
>= 4 && len
<= 12 && !strncmp(argv
[i
],"--byte-subst",len
))
931 /* --byte-subst=... */
932 || (len
>= 13 && !strncmp(argv
[i
],"--byte-subst=",13))) {
934 if (i
== argc
-1) usage(1);
935 ilseq_byte_subst
= argv
[i
+1];
938 ilseq_byte_subst
= argv
[i
]+13;
941 ilseq_byte_subst_size
=
942 check_subst_formatstring(ilseq_byte_subst
, "--byte-subst");
945 if (/* --w ... --widechar-subst */
946 (len
>= 3 && len
<= 16 && !strncmp(argv
[i
],"--widechar-subst",len
))
947 /* --widechar-subst=... */
948 || (len
>= 17 && !strncmp(argv
[i
],"--widechar-subst=",17))) {
950 if (i
== argc
-1) usage(1);
951 ilseq_wchar_subst
= argv
[i
+1];
954 ilseq_wchar_subst
= argv
[i
]+17;
957 ilseq_wchar_subst_size
=
958 check_subst_formatstring(ilseq_wchar_subst
, "--widechar-subst");
961 if (/* --u ... --unicode-subst */
962 (len
>= 3 && len
<= 15 && !strncmp(argv
[i
],"--unicode-subst",len
))
963 /* --unicode-subst=... */
964 || (len
>= 16 && !strncmp(argv
[i
],"--unicode-subst=",16))) {
966 if (i
== argc
-1) usage(1);
967 ilseq_unicode_subst
= argv
[i
+1];
970 ilseq_unicode_subst
= argv
[i
]+16;
973 ilseq_unicode_subst_size
=
974 check_subst_formatstring(ilseq_unicode_subst
, "--unicode-subst");
977 if /* --s ... --silent */
978 (len
>= 3 && len
<= 8 && !strncmp(argv
[i
],"--silent",len
)) {
983 if /* --h ... --help */
984 (len
>= 3 && len
<= 6 && !strncmp(argv
[i
],"--help",len
)) {
987 if /* --v ... --version */
988 (len
>= 3 && len
<= 9 && !strncmp(argv
[i
],"--version",len
)) {
992 /* Backward compatibility with iconv <= 1.9.1. */
993 if /* --bi ... --binary */
994 (len
>= 4 && len
<= 8 && !strncmp(argv
[i
],"--binary",len
)) {
999 if (argv
[i
][0] == '-' && argv
[i
][1] != '\0') {
1000 const char *option
= argv
[i
] + 1;
1001 if (*option
== '\0')
1003 for (; *option
; option
++)
1005 case 'c': discard_unconvertible
= 1; break;
1006 case 's': silent
= 1; break;
1015 if (i
!= 2 || i
!= argc
)
1017 iconvlist(print_one
,NULL
);
1021 SET_BINARY(fileno(stdout
));
1023 if (fromcode
== NULL
)
1027 cd
= iconv_open(tocode
,fromcode
);
1028 if (cd
== (iconv_t
)(-1)) {
1029 if (iconv_open("UCS-4",fromcode
) == (iconv_t
)(-1))
1031 /* TRANSLATORS: An error message.
1032 The placeholder expands to the encoding name, specified through --from-code. */
1033 _("conversion from %s unsupported"),
1035 else if (iconv_open(tocode
,"UCS-4") == (iconv_t
)(-1))
1037 /* TRANSLATORS: An error message.
1038 The placeholder expands to the encoding name, specified through --to-code. */
1039 _("conversion to %s unsupported"),
1043 /* TRANSLATORS: An error message.
1044 The placeholders expand to the encoding names, specified through --from-code and --to-code, respectively. */
1045 _("conversion from %s to %s unsupported"),
1047 error(EXIT_FAILURE
,0,
1048 /* TRANSLATORS: Additional advice after an error message.
1049 The %s placeholder expands to the program name. */
1050 _("try '%s -l' to get the list of supported encodings"),
1053 /* For EBCDIC encodings, determine how to map 0x15 (which encodes the
1054 "newline function", see the Unicode standard, chapter 5). */
1055 const char *envvar_value
= getenv("ICONV_EBCDIC_ZOS_UNIX");
1056 if (envvar_value
!= NULL
&& envvar_value
[0] != '\0') {
1057 unsigned int surface
;
1058 iconvctl(cd
, ICONV_GET_FROM_SURFACE
, &surface
);
1059 surface
|= ICONV_SURFACE_EBCDIC_ZOS_UNIX
;
1060 iconvctl(cd
, ICONV_SET_FROM_SURFACE
, &surface
);
1061 iconvctl(cd
, ICONV_GET_TO_SURFACE
, &surface
);
1062 surface
|= ICONV_SURFACE_EBCDIC_ZOS_UNIX
;
1063 iconvctl(cd
, ICONV_SET_TO_SURFACE
, &surface
);
1065 /* Look at fromcode and tocode, to determine whether character widths
1066 should be determined according to legacy CJK conventions. */
1067 cjkcode
= iconv_canonicalize(tocode
);
1068 if (!is_cjk_encoding(cjkcode
))
1069 cjkcode
= iconv_canonicalize(fromcode
);
1070 /* Set up fallback routines for handling impossible conversions. */
1071 if (ilseq_byte_subst
!= NULL
)
1072 ilseq_byte_subst_buffer
= (char*)xmalloc((ilseq_byte_subst_size
+1)*sizeof(char));
1073 if (!discard_unconvertible
) {
1074 if (ilseq_wchar_subst
!= NULL
)
1075 ilseq_wchar_subst_buffer
= (char*)xmalloc((ilseq_wchar_subst_size
+1)*sizeof(char));
1076 if (ilseq_unicode_subst
!= NULL
)
1077 ilseq_unicode_subst_buffer
= (char*)xmalloc((ilseq_unicode_subst_size
+1)*sizeof(char));
1078 if (ilseq_byte_subst
!= NULL
) {
1079 subst_mb_to_uc_cd
= iconv_open("UCS-4-INTERNAL","char");
1080 subst_mb_to_uc_temp_buffer
= (unsigned int*)xmalloc(ilseq_byte_subst_size
*sizeof(unsigned int));
1081 subst_mb_to_wc_cd
= iconv_open("wchar_t","char");
1082 subst_mb_to_wc_temp_buffer
= (wchar_t*)xmalloc(ilseq_byte_subst_size
*sizeof(wchar_t));
1083 subst_mb_to_mb_cd
= iconv_open(tocode
,"char");
1084 subst_mb_to_mb_temp_buffer
= (char*)xmalloc(ilseq_byte_subst_size
*4);
1086 if (ilseq_wchar_subst
!= NULL
) {
1087 subst_wc_to_mb_cd
= iconv_open(tocode
,"char");
1088 subst_wc_to_mb_temp_buffer
= (char*)xmalloc(ilseq_wchar_subst_size
*4);
1090 if (ilseq_unicode_subst
!= NULL
) {
1091 subst_uc_to_mb_cd
= iconv_open(tocode
,"char");
1092 subst_uc_to_mb_temp_buffer
= (char*)xmalloc(ilseq_unicode_subst_size
*4);
1094 fallbacks
.mb_to_uc_fallback
=
1095 (ilseq_byte_subst
!= NULL
? subst_mb_to_uc_fallback
: NULL
);
1096 fallbacks
.uc_to_mb_fallback
=
1097 (ilseq_unicode_subst
!= NULL
? subst_uc_to_mb_fallback
: NULL
);
1098 fallbacks
.mb_to_wc_fallback
=
1099 (ilseq_byte_subst
!= NULL
? subst_mb_to_wc_fallback
: NULL
);
1100 fallbacks
.wc_to_mb_fallback
=
1101 (ilseq_wchar_subst
!= NULL
? subst_wc_to_mb_fallback
: NULL
);
1102 fallbacks
.data
= NULL
;
1103 iconvctl(cd
, ICONV_SET_FALLBACKS
, &fallbacks
);
1105 /* Set up hooks for updating the line and column position. */
1106 hooks
.uc_hook
= update_line_column
;
1107 hooks
.wc_hook
= NULL
;
1109 iconvctl(cd
, ICONV_SET_HOOKS
, &hooks
);
1111 status
= convert(cd
,fileno(stdin
),
1112 /* TRANSLATORS: A filename substitute denoting standard input. */
1117 for (; i
< argc
; i
++) {
1118 const char* infilename
= argv
[i
];
1119 if (strcmp(infilename
,"-") == 0) {
1120 status
|= convert(cd
,fileno(stdin
),
1121 /* TRANSLATORS: A filename substitute denoting standard input. */
1125 FILE* infile
= fopen(infilename
,"r");
1126 if (infile
== NULL
) {
1127 int saved_errno
= errno
;
1128 error(0,saved_errno
,
1129 /* TRANSLATORS: The first part of an error message.
1130 It is followed by a colon and a detail message.
1131 The %s placeholder expands to the input file name. */
1136 status
|= convert(cd
,fileno(infile
),infilename
,tocode
);
1144 if (ferror(stdout
) || fclose(stdout
)) {
1146 /* TRANSLATORS: An error message. */