1 /* Character set conversion with error handling.
2 Copyright (C) 2001-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible and Simon Josefsson.
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include "striconveh.h"
32 #include "c-strcase.h"
33 #include "c-strcaseeq.h"
36 # define SIZE_MAX ((size_t) -1)
42 /* The caller must provide an iconveh_t, not just an iconv_t, because when a
43 conversion error occurs, we may have to determine the Unicode representation
44 of the inconvertible character. */
/* Open the iconv descriptors needed to convert FROM_CODESET to TO_CODESET:
   a direct one (cd), one from FROM_CODESET to UTF-8 (cd1) and one from
   UTF-8 to TO_CODESET (cd2).
   NOTE(review): this listing has lost lines (declarations, braces, branch
   bodies, the use of CDP and the return statements); the comments below
   describe only the fragments that are still visible.  */
47 iconveh_open (const char *to_codeset
, const char *from_codeset
, iconveh_t
*cdp
)
/* Direct descriptor.  Later code tests it against (iconv_t)(-1), so a
   failure here is tolerated at this point.  */
53 cd
= iconv_open (to_codeset
, from_codeset
);
/* cd1: FROM_CODESET -> UTF-8.
   NOTE(review): presumably opened only when FROM_CODESET is not UTF-8; the
   branch bodies are missing here, so confirm.  */
55 if (STRCASEEQ (from_codeset
, "UTF-8", 'U','T','F','-','8',0,0,0,0))
59 cd1
= iconv_open ("UTF-8", from_codeset
);
60 if (cd1
== (iconv_t
)(-1))
/* Capture errno of the failed iconv_open before any cleanup of cd.  */
62 int saved_errno
= errno
;
63 if (cd
!= (iconv_t
)(-1))
/* cd2: UTF-8 -> TO_CODESET.  The test recognises "UTF-8" and, on the iconv
   implementations selected by the preprocessor condition below, also
   "UTF-8//TRANSLIT".  */
70 if (STRCASEEQ (to_codeset
, "UTF-8", 'U','T','F','-','8',0,0,0,0)
71 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
72 && !defined __UCLIBC__) \
73 || _LIBICONV_VERSION >= 0x0105 \
74 || defined ICONV_SET_TRANSLITERATE
75 || c_strcasecmp (to_codeset
, "UTF-8//TRANSLIT") == 0
81 cd2
= iconv_open (to_codeset
, "UTF-8");
82 if (cd2
== (iconv_t
)(-1))
/* Capture errno of the failed iconv_open; the checks that follow look at
   the descriptors opened earlier (cd1, then cd).  */
84 int saved_errno
= errno
;
85 if (cd1
!= (iconv_t
)(-1))
87 if (cd
!= (iconv_t
)(-1))
/* Close the descriptors stored in *CD, in the order cd2, cd1, cd.
   A member equal to (iconv_t)(-1) is skipped.  When one iconv_close call
   fails, errno is saved and the descriptors not yet closed are still
   closed.
   NOTE(review): the statements that restore errno and return are not
   visible in this listing.  */
101 iconveh_close (const iconveh_t
*cd
)
/* Step 1: cd2.  On failure, still release cd1 and cd.  */
103 if (cd
->cd2
!= (iconv_t
)(-1) && iconv_close (cd
->cd2
) < 0)
105 /* Return -1, but preserve the errno from iconv_close. */
106 int saved_errno
= errno
;
107 if (cd
->cd1
!= (iconv_t
)(-1))
108 iconv_close (cd
->cd1
);
109 if (cd
->cd
!= (iconv_t
)(-1))
110 iconv_close (cd
->cd
);
/* Step 2: cd1.  On failure, still release cd.  */
114 if (cd
->cd1
!= (iconv_t
)(-1) && iconv_close (cd
->cd1
) < 0)
116 /* Return -1, but preserve the errno from iconv_close. */
117 int saved_errno
= errno
;
118 if (cd
->cd
!= (iconv_t
)(-1))
119 iconv_close (cd
->cd
);
/* Step 3: cd, the last descriptor.  */
123 if (cd
->cd
!= (iconv_t
)(-1) && iconv_close (cd
->cd
) < 0)
128 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
129 a conversion error, and it returns in *INCREMENTED a boolean telling whether
130 it has incremented the input pointers past the error location. */
131 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
132 && !(defined __GLIBC__ && !defined __UCLIBC__)
133 /* IRIX iconv() inserts a NUL byte if it cannot convert.
134 NetBSD iconv() inserts a question mark if it cannot convert.
135 Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
136 known to prefer to fail rather than doing a lossy conversion. */
/* Function variant of iconv_carefully.  Works on local copies of the
   caller's cursors (*INBUF, *INBYTESLEFT, *OUTBUF, *OUTBYTESLEFT) and
   writes the remaining counts and *INCREMENTED back.
   NOTE(review): the return type, the trailing parameter(s), the iconv()
   call line itself and all braces are missing from this listing.  */
138 iconv_carefully (iconv_t cd
,
139 const char **inbuf
, size_t *inbytesleft
,
140 char **outbuf
, size_t *outbytesleft
,
143 const char *inptr
= *inbuf
;
144 const char *inptr_end
= inptr
+ *inbytesleft
;
145 char *outptr
= *outbuf
;
146 size_t outsize
= *outbytesleft
;
147 const char *inptr_before
;
/* Remember where this round started, to detect later whether the input
   pointer moved.  */
154 inptr_before
= inptr
;
/* Offer 1, 2, 3, ... input bytes, never going past inptr_end.  */
157 for (insize
= 1; inptr
+ insize
<= inptr_end
; insize
++)
160 (ICONV_CONST
char **) &inptr
, &insize
,
/* Tested here: any outcome other than a failure with errno == EINVAL.  */
162 if (!(res
== (size_t)(-1) && errno
== EINVAL
))
164 /* iconv can eat up a shift sequence but give EINVAL while attempting
165 to convert the first character. E.g. libiconv does this. */
166 if (inptr
> inptr_before
)
176 *outbytesleft
= outsize
;
/* Repeat while the last result was 0 and input remains.  */
179 while (res
== 0 && inptr
< inptr_end
);
182 *inbytesleft
= inptr_end
- inptr
;
/* A positive result that is not (size_t)(-1) is handled specially: see the
   comment below.  */
183 if (res
!= (size_t)(-1) && res
> 0)
185 /* iconv() has already incremented INPTR. We cannot go back to a
186 previous INPTR, otherwise the state inside CD would become invalid,
187 if FROM_CODESET is a stateful encoding. So, tell the caller that
188 *INBUF has already been incremented. */
189 *incremented
= (inptr
> inptr_before
);
193 *incremented
= false;
/* Macro variant of iconv_carefully: a plain iconv() call that always sets
   *INCREMENTED to false.
   NOTE(review): presumably the alternative branch of the preprocessor
   condition that guards the function variant; the directive selecting it
   is not visible in this listing.  */
200 # define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
201 (*(incremented) = false, \
202 iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
205 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
206 converting one character or one shift sequence. */
/* Works on local copies of the caller's cursors and offers iconv() a
   growing prefix of the input, restarting from the saved start position on
   each attempt.
   NOTE(review): the return type, the trailing parameter(s), the iconv()
   call line, the write-back of the buffer pointers and all braces are
   missing from this listing.  */
208 iconv_carefully_1 (iconv_t cd
,
209 const char **inbuf
, size_t *inbytesleft
,
210 char **outbuf
, size_t *outbytesleft
,
213 const char *inptr_before
= *inbuf
;
214 const char *inptr
= inptr_before
;
215 const char *inptr_end
= inptr_before
+ *inbytesleft
;
216 char *outptr
= *outbuf
;
217 size_t outsize
= *outbytesleft
;
/* Starts out as the failure value.  */
218 size_t res
= (size_t)(-1);
/* Offer 1, 2, 3, ... bytes counted from the start position.  */
221 for (insize
= 1; inptr_before
+ insize
<= inptr_end
; insize
++)
/* Each attempt restarts at the saved start position.  */
223 inptr
= inptr_before
;
225 (ICONV_CONST
char **) &inptr
, &insize
,
/* Tested here: any outcome other than a failure with errno == EINVAL.  */
227 if (!(res
== (size_t)(-1) && errno
== EINVAL
))
229 /* iconv can eat up a shift sequence but give EINVAL while attempting
230 to convert the first character. E.g. libiconv does this. */
231 if (inptr
> inptr_before
)
239 *inbytesleft
= inptr_end
- inptr
;
240 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
241 && !(defined __GLIBC__ && !defined __UCLIBC__)
242 /* IRIX iconv() inserts a NUL byte if it cannot convert.
243 NetBSD iconv() inserts a question mark if it cannot convert.
244 Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
245 known to prefer to fail rather than doing a lossy conversion. */
246 if (res
!= (size_t)(-1) && res
> 0)
248 /* iconv() has already incremented INPTR. We cannot go back to a
249 previous INPTR, otherwise the state inside CD would become invalid,
250 if FROM_CODESET is a stateful encoding. So, tell the caller that
251 *INBUF has already been incremented. */
252 *incremented
= (inptr
> inptr_before
);
/* The remaining output size is written back under this non-failure test.  */
258 if (res
!= (size_t)(-1))
261 *outbytesleft
= outsize
;
263 *incremented
= false;
267 /* utf8conv_carefully is like iconv, except that
268 - it converts from UTF-8 to UTF-8,
269 - it stops as soon as it encounters a conversion error, and it returns
270 in *INCREMENTED a boolean telling whether it has incremented the input
271 pointers past the error location,
272 - if one_character_only is true, it stops after converting one
character. */
/* Works on local copies of the caller's cursors and processes one
   character per loop round through u8_mbtoucr / u8_mbtouc / u8_uctomb.
   NOTE(review): the return type, the trailing parameter(s), the local
   declarations of uc, n and m, the error returns and all braces are missing
   from this listing; the exact contracts of the u8_* helpers are defined
   outside this file.  */
275 utf8conv_carefully (bool one_character_only
,
276 const char **inbuf
, size_t *inbytesleft
,
277 char **outbuf
, size_t *outbytesleft
,
280 const char *inptr
= *inbuf
;
281 size_t insize
= *inbytesleft
;
282 char *outptr
= *outbuf
;
283 size_t outsize
= *outbytesleft
;
/* Read the next character from the remaining input.  */
293 n
= u8_mbtoucr (&uc
, (const uint8_t *) inptr
, insize
);
/* Map the helper's result to an errno value: -2 becomes EINVAL, anything
   else reaching this statement becomes EILSEQ.  */
296 errno
= (n
== -2 ? EINVAL
: EILSEQ
);
297 n
= u8_mbtouc (&uc
, (const uint8_t *) inptr
, insize
);
308 *incremented
= false;
/* Write the character to the remaining output space.  */
311 m
= u8_uctomb ((uint8_t *) outptr
, uc
, outsize
);
316 *incremented
= false;
/* Continue unless only one character was requested or the input is used
   up.  */
331 while (!one_character_only
&& insize
> 0);
/* Report the remaining input and output sizes to the caller.  */
334 *inbytesleft
= insize
;
336 *outbytesleft
= outsize
;
341 mem_cd_iconveh_internal (const char *src
, size_t srclen
,
342 iconv_t cd
, iconv_t cd1
, iconv_t cd2
,
343 enum iconv_ilseq_handler handler
,
346 char **resultp
, size_t *lengthp
)
348 /* When a conversion error occurs, we cannot start using CD1 and CD2 at
349 this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
350 Instead, we have to start afresh from the beginning of SRC. */
351 /* Use a temporary buffer, so that for small strings, a single malloc()
352 call will be sufficient. */
353 # define tmpbufsize 4096
354 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
355 libiconv's UCS-4-INTERNAL encoding. */
356 union { unsigned int align
; char buf
[tmpbufsize
]; } tmp
;
357 # define tmpbuf tmp.buf
359 char *initial_result
;
363 size_t last_length
= (size_t)(-1); /* only needed if offsets != NULL */
365 if (*resultp
!= NULL
&& *lengthp
>= sizeof (tmpbuf
))
367 initial_result
= *resultp
;
368 allocated
= *lengthp
;
372 initial_result
= tmpbuf
;
373 allocated
= sizeof (tmpbuf
);
375 result
= initial_result
;
377 /* Test whether a direct conversion is possible at all. */
378 if (cd
== (iconv_t
)(-1))
385 for (i
= 0; i
< srclen
; i
++)
386 offsets
[i
] = (size_t)(-1);
388 last_length
= (size_t)(-1);
392 /* First, try a direct conversion, and see whether a conversion error
occurs at all. */
395 const char *inptr
= src
;
396 size_t insize
= srclen
;
398 /* Set to the initial state. */
399 iconv (cd
, NULL
, NULL
, NULL
, NULL
);
403 char *outptr
= result
+ length
;
404 size_t outsize
= allocated
- extra_alloc
- length
;
411 if (length
!= last_length
) /* ensure that offsets[] stays increasing */
413 offsets
[inptr
- src
] = length
;
414 last_length
= length
;
416 res
= iconv_carefully_1 (cd
,
422 /* Use iconv_carefully instead of iconv here, because:
423 - If TO_CODESET is UTF-8, we can do the error handling in this
424 loop, no need for a second loop,
425 - With iconv() implementations other than GNU libiconv and GNU
426 libc, if we use iconv() in a big swoop, checking for an E2BIG
427 return, we lose the number of irreversible conversions. */
428 res
= iconv_carefully (cd
,
433 length
= outptr
- result
;
434 grow
= (length
+ extra_alloc
> allocated
/ 2);
435 if (res
== (size_t)(-1))
439 else if (errno
== EINVAL
)
441 else if (errno
== EILSEQ
&& handler
!= iconveh_error
)
443 if (cd2
== (iconv_t
)(-1))
445 /* TO_CODESET is UTF-8. */
446 /* Error handling can produce up to 1 or 3 bytes of
output. */
449 (handler
== iconveh_replacement_character
? 3 : 1);
450 if (length
+ extra_need
+ extra_alloc
> allocated
)
454 allocated
= 2 * allocated
;
455 if (length
+ extra_need
+ extra_alloc
> allocated
)
456 allocated
= 2 * allocated
;
457 if (length
+ extra_need
+ extra_alloc
> allocated
)
459 if (result
== initial_result
)
460 memory
= (char *) malloc (allocated
);
462 memory
= (char *) realloc (result
, allocated
);
465 if (result
!= initial_result
)
470 if (result
== initial_result
)
471 memcpy (memory
, initial_result
, length
);
475 /* The input is invalid in FROM_CODESET. Eat up one byte
476 and emit a replacement character or a question mark. */
484 if (handler
== iconveh_replacement_character
)
486 /* U+FFFD in UTF-8 encoding. */
487 result
[length
+0] = '\357';
488 result
[length
+1] = '\277';
489 result
[length
+2] = '\275';
494 result
[length
] = '?';
503 if (result
!= initial_result
)
514 allocated
= 2 * allocated
;
515 if (result
== initial_result
)
516 memory
= (char *) malloc (allocated
);
518 memory
= (char *) realloc (result
, allocated
);
521 if (result
!= initial_result
)
526 if (result
== initial_result
)
527 memcpy (memory
, initial_result
, length
);
533 /* Now get the conversion state back to the initial state.
534 But avoid glibc-2.1 bug and Solaris 2.7 bug. */
535 #if defined _LIBICONV_VERSION \
536 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
540 char *outptr
= result
+ length
;
541 size_t outsize
= allocated
- extra_alloc
- length
;
544 res
= iconv (cd
, NULL
, NULL
, &outptr
, &outsize
);
545 length
= outptr
- result
;
546 if (res
== (size_t)(-1))
552 allocated
= 2 * allocated
;
553 if (result
== initial_result
)
554 memory
= (char *) malloc (allocated
);
556 memory
= (char *) realloc (result
, allocated
);
559 if (result
!= initial_result
)
564 if (result
== initial_result
)
565 memcpy (memory
, initial_result
, length
);
570 if (result
!= initial_result
)
580 /* The direct conversion succeeded. */
584 /* The direct conversion failed.
585 Use a conversion through UTF-8. */
590 for (i
= 0; i
< srclen
; i
++)
591 offsets
[i
] = (size_t)(-1);
593 last_length
= (size_t)(-1);
597 const bool slowly
= (offsets
!= NULL
|| handler
== iconveh_error
);
598 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
599 char utf8buf
[utf8bufsize
+ 3];
601 const char *in1ptr
= src
;
602 size_t in1size
= srclen
;
603 bool do_final_flush1
= true;
604 bool do_final_flush2
= true;
606 /* Set to the initial state. */
607 if (cd1
!= (iconv_t
)(-1))
608 iconv (cd1
, NULL
, NULL
, NULL
, NULL
);
609 if (cd2
!= (iconv_t
)(-1))
610 iconv (cd2
, NULL
, NULL
, NULL
, NULL
);
612 while (in1size
> 0 || do_final_flush1
|| utf8len
> 0 || do_final_flush2
)
614 char *out1ptr
= utf8buf
+ utf8len
;
615 size_t out1size
= utf8bufsize
- utf8len
;
620 /* Conversion step 1: from FROM_CODESET to UTF-8. */
624 && length
!= last_length
) /* ensure that offsets[] stays increasing */
626 offsets
[in1ptr
- src
] = length
;
627 last_length
= length
;
629 if (cd1
!= (iconv_t
)(-1))
632 res1
= iconv_carefully_1 (cd1
,
637 res1
= iconv_carefully (cd1
,
644 /* FROM_CODESET is UTF-8. */
645 res1
= utf8conv_carefully (slowly
,
651 else if (do_final_flush1
)
653 /* Now get the conversion state of CD1 back to the initial state.
654 But avoid glibc-2.1 bug and Solaris 2.7 bug. */
655 # if defined _LIBICONV_VERSION \
656 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
658 if (cd1
!= (iconv_t
)(-1))
659 res1
= iconv (cd1
, NULL
, NULL
, &out1ptr
, &out1size
);
663 do_final_flush1
= false;
671 if (res1
== (size_t)(-1)
672 && !(errno
== E2BIG
|| errno
== EINVAL
|| errno
== EILSEQ
))
674 if (result
!= initial_result
)
678 if (res1
== (size_t)(-1)
679 && errno
== EILSEQ
&& handler
!= iconveh_error
)
681 /* The input is invalid in FROM_CODESET. Eat up one byte and
682 emit a U+FFFD character or a question mark. Room for this
683 character was allocated at the end of utf8buf. */
691 if (handler
== iconveh_replacement_character
)
693 /* U+FFFD in UTF-8 encoding. */
704 utf8len
= out1ptr
- utf8buf
;
708 || utf8len
> utf8bufsize
/ 2
709 || (res1
== (size_t)(-1) && errno1
== E2BIG
))
711 /* Conversion step 2: from UTF-8 to TO_CODESET. */
712 const char *in2ptr
= utf8buf
;
713 size_t in2size
= utf8len
;
716 || (in1size
== 0 && !do_final_flush1
&& do_final_flush2
))
718 char *out2ptr
= result
+ length
;
719 size_t out2size
= allocated
- extra_alloc
- length
;
726 if (cd2
!= (iconv_t
)(-1))
727 res2
= iconv_carefully (cd2
,
732 /* TO_CODESET is UTF-8. */
733 res2
= utf8conv_carefully (false,
738 else /* in1size == 0 && !do_final_flush1
739 && in2size == 0 && do_final_flush2 */
741 /* Now get the conversion state of CD2 back to the initial
742 state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */
743 # if defined _LIBICONV_VERSION \
744 || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
746 if (cd2
!= (iconv_t
)(-1))
747 res2
= iconv (cd2
, NULL
, NULL
, &out2ptr
, &out2size
);
751 do_final_flush2
= false;
755 length
= out2ptr
- result
;
756 grow
= (length
+ extra_alloc
> allocated
/ 2);
757 if (res2
== (size_t)(-1))
761 else if (errno
== EINVAL
)
763 else if (errno
== EILSEQ
&& handler
!= iconveh_error
)
765 /* Error handling can produce up to 10 bytes of UTF-8
766 output. But TO_CODESET may be UCS-2, UTF-16 or
767 UCS-4, so use CD2 here as well. */
777 if (u8_prev (&uc
, (const uint8_t *) in2ptr
,
778 (const uint8_t *) utf8buf
)
787 n
= u8_mbtouc_unsafe (&uc
, (const uint8_t *) in2ptr
,
793 if (handler
== iconveh_escape_sequence
)
795 static char const hex
[16] = "0123456789ABCDEF";
797 scratchbuf
[scratchlen
++] = '\\';
799 scratchbuf
[scratchlen
++] = 'u';
802 scratchbuf
[scratchlen
++] = 'U';
803 scratchbuf
[scratchlen
++] = hex
[(uc
>>28) & 15];
804 scratchbuf
[scratchlen
++] = hex
[(uc
>>24) & 15];
805 scratchbuf
[scratchlen
++] = hex
[(uc
>>20) & 15];
806 scratchbuf
[scratchlen
++] = hex
[(uc
>>16) & 15];
808 scratchbuf
[scratchlen
++] = hex
[(uc
>>12) & 15];
809 scratchbuf
[scratchlen
++] = hex
[(uc
>>8) & 15];
810 scratchbuf
[scratchlen
++] = hex
[(uc
>>4) & 15];
811 scratchbuf
[scratchlen
++] = hex
[uc
& 15];
813 else if (handler
== iconveh_replacement_character
)
815 /* U+FFFD in UTF-8 encoding. */
816 scratchbuf
[0] = '\357';
817 scratchbuf
[1] = '\277';
818 scratchbuf
[2] = '\275';
829 if (cd2
!= (iconv_t
)(-1))
831 char *out2ptr_try
= out2ptr
;
832 size_t out2size_try
= out2size
;
834 (ICONV_CONST
char **) &inptr
, &insize
,
835 &out2ptr_try
, &out2size_try
);
836 if (handler
== iconveh_replacement_character
837 && (res
== (size_t)(-1)
839 /* FreeBSD iconv(), NetBSD iconv(), and
840 Solaris 11 iconv() insert a '?' if they
841 cannot convert. This is what we want.
842 But IRIX iconv() inserts a NUL byte if it
cannot convert.
844 And musl libc iconv() inserts a '*' if it
cannot convert. */
847 && !(out2ptr_try
- out2ptr
== 1
848 && *out2ptr
== '?'))))
850 /* The iconv() call failed.
851 U+FFFD can't be converted to TO_CODESET.
Use '?' instead. */
858 (ICONV_CONST
char **) &inptr
, &insize
,
859 &out2ptr
, &out2size
);
863 /* Accept the results of the iconv() call. */
864 out2ptr
= out2ptr_try
;
865 out2size
= out2size_try
;
871 /* TO_CODESET is UTF-8. */
872 if (out2size
>= insize
)
874 memcpy (out2ptr
, inptr
, insize
);
887 length
= out2ptr
- result
;
888 if (res
== (size_t)(-1) && errno
== E2BIG
)
892 allocated
= 2 * allocated
;
893 if (length
+ 1 + extra_alloc
> allocated
)
895 if (result
== initial_result
)
896 memory
= (char *) malloc (allocated
);
898 memory
= (char *) realloc (result
, allocated
);
901 if (result
!= initial_result
)
906 if (result
== initial_result
)
907 memcpy (memory
, initial_result
, length
);
911 out2ptr
= result
+ length
;
912 out2size
= allocated
- extra_alloc
- length
;
913 if (cd2
!= (iconv_t
)(-1))
915 (ICONV_CONST
char **) &inptr
,
917 &out2ptr
, &out2size
);
920 /* TO_CODESET is UTF-8. */
921 if (!(out2size
>= insize
))
923 memcpy (out2ptr
, inptr
, insize
);
930 length
= out2ptr
- result
;
932 # if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
933 && !(defined __GLIBC__ && !defined __UCLIBC__)
934 /* IRIX iconv() inserts a NUL byte if it cannot convert.
935 FreeBSD iconv(), NetBSD iconv(), and Solaris 11
936 iconv() insert a '?' if they cannot convert.
937 musl libc iconv() inserts a '*' if it cannot convert.
938 Only GNU libiconv (excluding the bastard Apple iconv)
939 and GNU libc are known to prefer to fail rather than
940 doing a lossy conversion. */
941 if (res
!= (size_t)(-1) && res
> 0)
947 if (res
== (size_t)(-1))
949 /* Failure converting the ASCII replacement. */
950 if (result
!= initial_result
)
957 if (result
!= initial_result
)
963 || (in1size
== 0 && !do_final_flush1
&& do_final_flush2
)))
969 allocated
= 2 * allocated
;
970 if (result
== initial_result
)
971 memory
= (char *) malloc (allocated
);
973 memory
= (char *) realloc (result
, allocated
);
976 if (result
!= initial_result
)
981 if (result
== initial_result
)
982 memcpy (memory
, initial_result
, length
);
987 /* Move the remaining bytes to the beginning of utf8buf. */
989 memmove (utf8buf
, in2ptr
, in2size
);
993 if (res1
== (size_t)(-1))
995 if (errno1
== EINVAL
)
997 else if (errno1
== EILSEQ
)
999 if (result
!= initial_result
)
1010 /* Now the final memory allocation. */
1011 if (result
== tmpbuf
)
1013 size_t memsize
= length
+ extra_alloc
;
1015 if (*resultp
!= NULL
&& *lengthp
>= memsize
)
1021 memory
= (char *) malloc (memsize
> 0 ? memsize
: 1);
1030 memcpy (result
, tmpbuf
, length
);
1032 else if (result
!= *resultp
&& length
+ extra_alloc
< allocated
)
1034 /* Shrink the allocated memory if possible. */
1035 size_t memsize
= length
+ extra_alloc
;
1038 memory
= (char *) realloc (result
, memsize
> 0 ? memsize
: 1);
/* Convert SRC[0..SRCLEN) with the descriptors held in *CD.
   Forwards to mem_cd_iconveh_internal, handing over cd->cd, cd->cd1 and
   cd->cd2, with 0 as the argument that follows HANDLER.
   NOTE(review): the return type and the OFFSETS parameter line are missing
   from this listing; OFFSETS is passed through unchanged.  */
1050 mem_cd_iconveh (const char *src
, size_t srclen
,
1051 const iconveh_t
*cd
,
1052 enum iconv_ilseq_handler handler
,
1054 char **resultp
, size_t *lengthp
)
1056 return mem_cd_iconveh_internal (src
, srclen
, cd
->cd
, cd
->cd1
, cd
->cd2
,
1057 handler
, 0, offsets
, resultp
, lengthp
);
/* Convert the NUL-terminated string SRC with the descriptors held in *CD.
   The terminating NUL is not part of the converted input; it is appended to
   the result afterwards.
   NOTE(review): the return type, the declaration of length, the error
   handling after the conversion and the return statements are missing from
   this listing.  */
1061 str_cd_iconveh (const char *src
,
1062 const iconveh_t
*cd
,
1063 enum iconv_ilseq_handler handler
)
1065 /* For most encodings, a trailing NUL byte in the input will be converted
1066 to a trailing NUL byte in the output. But not for UTF-7. So that this
1067 function is usable for UTF-7, we have to exclude the NUL byte from the
1068 conversion and add it by hand afterwards. */
1069 char *result
= NULL
;
/* strlen (src) excludes the NUL.  The argument after HANDLER is 1 and the
   offsets argument is NULL.
   NOTE(review): the 1 presumably reserves room for the NUL stored below;
   confirm against mem_cd_iconveh_internal.  */
1071 int retval
= mem_cd_iconveh_internal (src
, strlen (src
),
1072 cd
->cd
, cd
->cd1
, cd
->cd2
, handler
, 1,
1073 NULL
, &result
, &length
);
1081 /* Add the terminating NUL byte. */
1082 result
[length
] = '\0';
/* Convert SRC[0..SRCLEN) from FROM_CODESET to TO_CODESET.
   Visible paths: a "nothing to convert" case; a plain copy when no offsets
   are requested and both codeset names compare equal ignoring case; and the
   general case, which opens an iconveh_t, calls mem_cd_iconveh and closes
   the descriptors again.
   NOTE(review): the return type, the OFFSETS parameter line, local
   declarations, braces and return statements are missing from this
   listing.  */
1090 mem_iconveh (const char *src
, size_t srclen
,
1091 const char *from_codeset
, const char *to_codeset
,
1092 enum iconv_ilseq_handler handler
,
1094 char **resultp
, size_t *lengthp
)
1098 /* Nothing to convert. */
1102 else if (offsets
== NULL
&& c_strcasecmp (from_codeset
, to_codeset
) == 0)
/* Condition: the caller supplied a buffer and it holds at least SRCLEN
   bytes; the malloc below is the other visible way result gets set.  */
1106 if (*resultp
!= NULL
&& *lengthp
>= srclen
)
1110 result
= (char *) malloc (srclen
);
1117 memcpy (result
, src
, srclen
);
/* General case: open the descriptors, convert, then close.  */
1130 if (iconveh_open (to_codeset
, from_codeset
, &cd
) < 0)
1135 retval
= mem_cd_iconveh (src
, srclen
, &cd
, handler
, offsets
,
1140 /* Close cd, but preserve the errno from mem_cd_iconveh. */
1141 int saved_errno
= errno
;
1142 iconveh_close (&cd
);
1143 errno
= saved_errno
;
/* Second close site: here the result of iconveh_close is checked.  */
1147 if (iconveh_close (&cd
) < 0)
/* Tested here: the result is not the buffer supplied by the caller.  */
1149 if (result
!= *resultp
)
1158 /* This is a different error code than if iconv_open existed but didn't
1159 support from_codeset and to_codeset, so that the caller can emit
1160 an error message such as
1161 "iconv() is not supported. Installing GNU libiconv and
1162 then reinstalling this package would fix this." */
/* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET.
   Visible paths: a strdup of SRC when SRC is empty or both codeset names
   compare equal ignoring case; otherwise open an iconveh_t, call
   str_cd_iconveh and close the descriptors again.
   NOTE(review): the return type, local declarations, braces and return
   statements are missing from this listing.  */
1170 str_iconveh (const char *src
,
1171 const char *from_codeset
, const char *to_codeset
,
1172 enum iconv_ilseq_handler handler
)
/* Shortcut: no conversion is attempted for an empty string or identical
   codeset names.  */
1174 if (*src
== '\0' || c_strcasecmp (from_codeset
, to_codeset
) == 0)
1176 char *result
= strdup (src
);
/* General case: open the descriptors, convert, then close.  */
1188 if (iconveh_open (to_codeset
, from_codeset
, &cd
) < 0)
1191 result
= str_cd_iconveh (src
, &cd
, handler
);
1195 /* Close cd, but preserve the errno from str_cd_iconveh. */
1196 int saved_errno
= errno
;
1197 iconveh_close (&cd
);
1198 errno
= saved_errno
;
/* Second close site: here the result of iconveh_close is checked.  */
1202 if (iconveh_close (&cd
) < 0)
1210 /* This is a different error code than if iconv_open existed but didn't
1211 support from_codeset and to_codeset, so that the caller can emit
1212 an error message such as
1213 "iconv() is not supported. Installing GNU libiconv and
1214 then reinstalling this package would fix this." */