2 * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011, 2023 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* This file defines three conversion loops:
21 - from wchar_t to anything else,
22 - from anything else to wchar_t,
23 - from wchar_t to wchar_t.
26 #if HAVE_WCRTOMB || HAVE_MBRTOWC
27 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
29 BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
30 included before <wchar.h>.
31 In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
/* Capacity bound for the intermediate multibyte buffer `buf` used by the
   conversion loops in this file: the wchar_t-to-anything loop checks
   bufcount + MB_CUR_MAX against it, and the anything-to-wchar_t loop uses it
   as the initial value of bufleft. */
37 # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
38 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
/* Fallback macros. The preprocessor conditionals that select them are on
   lines not visible here. The mbrtowc wrapper ignores its state argument and
   calls the real function with 0 in its place; mbsinit(ps) always yields 1,
   so callers always see the state as initial. */
40 # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
41 # define mbsinit(ps) 1
45 # define mbsinit(ps) 1
51 * The first two conversion loops have an extended conversion descriptor.
/* Conversion descriptor used by the two loops that convert between wchar_t
   and another encoding. Those loops cast their iconv_t argument to a pointer
   to this struct.
   parent: the embedded generic descriptor; the loops pass &wcd->parent to
           unicode_loop_convert and unicode_loop_reset.
   The loops also read wcd->state as an mbstate_t. The declaration of that
   member and the closing brace are on lines not visible here (presumably
   inside the #if that starts below). */
53 struct wchar_conv_struct
{
54 struct conv_struct parent
;
55 #if HAVE_WCRTOMB || HAVE_MBRTOWC
63 /* From wchar_t to anything else. */
/* State shared with the write callback wc_to_mb_write_replacement, in the
   variant of that callback which converts the replacement text through
   unicode_loop_convert.
   l_wcd:          descriptor whose `parent` member performs the conversion.
   l_outbytesleft: bytes still available in the output buffer.
   The callback also uses members l_outbuf and l_errno; their declarations
   and the closing brace are on lines not visible here. */
69 struct wc_to_mb_fallback_locals
{
70 struct wchar_conv_struct
* l_wcd
;
72 size_t l_outbytesleft
;
/* Write callback, first of two definitions with this name in the file (the
   preprocessor conditionals choosing between them are not visible here).
   buf, buflen: replacement text to emit.
   The third parameter is declared on a line not visible here; the body
   refers to it as callback_arg and casts it to
   struct wc_to_mb_fallback_locals *.
   The text is converted with unicode_loop_convert on plocals->l_wcd->parent,
   writing through plocals->l_outbuf / plocals->l_outbytesleft. A failure is
   recorded in plocals->l_errno: EILSEQ or EINVAL from the converter is stored
   as EILSEQ, E2BIG is stored as E2BIG. Once l_errno is nonzero, later calls
   do nothing. The handling of other errno values and the code after
   "Successful conversion" are on lines not visible here. */
76 /* A callback that writes a string given in the locale encoding. */
77 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
80 struct wc_to_mb_fallback_locals
* plocals
=
81 (struct wc_to_mb_fallback_locals
*) callback_arg
;
82 /* Do nothing if already encountered an error in a previous call. */
83 if (plocals
->l_errno
== 0) {
84 /* Attempt to convert the passed buffer to the target encoding.
85 Here we don't support characters split across multiple calls. */
86 const char* bufptr
= buf
;
87 size_t bufleft
= buflen
;
88 size_t res
= unicode_loop_convert(&plocals
->l_wcd
->parent
,
90 &plocals
->l_outbuf
,&plocals
->l_outbytesleft
);
91 if (res
== (size_t)(-1)) {
92 if (errno
== EILSEQ
|| errno
== EINVAL
)
93 /* Invalid buf contents. */
94 plocals
->l_errno
= EILSEQ
;
95 else if (errno
== E2BIG
)
96 /* Output buffer too small. */
97 plocals
->l_errno
= E2BIG
;
101 /* Successful conversion. */
/* State shared with the write callback wc_to_mb_write_replacement, in the
   variant of that callback which copies the replacement bytes directly.
   l_outbytesleft: bytes still available in the output buffer.
   The callback also uses members l_outbuf and l_errno; their declarations
   and the closing brace are on lines not visible here. */
110 struct wc_to_mb_fallback_locals
{
112 size_t l_outbytesleft
;
/* Write callback, second of two definitions with this name in the file (the
   preprocessor conditionals choosing between them are not visible here).
   buf, buflen: replacement bytes to emit, taken as-is.
   The third parameter is declared on a line not visible here; the body
   refers to it as callback_arg and casts it to
   struct wc_to_mb_fallback_locals *.
   If fewer than buflen bytes remain in the output, plocals->l_errno is set
   to E2BIG. The copy path does memcpy into plocals->l_outbuf, then advances
   l_outbuf and decreases l_outbytesleft by buflen. The line joining the size
   check to the copy is not visible here - presumably an else branch.
   Once l_errno is nonzero, later calls do nothing. */
116 /* A callback that writes a string given in the target encoding. */
117 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
120 struct wc_to_mb_fallback_locals
* plocals
=
121 (struct wc_to_mb_fallback_locals
*) callback_arg
;
122 /* Do nothing if already encountered an error in a previous call. */
123 if (plocals
->l_errno
== 0) {
124 /* Attempt to copy the passed buffer to the output buffer. */
125 if (plocals
->l_outbytesleft
< buflen
)
126 plocals
->l_errno
= E2BIG
;
128 memcpy(plocals
->l_outbuf
, buf
, buflen
);
129 plocals
->l_outbuf
+= buflen
;
130 plocals
->l_outbytesleft
-= buflen
;
137 #endif /* !LIBICONV_PLUG */
/* Conversion loop from wchar_t input to another encoding.
   icd:                  conversion descriptor, cast to
                         struct wchar_conv_struct *.
   inbuf, inbytesleft:   input cursor; the input is read as wchar_t units.
   outbuf, outbytesleft: output cursor.
   While at least sizeof(wchar_t) input bytes remain, each wide character is
   turned into multibyte form with wcrtomb and appended to buf at offset
   bufcount; the accumulated bytes are then passed to unicode_loop_convert
   on wcd->parent.
   When wcrtomb fails, the code branches on wcd->parent.discard_ilseq. Unless
   LIBICONV_PLUG is defined, it otherwise checks for an installed
   wc_to_mb_fallback and hands it the queued wide characters, with
   wc_to_mb_write_replacement as the write callback; a nonzero
   locals.l_errno is copied to errno.
   When unicode_loop_convert fails, E2BIG means the output buffer is too
   small, and EINVAL means more wide characters are appended, subject to the
   bufcount + MB_CUR_MAX > BUF_SIZE overrun check.
   On success *inbuf, *inbytesleft and *outbytesleft are updated.
   The declarations of buf and bufcount, the return statements, and several
   branch bodies are on lines not visible here. */
139 static size_t wchar_from_loop_convert (iconv_t icd
,
140 const char* * inbuf
, size_t *inbytesleft
,
141 char* * outbuf
, size_t *outbytesleft
)
143 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
145 while (*inbytesleft
>= sizeof(wchar_t)) {
146 const wchar_t * inptr
= (const wchar_t *) *inbuf
;
147 size_t inleft
= *inbytesleft
;
/* Local copy of the descriptor's conversion state; wcrtomb below updates
   this copy. */
149 mbstate_t state
= wcd
->state
;
151 while (inleft
>= sizeof(wchar_t)) {
152 /* Convert one wchar_t to multibyte representation. */
153 size_t count
= wcrtomb(buf
+bufcount
,*inptr
,&state
);
154 if (count
== (size_t)(-1)) {
156 if (wcd
->parent
.discard_ilseq
) {
159 #ifndef LIBICONV_PLUG
160 else if (wcd
->parent
.fallbacks
.wc_to_mb_fallback
!= NULL
) {
161 /* Drop the contents of buf[] accumulated so far, and instead
162 pass all queued wide characters to the fallback handler. */
163 struct wc_to_mb_fallback_locals locals
;
164 const wchar_t * fallback_inptr
;
168 locals
.l_outbuf
= *outbuf
;
169 locals
.l_outbytesleft
= *outbytesleft
;
/* Replay every wide character from *inbuf up to and including the one
   wcrtomb rejected. */
171 for (fallback_inptr
= (const wchar_t *) *inbuf
;
172 fallback_inptr
<= inptr
;
174 wcd
->parent
.fallbacks
.wc_to_mb_fallback(*fallback_inptr
,
175 wc_to_mb_write_replacement
,
177 wcd
->parent
.fallbacks
.data
);
178 if (locals
.l_errno
!= 0) {
179 errno
= locals
.l_errno
;
/* Consume the input through the rejected wide character and adopt the
   output position reached by the write callback. */
183 *inbuf
= (const char *) (inptr
+ 1);
184 *inbytesleft
= inleft
- sizeof(wchar_t);
185 *outbuf
= locals
.l_outbuf
;
186 *outbytesleft
= locals
.l_outbytesleft
;
197 inleft
-= sizeof(wchar_t);
200 /* Continue, append next wchar_t. */
202 /* Attempt to convert the accumulated multibyte representations
203 to the target encoding. */
204 const char* bufptr
= buf
;
205 size_t bufleft
= bufcount
;
206 char* outptr
= *outbuf
;
207 size_t outleft
= *outbytesleft
;
208 size_t res
= unicode_loop_convert(&wcd
->parent
,
211 if (res
== (size_t)(-1)) {
215 else if (errno
== E2BIG
)
216 /* Output buffer too small. */
218 else if (errno
== EINVAL
) {
219 /* Continue, append next wchar_t, but avoid buffer overrun. */
220 if (bufcount
+ MB_CUR_MAX
> BUF_SIZE
)
225 /* Successful conversion. */
227 *inbuf
= (const char *) inptr
;
228 *inbytesleft
= inleft
;
230 *outbytesleft
= outleft
;
/* Reset function paired with the wchar_t-to-anything conversion loop.
   icd:                  conversion descriptor, cast to
                         struct wchar_conv_struct *.
   outbuf, outbytesleft: output cursor; may be NULL or point to NULL.
   With no output buffer (outbuf == NULL or *outbuf == NULL), wcd->state is
   zeroed and the result of unicode_loop_reset(&wcd->parent, NULL, NULL) is
   returned.
   Otherwise, if mbsinit reports that wcd->state is not initial, wcrtomb is
   called with a null wide character on a copy of the state. The result is
   tested for (size_t)(-1), zero length, and a last byte other than '\0'.
   The bufcount-1 bytes before that final byte are passed to
   unicode_loop_convert, unicode_loop_reset is then called on local output
   cursors, and *outbytesleft is updated.
   The final visible statement delegates to unicode_loop_reset with the
   caller's cursors - presumably the path for an already-initial state.
   The declaration of buf, the failure branches and the block structure are
   on lines not visible here. */
240 static size_t wchar_from_loop_reset (iconv_t icd
,
241 char* * outbuf
, size_t *outbytesleft
)
243 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
244 if (outbuf
== NULL
|| *outbuf
== NULL
) {
245 /* Reset the states. */
246 memset(&wcd
->state
,'\0',sizeof(mbstate_t));
247 return unicode_loop_reset(&wcd
->parent
,NULL
,NULL
);
249 if (!mbsinit(&wcd
->state
)) {
250 mbstate_t state
= wcd
->state
;
252 size_t bufcount
= wcrtomb(buf
,(wchar_t)0,&state
);
253 if (bufcount
== (size_t)(-1) || bufcount
== 0 || buf
[bufcount
-1] != '\0')
256 const char* bufptr
= buf
;
/* Exclude the trailing '\0' byte from the bytes to convert. */
257 size_t bufleft
= bufcount
-1;
258 char* outptr
= *outbuf
;
259 size_t outleft
= *outbytesleft
;
260 size_t res
= unicode_loop_convert(&wcd
->parent
,
263 if (res
== (size_t)(-1)) {
269 res
= unicode_loop_reset(&wcd
->parent
,&outptr
,&outleft
);
270 if (res
== (size_t)(-1))
276 *outbytesleft
= outleft
;
282 return unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
291 /* From anything else to wchar_t. */
293 #ifndef LIBICONV_PLUG
/* State shared with the write callback mb_to_wc_write_replacement.
   l_outbytesleft: bytes still available in the output buffer.
   The callback also uses members l_outbuf and l_errno; their declarations
   and the closing brace are on lines not visible here. */
295 struct mb_to_wc_fallback_locals
{
297 size_t l_outbytesleft
;
/* Write callback passed to the mb_to_wc_fallback handler.
   buf, buflen: replacement wide characters; buflen counts wchar_t units.
   The third parameter is declared on a line not visible here; the body
   refers to it as callback_arg and casts it to
   struct mb_to_wc_fallback_locals *.
   If fewer than sizeof(wchar_t)*buflen bytes remain in the output,
   plocals->l_errno is set to E2BIG. The copy loop stores one wchar_t per
   iteration through plocals->l_outbuf, then advances l_outbuf and decreases
   l_outbytesleft by sizeof(wchar_t). The line joining the size check to the
   loop is not visible here - presumably an else branch.
   Once l_errno is nonzero, later calls do nothing. */
301 static void mb_to_wc_write_replacement (const wchar_t *buf
, size_t buflen
,
304 struct mb_to_wc_fallback_locals
* plocals
=
305 (struct mb_to_wc_fallback_locals
*) callback_arg
;
306 /* Do nothing if already encountered an error in a previous call. */
307 if (plocals
->l_errno
== 0) {
308 /* Attempt to copy the passed buffer to the output buffer. */
309 if (plocals
->l_outbytesleft
< sizeof(wchar_t)*buflen
)
310 plocals
->l_errno
= E2BIG
;
312 for (; buflen
> 0; buf
++, buflen
--) {
313 *(wchar_t*) plocals
->l_outbuf
= *buf
;
314 plocals
->l_outbuf
+= sizeof(wchar_t);
315 plocals
->l_outbytesleft
-= sizeof(wchar_t);
321 #endif /* !LIBICONV_PLUG */
/* Conversion loop from another encoding to wchar_t output.
   icd:                  conversion descriptor, cast to
                         struct wchar_conv_struct *.
   inbuf, inbytesleft:   input cursor.
   outbuf, outbytesleft: output cursor; output is written as wchar_t units.
   While input remains, a prefix of incount input bytes (starting at 1) is
   passed to unicode_loop_convert on wcd->parent with BUF_SIZE bytes of
   intermediate space. EINVAL from that call means the input is incomplete
   and one more byte is tried. On success the intermediate bytes are decoded
   with mbrtowc on a copy of wcd->state; a result of (size_t)(-2) also means
   one more input byte is tried.
   When mbrtowc returns (size_t)(-1), the code branches on
   wcd->parent.discard_ilseq. Unless LIBICONV_PLUG is defined, it otherwise
   checks for an installed mb_to_wc_fallback and passes it the incount input
   bytes, with mb_to_wc_write_replacement as the write callback; a nonzero
   locals.l_errno is copied to errno.
   A decoded wide character is stored at *outbuf after the check that
   sizeof(wchar_t) bytes are available there.
   The declarations of incount, buf, bufptr and wc, the return statements,
   and several branch bodies are on lines not visible here. */
323 static size_t wchar_to_loop_convert (iconv_t icd
,
324 const char* * inbuf
, size_t *inbytesleft
,
325 char* * outbuf
, size_t *outbytesleft
)
327 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
329 while (*inbytesleft
> 0) {
/* Try input prefixes of growing length, starting with a single byte. */
331 for (incount
= 1; ; ) {
332 /* Here incount <= *inbytesleft. */
334 const char* inptr
= *inbuf
;
335 size_t inleft
= incount
;
337 size_t bufleft
= BUF_SIZE
;
338 size_t res
= unicode_loop_convert(&wcd
->parent
,
341 if (res
== (size_t)(-1)) {
345 else if (errno
== EINVAL
) {
346 /* Incomplete input. Next try with one more input byte. */
348 /* E2BIG shouldn't occur. */
351 /* Successful conversion. */
352 size_t bufcount
= bufptr
-buf
; /* = BUF_SIZE-bufleft */
353 mbstate_t state
= wcd
->state
;
/* Decode the intermediate bytes into one wide character, using the local
   copy of the state. */
355 res
= mbrtowc(&wc
,buf
,bufcount
,&state
);
356 if (res
== (size_t)(-2)) {
357 /* Next try with one more input byte. */
359 if (res
== (size_t)(-1)) {
361 if (wcd
->parent
.discard_ilseq
) {
363 #ifndef LIBICONV_PLUG
364 else if (wcd
->parent
.fallbacks
.mb_to_wc_fallback
!= NULL
) {
365 /* Drop the contents of buf[] accumulated so far, and instead
366 pass all queued chars to the fallback handler. */
367 struct mb_to_wc_fallback_locals locals
;
368 locals
.l_outbuf
= *outbuf
;
369 locals
.l_outbytesleft
= *outbytesleft
;
371 wcd
->parent
.fallbacks
.mb_to_wc_fallback(*inbuf
, incount
,
372 mb_to_wc_write_replacement
,
374 wcd
->parent
.fallbacks
.data
);
375 if (locals
.l_errno
!= 0) {
376 errno
= locals
.l_errno
;
379 /* Restoring the state is not needed because it is the initial
380 state anyway: For all known locale encodings, the multibyte
381 to wchar_t conversion doesn't have shift state, and we have
382 excluded partial accumulated characters. */
383 /* wcd->state = state; */
385 *inbytesleft
-= incount
;
386 *outbuf
= locals
.l_outbuf
;
387 *outbytesleft
= locals
.l_outbytesleft
;
/* The output must have room for one wchar_t. */
395 if (*outbytesleft
< sizeof(wchar_t)) {
399 *(wchar_t*) *outbuf
= wc
;
400 /* Restoring the state is not needed because it is the initial
401 state anyway: For all known locale encodings, the multibyte
402 to wchar_t conversion doesn't have shift state, and we have
403 excluded partial accumulated characters. */
404 /* wcd->state = state; */
405 *outbuf
+= sizeof(wchar_t);
406 *outbytesleft
-= sizeof(wchar_t);
409 *inbytesleft
-= incount
;
415 if (incount
> *inbytesleft
) {
416 /* Incomplete input. */
/* Reset function paired with the anything-to-wchar_t conversion loop.
   icd:                  conversion descriptor, cast to
                         struct wchar_conv_struct *.
   outbuf, outbytesleft: output cursor, forwarded unchanged to
                         unicode_loop_reset on wcd->parent.
   The result of unicode_loop_reset is tested against (size_t)(-1), and
   wcd->state is zeroed with memset. The statement guarded by that test and
   the return statements are on lines not visible here, so whether the memset
   also runs after a failure cannot be determined from this view. */
425 static size_t wchar_to_loop_reset (iconv_t icd
,
426 char* * outbuf
, size_t *outbytesleft
)
428 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
429 size_t res
= unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
430 if (res
== (size_t)(-1))
432 memset(&wcd
->state
,0,sizeof(mbstate_t));
439 /* From wchar_t to wchar_t. */
/* Conversion loop from wchar_t to wchar_t.
   icd:                  conversion descriptor, cast to struct conv_struct *.
   inbuf, inbytesleft:   input cursor, read as wchar_t units.
   outbuf, outbytesleft: output cursor, addressed as wchar_t units.
   count is the smaller of the whole wchar_t units available in the input
   and in the output. Both byte counters are decreased by
   count * sizeof(wchar_t). In a do/while loop that runs until --count
   reaches 0, each wide character is read from the input and, unless
   LIBICONV_PLUG is defined, passed to cd->hooks.wc_hook together with
   cd->hooks.data when that hook is set. Finally *inbuf and *outbuf are set
   from the advanced pointers.
   The statement that stores wc to the output, any guard around the loop for
   count == 0, and the return statement are on lines not visible here. */
441 static size_t wchar_id_loop_convert (iconv_t icd
,
442 const char* * inbuf
, size_t *inbytesleft
,
443 char* * outbuf
, size_t *outbytesleft
)
445 struct conv_struct
* cd
= (struct conv_struct
*) icd
;
446 const wchar_t* inptr
= (const wchar_t*) *inbuf
;
447 size_t inleft
= *inbytesleft
/ sizeof(wchar_t);
448 wchar_t* outptr
= (wchar_t*) *outbuf
;
449 size_t outleft
= *outbytesleft
/ sizeof(wchar_t);
/* Number of whole wchar_t units that fit in both the input and the output. */
450 size_t count
= (inleft
<= outleft
? inleft
: outleft
);
452 *inbytesleft
-= count
* sizeof(wchar_t);
453 *outbytesleft
-= count
* sizeof(wchar_t);
455 wchar_t wc
= *inptr
++;
457 #ifndef LIBICONV_PLUG
458 if (cd
->hooks
.wc_hook
)
459 (*cd
->hooks
.wc_hook
)(wc
, cd
->hooks
.data
);
461 } while (--count
> 0);
462 *inbuf
= (const char*) inptr
;
463 *outbuf
= (char*) outptr
;
468 static size_t wchar_id_loop_reset (iconv_t icd
,
469 char* * outbuf
, size_t *outbytesleft
)