2 * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
21 /* This file defines three conversion loops:
22 - from wchar_t to anything else,
23 - from anything else to wchar_t,
24 - from wchar_t to wchar_t.
27 #if HAVE_WCRTOMB || HAVE_MBRTOWC
28 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
30 BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
31 included before <wchar.h>.
32 In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
38 # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
39 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
40 extern size_t mbrtowc ();
42 # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
43 # define mbsinit(ps) 1
47 # define mbsinit(ps) 1
53 * The first two conversion loops have an extended conversion descriptor.
55 struct wchar_conv_struct
{
56 struct conv_struct parent
;
57 #if HAVE_WCRTOMB || HAVE_MBRTOWC
65 /* From wchar_t to anything else. */
71 struct wc_to_mb_fallback_locals
{
72 struct wchar_conv_struct
* l_wcd
;
74 size_t l_outbytesleft
;
78 /* A callback that writes a string given in the locale encoding. */
79 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
82 struct wc_to_mb_fallback_locals
* plocals
=
83 (struct wc_to_mb_fallback_locals
*) callback_arg
;
84 /* Do nothing if already encountered an error in a previous call. */
85 if (plocals
->l_errno
== 0) {
86 /* Attempt to convert the passed buffer to the target encoding.
87 Here we don't support characters split across multiple calls. */
88 const char* bufptr
= buf
;
89 size_t bufleft
= buflen
;
90 size_t res
= unicode_loop_convert(&plocals
->l_wcd
->parent
,
92 &plocals
->l_outbuf
,&plocals
->l_outbytesleft
);
93 if (res
== (size_t)(-1)) {
94 if (errno
== EILSEQ
|| errno
== EINVAL
)
95 /* Invalid buf contents. */
96 plocals
->l_errno
= EILSEQ
;
97 else if (errno
== E2BIG
)
98 /* Output buffer too small. */
99 plocals
->l_errno
= E2BIG
;
103 /* Successful conversion. */
112 struct wc_to_mb_fallback_locals
{
114 size_t l_outbytesleft
;
118 /* A callback that writes a string given in the target encoding. */
119 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
122 struct wc_to_mb_fallback_locals
* plocals
=
123 (struct wc_to_mb_fallback_locals
*) callback_arg
;
124 /* Do nothing if already encountered an error in a previous call. */
125 if (plocals
->l_errno
== 0) {
126 /* Attempt to copy the passed buffer to the output buffer. */
127 if (plocals
->l_outbytesleft
< buflen
)
128 plocals
->l_errno
= E2BIG
;
130 memcpy(plocals
->l_outbuf
, buf
, buflen
);
131 plocals
->l_outbuf
+= buflen
;
132 plocals
->l_outbytesleft
-= buflen
;
139 #endif /* !LIBICONV_PLUG */
141 static size_t wchar_from_loop_convert (iconv_t icd
,
142 const char* * inbuf
, size_t *inbytesleft
,
143 char* * outbuf
, size_t *outbytesleft
)
145 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
147 while (*inbytesleft
>= sizeof(wchar_t)) {
148 const wchar_t * inptr
= (const wchar_t *) *inbuf
;
149 size_t inleft
= *inbytesleft
;
151 mbstate_t state
= wcd
->state
;
153 while (inleft
>= sizeof(wchar_t)) {
154 /* Convert one wchar_t to multibyte representation. */
155 size_t count
= wcrtomb(buf
+bufcount
,*inptr
,&state
);
156 if (count
== (size_t)(-1)) {
158 if (wcd
->parent
.discard_ilseq
) {
161 #ifndef LIBICONV_PLUG
162 else if (wcd
->parent
.fallbacks
.wc_to_mb_fallback
!= NULL
) {
163 /* Drop the contents of buf[] accumulated so far, and instead
164 pass all queued wide characters to the fallback handler. */
165 struct wc_to_mb_fallback_locals locals
;
166 const wchar_t * fallback_inptr
;
170 locals
.l_outbuf
= *outbuf
;
171 locals
.l_outbytesleft
= *outbytesleft
;
173 for (fallback_inptr
= (const wchar_t *) *inbuf
;
174 fallback_inptr
<= inptr
;
176 wcd
->parent
.fallbacks
.wc_to_mb_fallback(*fallback_inptr
,
177 wc_to_mb_write_replacement
,
179 wcd
->parent
.fallbacks
.data
);
180 if (locals
.l_errno
!= 0) {
181 errno
= locals
.l_errno
;
185 *inbuf
= (const char *) (inptr
+ 1);
186 *inbytesleft
= inleft
- sizeof(wchar_t);
187 *outbuf
= locals
.l_outbuf
;
188 *outbytesleft
= locals
.l_outbytesleft
;
199 inleft
-= sizeof(wchar_t);
202 /* Continue, append next wchar_t. */
204 /* Attempt to convert the accumulated multibyte representations
205 to the target encoding. */
206 const char* bufptr
= buf
;
207 size_t bufleft
= bufcount
;
208 char* outptr
= *outbuf
;
209 size_t outleft
= *outbytesleft
;
210 size_t res
= unicode_loop_convert(&wcd
->parent
,
213 if (res
== (size_t)(-1)) {
217 else if (errno
== E2BIG
)
218 /* Output buffer too small. */
220 else if (errno
== EINVAL
) {
221 /* Continue, append next wchar_t, but avoid buffer overrun. */
222 if (bufcount
+ MB_CUR_MAX
> BUF_SIZE
)
227 /* Successful conversion. */
229 *inbuf
= (const char *) inptr
;
230 *inbytesleft
= inleft
;
232 *outbytesleft
= outleft
;
242 static size_t wchar_from_loop_reset (iconv_t icd
,
243 char* * outbuf
, size_t *outbytesleft
)
245 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
246 if (outbuf
== NULL
|| *outbuf
== NULL
) {
247 /* Reset the states. */
248 memset(&wcd
->state
,'\0',sizeof(mbstate_t));
249 return unicode_loop_reset(&wcd
->parent
,NULL
,NULL
);
251 if (!mbsinit(&wcd
->state
)) {
252 mbstate_t state
= wcd
->state
;
254 size_t bufcount
= wcrtomb(buf
,(wchar_t)0,&state
);
255 if (bufcount
== (size_t)(-1) || bufcount
== 0 || buf
[bufcount
-1] != '\0')
258 const char* bufptr
= buf
;
259 size_t bufleft
= bufcount
-1;
260 char* outptr
= *outbuf
;
261 size_t outleft
= *outbytesleft
;
262 size_t res
= unicode_loop_convert(&wcd
->parent
,
265 if (res
== (size_t)(-1)) {
271 res
= unicode_loop_reset(&wcd
->parent
,&outptr
,&outleft
);
272 if (res
== (size_t)(-1))
278 *outbytesleft
= outleft
;
284 return unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
293 /* From anything else to wchar_t. */
295 #ifndef LIBICONV_PLUG
297 struct mb_to_wc_fallback_locals
{
299 size_t l_outbytesleft
;
303 static void mb_to_wc_write_replacement (const wchar_t *buf
, size_t buflen
,
306 struct mb_to_wc_fallback_locals
* plocals
=
307 (struct mb_to_wc_fallback_locals
*) callback_arg
;
308 /* Do nothing if already encountered an error in a previous call. */
309 if (plocals
->l_errno
== 0) {
310 /* Attempt to copy the passed buffer to the output buffer. */
311 if (plocals
->l_outbytesleft
< sizeof(wchar_t)*buflen
)
312 plocals
->l_errno
= E2BIG
;
314 for (; buflen
> 0; buf
++, buflen
--) {
315 *(wchar_t*) plocals
->l_outbuf
= *buf
;
316 plocals
->l_outbuf
+= sizeof(wchar_t);
317 plocals
->l_outbytesleft
-= sizeof(wchar_t);
323 #endif /* !LIBICONV_PLUG */
325 static size_t wchar_to_loop_convert (iconv_t icd
,
326 const char* * inbuf
, size_t *inbytesleft
,
327 char* * outbuf
, size_t *outbytesleft
)
329 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
331 while (*inbytesleft
> 0) {
333 for (incount
= 1; ; ) {
334 /* Here incount <= *inbytesleft. */
336 const char* inptr
= *inbuf
;
337 size_t inleft
= incount
;
339 size_t bufleft
= BUF_SIZE
;
340 size_t res
= unicode_loop_convert(&wcd
->parent
,
343 if (res
== (size_t)(-1)) {
347 else if (errno
== EINVAL
) {
348 /* Incomplete input. Next try with one more input byte. */
350 /* E2BIG shouldn't occur. */
353 /* Successful conversion. */
354 size_t bufcount
= bufptr
-buf
; /* = BUF_SIZE-bufleft */
355 mbstate_t state
= wcd
->state
;
357 res
= mbrtowc(&wc
,buf
,bufcount
,&state
);
358 if (res
== (size_t)(-2)) {
359 /* Next try with one more input byte. */
361 if (res
== (size_t)(-1)) {
363 if (wcd
->parent
.discard_ilseq
) {
365 #ifndef LIBICONV_PLUG
366 else if (wcd
->parent
.fallbacks
.mb_to_wc_fallback
!= NULL
) {
367 /* Drop the contents of buf[] accumulated so far, and instead
368 pass all queued chars to the fallback handler. */
369 struct mb_to_wc_fallback_locals locals
;
370 locals
.l_outbuf
= *outbuf
;
371 locals
.l_outbytesleft
= *outbytesleft
;
373 wcd
->parent
.fallbacks
.mb_to_wc_fallback(*inbuf
, incount
,
374 mb_to_wc_write_replacement
,
376 wcd
->parent
.fallbacks
.data
);
377 if (locals
.l_errno
!= 0) {
378 errno
= locals
.l_errno
;
381 /* Restoring the state is not needed because it is the initial
382 state anyway: For all known locale encodings, the multibyte
383 to wchar_t conversion doesn't have shift state, and we have
384 excluded partial accumulated characters. */
385 /* wcd->state = state; */
387 *inbytesleft
-= incount
;
388 *outbuf
= locals
.l_outbuf
;
389 *outbytesleft
= locals
.l_outbytesleft
;
397 if (*outbytesleft
< sizeof(wchar_t)) {
401 *(wchar_t*) *outbuf
= wc
;
402 /* Restoring the state is not needed because it is the initial
403 state anyway: For all known locale encodings, the multibyte
404 to wchar_t conversion doesn't have shift state, and we have
405 excluded partial accumulated characters. */
406 /* wcd->state = state; */
407 *outbuf
+= sizeof(wchar_t);
408 *outbytesleft
-= sizeof(wchar_t);
411 *inbytesleft
-= incount
;
417 if (incount
> *inbytesleft
) {
418 /* Incomplete input. */
427 static size_t wchar_to_loop_reset (iconv_t icd
,
428 char* * outbuf
, size_t *outbytesleft
)
430 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
431 size_t res
= unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
432 if (res
== (size_t)(-1))
434 memset(&wcd
->state
,0,sizeof(mbstate_t));
441 /* From wchar_t to wchar_t. */
443 static size_t wchar_id_loop_convert (iconv_t icd
,
444 const char* * inbuf
, size_t *inbytesleft
,
445 char* * outbuf
, size_t *outbytesleft
)
447 struct conv_struct
* cd
= (struct conv_struct
*) icd
;
448 const wchar_t* inptr
= (const wchar_t*) *inbuf
;
449 size_t inleft
= *inbytesleft
/ sizeof(wchar_t);
450 wchar_t* outptr
= (wchar_t*) *outbuf
;
451 size_t outleft
= *outbytesleft
/ sizeof(wchar_t);
452 size_t count
= (inleft
<= outleft
? inleft
: outleft
);
454 *inbytesleft
-= count
* sizeof(wchar_t);
455 *outbytesleft
-= count
* sizeof(wchar_t);
457 wchar_t wc
= *inptr
++;
459 #ifndef LIBICONV_PLUG
460 if (cd
->hooks
.wc_hook
)
461 (*cd
->hooks
.wc_hook
)(wc
, cd
->hooks
.data
);
463 } while (--count
> 0);
464 *inbuf
= (const char*) inptr
;
465 *outbuf
= (char*) outptr
;
470 static size_t wchar_id_loop_reset (iconv_t icd
,
471 char* * outbuf
, size_t *outbytesleft
)