2 * Copyright (C) 2000-2002, 2005-2006 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
21 /* This file defines three conversion loops:
22 - from wchar_t to anything else,
23 - from anything else to wchar_t,
24 - from wchar_t to wchar_t. */
27 #if HAVE_WCRTOMB || HAVE_MBRTOWC
29 # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */
30 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
31 extern size_t mbrtowc ();
33 # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
34 # define mbsinit(ps) 1
38 # define mbsinit(ps) 1
43 typedef int mbstate_t;
48 /* The first two conversion loops have an extended conversion descriptor. */
/* Extended conversion descriptor.  Its first member is the generic
   conv_struct; the conversion functions in this file cast their iconv_t
   argument to a pointer to this struct and pass &wcd->parent on to the
   unicode_loop_* functions.
   NOTE(review): those functions also read a member named `state' (copied
   into an mbstate_t); its declaration and the closing brace of the struct
   are not visible in this excerpt.  */
50 struct wchar_conv_struct
{
51 struct conv_struct parent
;
58 /* From wchar_t to anything else. */
/* Context that the wc_to_mb_write_replacement callback receives through its
   callback argument.
   NOTE(review): the callback also uses members l_outbuf and l_errno; their
   declarations are not visible in this excerpt.  */
64 struct wc_to_mb_fallback_locals
{
/* Conversion descriptor; the callback passes &l_wcd->parent to
   unicode_loop_convert.  */
65 struct wchar_conv_struct
* l_wcd
;
/* Number of bytes still available in the output buffer.  */
67 size_t l_outbytesleft
;
71 /* A callback that writes a string given in the locale encoding. */
/* buf/buflen describe the replacement string.  callback_arg is cast to
   struct wc_to_mb_fallback_locals *.  Nothing is returned; failures are
   recorded in plocals->l_errno, and once that field is non-zero later calls
   do nothing.
   NOTE(review): the tail of the parameter list and part of the
   unicode_loop_convert argument list are not visible in this excerpt.  */
72 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
75 struct wc_to_mb_fallback_locals
* plocals
=
76 (struct wc_to_mb_fallback_locals
*) callback_arg
;
77 /* Do nothing if already encountered an error in a previous call. */
78 if (plocals
->l_errno
== 0) {
79 /* Attempt to convert the passed buffer to the target encoding.
80 Here we don't support characters split across multiple calls. */
/* Local cursor over the replacement string.  */
81 const char* bufptr
= buf
;
82 size_t bufleft
= buflen
;
/* The converter is handed the addresses of the output cursor fields kept in
   *plocals.  */
83 size_t res
= unicode_loop_convert(&plocals
->l_wcd
->parent
,
85 &plocals
->l_outbuf
,&plocals
->l_outbytesleft
);
86 if (res
== (size_t)(-1)) {
/* Both EILSEQ and EINVAL from the converter are recorded as EILSEQ.  */
87 if (errno
== EILSEQ
|| errno
== EINVAL
)
88 /* Invalid buf contents. */
89 plocals
->l_errno
= EILSEQ
;
90 else if (errno
== E2BIG
)
91 /* Output buffer too small. */
92 plocals
->l_errno
= E2BIG
;
96 /* Successful conversion. */
/* Another definition of struct wc_to_mb_fallback_locals.  The callback that
   follows it copies bytes with memcpy instead of calling
   unicode_loop_convert and does not use an l_wcd member.
   NOTE(review): presumably an alternative selected by a preprocessor
   conditional that is not visible here -- TODO confirm.  That callback also
   uses l_outbuf and l_errno, whose declarations are not visible either.  */
105 struct wc_to_mb_fallback_locals
{
/* Number of bytes still available in the output buffer.  */
107 size_t l_outbytesleft
;
111 /* A callback that writes a string given in the target encoding. */
/* buf/buflen describe the bytes to emit.  callback_arg is cast to
   struct wc_to_mb_fallback_locals *.  Nothing is returned; lack of output
   space is recorded as E2BIG in plocals->l_errno, and once that field is
   non-zero later calls do nothing.
   NOTE(review): the tail of the parameter list is not visible in this
   excerpt.  */
112 static void wc_to_mb_write_replacement (const char *buf
, size_t buflen
,
115 struct wc_to_mb_fallback_locals
* plocals
=
116 (struct wc_to_mb_fallback_locals
*) callback_arg
;
117 /* Do nothing if already encountered an error in a previous call. */
118 if (plocals
->l_errno
== 0) {
119 /* Attempt to copy the passed buffer to the output buffer. */
120 if (plocals
->l_outbytesleft
< buflen
)
121 plocals
->l_errno
= E2BIG
;
/* NOTE(review): the embedded numbering skips a line here; the copy below is
   presumably the else branch of the space check -- TODO confirm.  */
/* Copy the bytes and advance the output cursor by buflen.  */
123 memcpy(plocals
->l_outbuf
, buf
, buflen
);
124 plocals
->l_outbuf
+= buflen
;
125 plocals
->l_outbytesleft
-= buflen
;
132 #endif /* !LIBICONV_PLUG */
/* Conversion loop for wchar_t input.  Each wide character is turned into its
   multibyte form with wcrtomb and appended to a local buffer `buf'; the
   accumulated bytes are then passed to unicode_loop_convert.

   icd           conversion descriptor, used as struct wchar_conv_struct *.
   inbuf         input cursor; the data is read as an array of wchar_t.
   inbytesleft   input size in bytes; processing continues while at least
                 sizeof(wchar_t) bytes remain.
   outbuf        output cursor.
   outbytesleft  output space in bytes.

   NOTE(review): the declarations of buf and bufcount, several branch bodies
   and all return statements are not visible in this excerpt, so the return
   value and the exact error paths cannot be described from here.  */
134 static size_t wchar_from_loop_convert (iconv_t icd
,
135 const char* * inbuf
, size_t *inbytesleft
,
136 char* * outbuf
, size_t *outbytesleft
)
138 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
140 while (*inbytesleft
>= sizeof(wchar_t)) {
/* Local read cursor; the caller's cursor is only updated further below.  */
141 const wchar_t * inptr
= (const wchar_t *) *inbuf
;
142 size_t inleft
= *inbytesleft
;
/* Local copy of the stored shift state; wcrtomb is given this copy.  */
144 mbstate_t state
= wcd
->state
;
146 while (inleft
>= sizeof(wchar_t)) {
147 /* Convert one wchar_t to multibyte representation. */
148 size_t count
= wcrtomb(buf
+bufcount
,*inptr
,&state
);
/* wcrtomb reported a failure for *inptr.  */
149 if (count
== (size_t)(-1)) {
151 if (wcd
->parent
.discard_ilseq
) {
154 #ifndef LIBICONV_PLUG
155 else if (wcd
->parent
.fallbacks
.wc_to_mb_fallback
!= NULL
) {
156 /* Drop the contents of buf[] accumulated so far, and instead
157 pass all queued wide characters to the fallback handler. */
158 struct wc_to_mb_fallback_locals locals
;
159 const wchar_t * fallback_inptr
;
/* The callback writes through its own copy of the caller's output cursor.  */
163 locals
.l_outbuf
= *outbuf
;
164 locals
.l_outbytesleft
= *outbytesleft
;
/* Walk from the start of the pending input up to and including the wide
   character that wcrtomb rejected.  */
166 for (fallback_inptr
= (const wchar_t *) *inbuf
;
167 fallback_inptr
<= inptr
;
169 wcd
->parent
.fallbacks
.wc_to_mb_fallback(*fallback_inptr
,
170 wc_to_mb_write_replacement
,
172 wcd
->parent
.fallbacks
.data
);
/* An error recorded by the callback becomes this function's errno.  */
173 if (locals
.l_errno
!= 0) {
174 errno
= locals
.l_errno
;
/* Consume the input up to and including *inptr and adopt the callback's
   output cursor.  */
178 *inbuf
= (const char *) (inptr
+ 1);
179 *inbytesleft
= inleft
- sizeof(wchar_t);
180 *outbuf
= locals
.l_outbuf
;
181 *outbytesleft
= locals
.l_outbytesleft
;
192 inleft
-= sizeof(wchar_t);
195 /* Continue, append next wchar_t. */
197 /* Attempt to convert the accumulated multibyte representations
198 to the target encoding. */
/* Local cursors, so that the caller's pointers stay untouched until the
   result of the conversion is known.  */
199 const char* bufptr
= buf
;
200 size_t bufleft
= bufcount
;
201 char* outptr
= *outbuf
;
202 size_t outleft
= *outbytesleft
;
203 size_t res
= unicode_loop_convert(&wcd
->parent
,
206 if (res
== (size_t)(-1)) {
210 else if (errno
== E2BIG
)
211 /* Output buffer too small. */
213 else if (errno
== EINVAL
) {
214 /* Continue, append next wchar_t, but avoid buffer overrun. */
/* Another MB_CUR_MAX bytes might no longer fit into buf.  */
215 if (bufcount
+ MB_CUR_MAX
> BUF_SIZE
)
220 /* Successful conversion. */
/* Publish the local cursors to the caller.  */
222 *inbuf
= (const char *) inptr
;
223 *inbytesleft
= inleft
;
225 *outbytesleft
= outleft
;
/* Reset function paired with wchar_from_loop_convert.

   When outbuf or *outbuf is NULL, the stored mbstate_t is zeroed and the
   parent converter is reset without an output buffer.  Otherwise, if
   mbsinit reports that the stored state is not initial, wcrtomb is asked to
   encode (wchar_t)0; the bytes it produces before the terminating NUL are
   passed to unicode_loop_convert, followed by unicode_loop_reset on a local
   output cursor.

   NOTE(review): the declaration of buf, the bodies of the error branches and
   most return statements are not visible in this excerpt.  */
235 static size_t wchar_from_loop_reset (iconv_t icd
,
236 char* * outbuf
, size_t *outbytesleft
)
238 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
239 if (outbuf
== NULL
|| *outbuf
== NULL
) {
240 /* Reset the states. */
241 memset(&wcd
->state
,'\0',sizeof(mbstate_t));
242 return unicode_loop_reset(&wcd
->parent
,NULL
,NULL
);
244 if (!mbsinit(&wcd
->state
)) {
/* Work on a copy of the stored shift state.  */
245 mbstate_t state
= wcd
->state
;
247 size_t bufcount
= wcrtomb(buf
,(wchar_t)0,&state
);
/* Checks that wcrtomb succeeded, produced at least one byte, and that the
   last byte produced is NUL.  */
248 if (bufcount
== (size_t)(-1) || bufcount
== 0 || buf
[bufcount
-1] != '\0')
251 const char* bufptr
= buf
;
/* The trailing NUL byte is excluded from the conversion.  */
252 size_t bufleft
= bufcount
-1;
253 char* outptr
= *outbuf
;
254 size_t outleft
= *outbytesleft
;
255 size_t res
= unicode_loop_convert(&wcd
->parent
,
258 if (res
== (size_t)(-1)) {
264 res
= unicode_loop_reset(&wcd
->parent
,&outptr
,&outleft
);
265 if (res
== (size_t)(-1))
271 *outbytesleft
= outleft
;
/* NOTE(review): presumably reached when the stored state is already initial
   -- the enclosing else is not visible here; TODO confirm.  */
277 return unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
286 /* From anything else to wchar_t. */
288 #ifndef LIBICONV_PLUG
/* Context that the mb_to_wc_write_replacement callback receives through its
   callback argument.
   NOTE(review): the callback also uses members l_outbuf and l_errno; their
   declarations are not visible in this excerpt.  */
290 struct mb_to_wc_fallback_locals
{
/* Number of bytes (not wide characters) still available in the output
   buffer; the callback compares it against sizeof(wchar_t)*buflen.  */
292 size_t l_outbytesleft
;
/* A callback that writes replacement wide characters to the output buffer.
   buf/buflen describe an array of buflen wchar_t values.  callback_arg is
   cast to struct mb_to_wc_fallback_locals *.  Nothing is returned; lack of
   output space is recorded as E2BIG in plocals->l_errno, and once that field
   is non-zero later calls do nothing.
   NOTE(review): the tail of the parameter list is not visible in this
   excerpt.  */
296 static void mb_to_wc_write_replacement (const wchar_t *buf
, size_t buflen
,
299 struct mb_to_wc_fallback_locals
* plocals
=
300 (struct mb_to_wc_fallback_locals
*) callback_arg
;
301 /* Do nothing if already encountered an error in a previous call. */
302 if (plocals
->l_errno
== 0) {
303 /* Attempt to copy the passed buffer to the output buffer. */
/* The space check is done in bytes.  */
304 if (plocals
->l_outbytesleft
< sizeof(wchar_t)*buflen
)
305 plocals
->l_errno
= E2BIG
;
/* NOTE(review): the embedded numbering skips a line here; the loop below is
   presumably the else branch of the space check -- TODO confirm.  */
/* Store one wchar_t per iteration and advance the byte-based output cursor
   by sizeof(wchar_t).  */
307 for (; buflen
> 0; buf
++, buflen
--) {
308 *(wchar_t*) plocals
->l_outbuf
= *buf
;
309 plocals
->l_outbuf
+= sizeof(wchar_t);
310 plocals
->l_outbytesleft
-= sizeof(wchar_t);
316 #endif /* !LIBICONV_PLUG */
/* Conversion loop producing wchar_t output.  For the pending input it tries
   prefixes of 1, 2, ... bytes: each prefix is passed to unicode_loop_convert
   with a local buffer of BUF_SIZE bytes, and the bytes produced are decoded
   with mbrtowc into one wchar_t, which is stored at *outbuf.

   icd           conversion descriptor, used as struct wchar_conv_struct *.
   inbuf         input cursor.
   inbytesleft   input size in bytes; reduced by the number of bytes consumed.
   outbuf        output cursor; wchar_t values are stored through it.
   outbytesleft  output space in bytes; reduced by sizeof(wchar_t) per
                 character written.

   NOTE(review): the declarations of buf, bufptr, wc and incount, several
   branch bodies and all return statements are not visible in this excerpt,
   so the return value and the exact error paths cannot be described from
   here.  */
318 static size_t wchar_to_loop_convert (iconv_t icd
,
319 const char* * inbuf
, size_t *inbytesleft
,
320 char* * outbuf
, size_t *outbytesleft
)
322 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
324 while (*inbytesleft
> 0) {
/* incount is the length of the input prefix currently being tried.  */
326 for (incount
= 1; incount
<= *inbytesleft
; incount
++) {
/* Every attempt starts again at the beginning of the pending input.  */
328 const char* inptr
= *inbuf
;
329 size_t inleft
= incount
;
331 size_t bufleft
= BUF_SIZE
;
332 size_t res
= unicode_loop_convert(&wcd
->parent
,
335 if (res
== (size_t)(-1)) {
339 else if (errno
== EINVAL
) {
340 /* Incomplete input. Next try with one more input byte. */
342 /* E2BIG shouldn't occur. */
345 /* Successful conversion. */
346 size_t bufcount
= bufptr
-buf
; /* = BUF_SIZE-bufleft */
/* mbrtowc is given a copy of the stored shift state.  */
347 mbstate_t state
= wcd
->state
;
349 res
= mbrtowc(&wc
,buf
,bufcount
,&state
);
/* (size_t)(-2) is mbrtowc's result for an incomplete multibyte character.  */
350 if (res
== (size_t)(-2)) {
351 /* Next try with one more input byte. */
/* (size_t)(-1) is mbrtowc's result for an encoding error.  */
353 if (res
== (size_t)(-1)) {
355 if (wcd
->parent
.discard_ilseq
) {
357 #ifndef LIBICONV_PLUG
358 else if (wcd
->parent
.fallbacks
.mb_to_wc_fallback
!= NULL
) {
359 /* Drop the contents of buf[] accumulated so far, and instead
360 pass all queued chars to the fallback handler. */
361 struct mb_to_wc_fallback_locals locals
;
/* The callback writes through its own copy of the caller's output cursor.  */
362 locals
.l_outbuf
= *outbuf
;
363 locals
.l_outbytesleft
= *outbytesleft
;
/* The handler receives the first incount bytes of the pending input.  */
365 wcd
->parent
.fallbacks
.mb_to_wc_fallback(*inbuf
, incount
,
366 mb_to_wc_write_replacement
,
368 wcd
->parent
.fallbacks
.data
);
/* An error recorded by the callback becomes this function's errno.  */
369 if (locals
.l_errno
!= 0) {
370 errno
= locals
.l_errno
;
373 /* Restoring the state is not needed because it is the initial
374 state anyway: For all known locale encodings, the multibyte
375 to wchar_t conversion doesn't have shift state, and we have
376 excluded partial accumulated characters. */
377 /* wcd->state = state; */
/* Account for the consumed bytes and adopt the callback's output cursor.  */
379 *inbytesleft
-= incount
;
380 *outbuf
= locals
.l_outbuf
;
381 *outbytesleft
= locals
.l_outbytesleft
;
/* Test for room for one wchar_t in the output buffer.  */
389 if (*outbytesleft
< sizeof(wchar_t)) {
393 *(wchar_t*) *outbuf
= wc
;
394 /* Restoring the state is not needed because it is the initial
395 state anyway: For all known locale encodings, the multibyte
396 to wchar_t conversion doesn't have shift state, and we have
397 excluded partial accumulated characters. */
398 /* wcd->state = state; */
399 *outbuf
+= sizeof(wchar_t);
400 *outbytesleft
-= sizeof(wchar_t);
403 *inbytesleft
-= incount
;
/* Reset function paired with wchar_to_loop_convert: calls unicode_loop_reset
   on the parent converter with the caller's output cursor, checks the result
   against (size_t)(-1), and zeroes the stored mbstate_t.
   NOTE(review): the statement guarded by the error check and the return
   statements are not visible in this excerpt.  */
413 static size_t wchar_to_loop_reset (iconv_t icd
,
414 char* * outbuf
, size_t *outbytesleft
)
416 struct wchar_conv_struct
* wcd
= (struct wchar_conv_struct
*) icd
;
417 size_t res
= unicode_loop_reset(&wcd
->parent
,outbuf
,outbytesleft
);
418 if (res
== (size_t)(-1))
/* Put the stored shift state back to all-zero bytes.  */
420 memset(&wcd
->state
,0,sizeof(mbstate_t));
427 /* From wchar_t to wchar_t. */
/* Conversion loop for the wchar_t to wchar_t case.

   icd           conversion descriptor, used as struct conv_struct *.
   inbuf         input cursor, read as an array of wchar_t.
   inbytesleft   input size in bytes.
   outbuf        output cursor, used as an array of wchar_t.
   outbytesleft  output space in bytes.

   The number of wide characters handled is the smaller of the whole wchar_t
   units available in the input and in the output; both byte counts are
   reduced by that many units, and both cursors are updated at the end.

   NOTE(review): the opening of the do-loop, the store through outptr and the
   return statements are not visible in this excerpt.  */
429 static size_t wchar_id_loop_convert (iconv_t icd
,
430 const char* * inbuf
, size_t *inbytesleft
,
431 char* * outbuf
, size_t *outbytesleft
)
433 struct conv_struct
* cd
= (struct conv_struct
*) icd
;
/* Both sizes are converted from bytes to whole wchar_t units.  */
434 const wchar_t* inptr
= (const wchar_t*) *inbuf
;
435 size_t inleft
= *inbytesleft
/ sizeof(wchar_t);
436 wchar_t* outptr
= (wchar_t*) *outbuf
;
437 size_t outleft
= *outbytesleft
/ sizeof(wchar_t);
/* Number of wide characters that fit on both sides.  */
438 size_t count
= (inleft
<= outleft
? inleft
: outleft
);
/* The byte counters are adjusted up front for all count characters.  */
440 *inbytesleft
-= count
* sizeof(wchar_t);
441 *outbytesleft
-= count
* sizeof(wchar_t);
443 wchar_t wc
= *inptr
++;
445 #ifndef LIBICONV_PLUG
/* If a wc_hook is installed, it is called with the wide character and the
   hooks' data pointer.  */
446 if (cd
->hooks
.wc_hook
)
447 (*cd
->hooks
.wc_hook
)(wc
, cd
->hooks
.data
);
449 } while (--count
> 0);
/* Publish the advanced cursors to the caller.  */
450 *inbuf
= (const char*) inptr
;
451 *outbuf
= (char*) outptr
;
456 static size_t wchar_id_loop_reset (iconv_t icd
,
457 char* * outbuf
, size_t *outbytesleft
)