Prefer #include <...> for system headers.
[libiconv.git] / lib / loop_wchar.h
blob9eb508e81a4faf3ca6a8bd95d3124d9727e8dd17
/*
 * Copyright (C) 2000-2002, 2005-2006, 2008-2009, 2011, 2023 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */

/* This file defines three conversion loops:
     - from wchar_t to anything else,
     - from anything else to wchar_t,
     - from wchar_t to wchar_t.
 */

#if HAVE_WCRTOMB || HAVE_MBRTOWC
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   <wchar.h>.
   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   included before <wchar.h>.
   In some builds of uClibc, <wchar.h> is nonexistent and wchar_t is defined
   by <stddef.h>.  */
# include <stddef.h>
# include <stdio.h>
# include <time.h>
# include <wchar.h>
/* Size of the scratch buffers that hold locale-encoded bytes between the
   wchar_t side and the generic conversion loop. */
# define BUF_SIZE 64  /* assume MB_LEN_MAX <= 64 */
/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.
   When mbstate_t is itself a macro (presumably a stand-in supplied by the
   build configuration - confirm), mbrtowc is called with a null state and
   every state is reported as initial. */
# ifdef mbstate_t
#  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
#  define mbsinit(ps) 1
# endif
/* Without a usable mbsinit(), treat every state as the initial state. */
# ifndef mbsinit
#  if !HAVE_MBSINIT
#   define mbsinit(ps) 1
#  endif
# endif
#endif
51 * The first two conversion loops have an extended conversion descriptor.
53 struct wchar_conv_struct {
54 struct conv_struct parent;
55 #if HAVE_WCRTOMB || HAVE_MBRTOWC
56 mbstate_t state;
57 #endif

#if HAVE_WCRTOMB

/* From wchar_t to anything else. */

#ifndef LIBICONV_PLUG

#if 0

/* Disabled variant: the replacement is given in the locale encoding and is
   run through the generic conversion loop. */
struct wc_to_mb_fallback_locals {
  struct wchar_conv_struct * l_wcd;
  char* l_outbuf;
  size_t l_outbytesleft;
  int l_errno;
};

/* A callback that writes a string given in the locale encoding. */
static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct wc_to_mb_fallback_locals * plocals =
    (struct wc_to_mb_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Attempt to convert the passed buffer to the target encoding.
       Here we don't support characters split across multiple calls. */
    const char* bufptr = buf;
    size_t bufleft = buflen;
    size_t res = unicode_loop_convert(&plocals->l_wcd->parent,
                                      &bufptr,&bufleft,
                                      &plocals->l_outbuf,&plocals->l_outbytesleft);
    if (res == (size_t)(-1)) {
      if (errno == EILSEQ || errno == EINVAL)
        /* Invalid buf contents. */
        plocals->l_errno = EILSEQ;
      else if (errno == E2BIG)
        /* Output buffer too small. */
        plocals->l_errno = E2BIG;
      else
        abort();
    } else {
      /* Successful conversion. */
      if (bufleft > 0)
        abort();
    }
  }
}

#else

/* Cursor into the caller's output buffer, shared between the conversion
   loop and the replacement callback.
     l_outbuf        next byte to write
     l_outbytesleft  bytes still available at l_outbuf
     l_errno         0, or the first error recorded by the callback  */
struct wc_to_mb_fallback_locals {
  char* l_outbuf;
  size_t l_outbytesleft;
  int l_errno;
};

/* A callback that writes a string given in the target encoding.
   BUF/BUFLEN is the replacement text; CALLBACK_ARG points to a
   struct wc_to_mb_fallback_locals.  The bytes are copied verbatim.  If they
   do not fit, nothing is written and l_errno becomes E2BIG; once l_errno is
   nonzero all further calls are ignored. */
static void wc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct wc_to_mb_fallback_locals * plocals =
    (struct wc_to_mb_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Attempt to copy the passed buffer to the output buffer. */
    if (plocals->l_outbytesleft < buflen)
      plocals->l_errno = E2BIG;
    else if (buflen > 0) {
      /* An empty replacement is skipped so that memcpy is never handed a
         possibly null BUF. */
      memcpy(plocals->l_outbuf, buf, buflen);
      plocals->l_outbuf += buflen;
      plocals->l_outbytesleft -= buflen;
    }
  }
}

#endif

#endif /* !LIBICONV_PLUG */
139 static size_t wchar_from_loop_convert (iconv_t icd,
140 const char* * inbuf, size_t *inbytesleft,
141 char* * outbuf, size_t *outbytesleft)
143 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
144 size_t result = 0;
145 while (*inbytesleft >= sizeof(wchar_t)) {
146 const wchar_t * inptr = (const wchar_t *) *inbuf;
147 size_t inleft = *inbytesleft;
148 char buf[BUF_SIZE];
149 mbstate_t state = wcd->state;
150 size_t bufcount = 0;
151 while (inleft >= sizeof(wchar_t)) {
152 /* Convert one wchar_t to multibyte representation. */
153 size_t count = wcrtomb(buf+bufcount,*inptr,&state);
154 if (count == (size_t)(-1)) {
155 /* Invalid input. */
156 if (wcd->parent.discard_ilseq) {
157 count = 0;
159 #ifndef LIBICONV_PLUG
160 else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) {
161 /* Drop the contents of buf[] accumulated so far, and instead
162 pass all queued wide characters to the fallback handler. */
163 struct wc_to_mb_fallback_locals locals;
164 const wchar_t * fallback_inptr;
165 #if 0
166 locals.l_wcd = wcd;
167 #endif
168 locals.l_outbuf = *outbuf;
169 locals.l_outbytesleft = *outbytesleft;
170 locals.l_errno = 0;
171 for (fallback_inptr = (const wchar_t *) *inbuf;
172 fallback_inptr <= inptr;
173 fallback_inptr++)
174 wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr,
175 wc_to_mb_write_replacement,
176 &locals,
177 wcd->parent.fallbacks.data);
178 if (locals.l_errno != 0) {
179 errno = locals.l_errno;
180 return -1;
182 wcd->state = state;
183 *inbuf = (const char *) (inptr + 1);
184 *inbytesleft = inleft - sizeof(wchar_t);
185 *outbuf = locals.l_outbuf;
186 *outbytesleft = locals.l_outbytesleft;
187 result += 1;
188 break;
190 #endif
191 else {
192 errno = EILSEQ;
193 return -1;
196 inptr++;
197 inleft -= sizeof(wchar_t);
198 bufcount += count;
199 if (count == 0) {
200 /* Continue, append next wchar_t. */
201 } else {
202 /* Attempt to convert the accumulated multibyte representations
203 to the target encoding. */
204 const char* bufptr = buf;
205 size_t bufleft = bufcount;
206 char* outptr = *outbuf;
207 size_t outleft = *outbytesleft;
208 size_t res = unicode_loop_convert(&wcd->parent,
209 &bufptr,&bufleft,
210 &outptr,&outleft);
211 if (res == (size_t)(-1)) {
212 if (errno == EILSEQ)
213 /* Invalid input. */
214 return -1;
215 else if (errno == E2BIG)
216 /* Output buffer too small. */
217 return -1;
218 else if (errno == EINVAL) {
219 /* Continue, append next wchar_t, but avoid buffer overrun. */
220 if (bufcount + MB_CUR_MAX > BUF_SIZE)
221 abort();
222 } else
223 abort();
224 } else {
225 /* Successful conversion. */
226 wcd->state = state;
227 *inbuf = (const char *) inptr;
228 *inbytesleft = inleft;
229 *outbuf = outptr;
230 *outbytesleft = outleft;
231 result += res;
232 break;
237 return result;
240 static size_t wchar_from_loop_reset (iconv_t icd,
241 char* * outbuf, size_t *outbytesleft)
243 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
244 if (outbuf == NULL || *outbuf == NULL) {
245 /* Reset the states. */
246 memset(&wcd->state,'\0',sizeof(mbstate_t));
247 return unicode_loop_reset(&wcd->parent,NULL,NULL);
248 } else {
249 if (!mbsinit(&wcd->state)) {
250 mbstate_t state = wcd->state;
251 char buf[BUF_SIZE];
252 size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
253 if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
254 abort();
255 else {
256 const char* bufptr = buf;
257 size_t bufleft = bufcount-1;
258 char* outptr = *outbuf;
259 size_t outleft = *outbytesleft;
260 size_t res = unicode_loop_convert(&wcd->parent,
261 &bufptr,&bufleft,
262 &outptr,&outleft);
263 if (res == (size_t)(-1)) {
264 if (errno == E2BIG)
265 return -1;
266 else
267 abort();
268 } else {
269 res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
270 if (res == (size_t)(-1))
271 return res;
272 else {
273 /* Successful. */
274 wcd->state = state;
275 *outbuf = outptr;
276 *outbytesleft = outleft;
277 return 0;
281 } else
282 return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);

#endif


#if HAVE_MBRTOWC

/* From anything else to wchar_t. */

#ifndef LIBICONV_PLUG

/* Cursor into the caller's output buffer, shared between the conversion
   loop and the replacement callback.
     l_outbuf        next byte to write
     l_outbytesleft  bytes still available at l_outbuf
     l_errno         0, or the first error recorded by the callback  */
struct mb_to_wc_fallback_locals {
  char* l_outbuf;
  size_t l_outbytesleft;
  int l_errno;
};

/* A callback that writes BUFLEN wide characters from BUF to the output
   buffer described by CALLBACK_ARG (a struct mb_to_wc_fallback_locals).
   If they do not fit, nothing is written and l_errno becomes E2BIG; once
   l_errno is nonzero all further calls are ignored. */
static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen,
                                        void* callback_arg)
{
  struct mb_to_wc_fallback_locals * plocals =
    (struct mb_to_wc_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Attempt to copy the passed buffer to the output buffer.
       The bound is checked by division: sizeof(wchar_t)*buflen could wrap
       around for a huge buflen and let the copy run past the buffer. */
    if (buflen > plocals->l_outbytesleft / sizeof(wchar_t))
      plocals->l_errno = E2BIG;
    else if (buflen > 0) {
      size_t nbytes = buflen * sizeof(wchar_t);
      /* l_outbuf is a char pointer and need not be aligned for wchar_t,
         so copy bytes instead of storing through a cast pointer. */
      memcpy(plocals->l_outbuf, buf, nbytes);
      plocals->l_outbuf += nbytes;
      plocals->l_outbytesleft -= nbytes;
    }
  }
}

#endif /* !LIBICONV_PLUG */
323 static size_t wchar_to_loop_convert (iconv_t icd,
324 const char* * inbuf, size_t *inbytesleft,
325 char* * outbuf, size_t *outbytesleft)
327 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
328 size_t result = 0;
329 while (*inbytesleft > 0) {
330 size_t incount;
331 for (incount = 1; ; ) {
332 /* Here incount <= *inbytesleft. */
333 char buf[BUF_SIZE];
334 const char* inptr = *inbuf;
335 size_t inleft = incount;
336 char* bufptr = buf;
337 size_t bufleft = BUF_SIZE;
338 size_t res = unicode_loop_convert(&wcd->parent,
339 &inptr,&inleft,
340 &bufptr,&bufleft);
341 if (res == (size_t)(-1)) {
342 if (errno == EILSEQ)
343 /* Invalid input. */
344 return -1;
345 else if (errno == EINVAL) {
346 /* Incomplete input. Next try with one more input byte. */
347 } else
348 /* E2BIG shouldn't occur. */
349 abort();
350 } else {
351 /* Successful conversion. */
352 size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
353 mbstate_t state = wcd->state;
354 wchar_t wc;
355 res = mbrtowc(&wc,buf,bufcount,&state);
356 if (res == (size_t)(-2)) {
357 /* Next try with one more input byte. */
358 } else {
359 if (res == (size_t)(-1)) {
360 /* Invalid input. */
361 if (wcd->parent.discard_ilseq) {
363 #ifndef LIBICONV_PLUG
364 else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) {
365 /* Drop the contents of buf[] accumulated so far, and instead
366 pass all queued chars to the fallback handler. */
367 struct mb_to_wc_fallback_locals locals;
368 locals.l_outbuf = *outbuf;
369 locals.l_outbytesleft = *outbytesleft;
370 locals.l_errno = 0;
371 wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount,
372 mb_to_wc_write_replacement,
373 &locals,
374 wcd->parent.fallbacks.data);
375 if (locals.l_errno != 0) {
376 errno = locals.l_errno;
377 return -1;
379 /* Restoring the state is not needed because it is the initial
380 state anyway: For all known locale encodings, the multibyte
381 to wchar_t conversion doesn't have shift state, and we have
382 excluded partial accumulated characters. */
383 /* wcd->state = state; */
384 *inbuf += incount;
385 *inbytesleft -= incount;
386 *outbuf = locals.l_outbuf;
387 *outbytesleft = locals.l_outbytesleft;
388 result += 1;
389 break;
391 #endif
392 else
393 return -1;
394 } else {
395 if (*outbytesleft < sizeof(wchar_t)) {
396 errno = E2BIG;
397 return -1;
399 *(wchar_t*) *outbuf = wc;
400 /* Restoring the state is not needed because it is the initial
401 state anyway: For all known locale encodings, the multibyte
402 to wchar_t conversion doesn't have shift state, and we have
403 excluded partial accumulated characters. */
404 /* wcd->state = state; */
405 *outbuf += sizeof(wchar_t);
406 *outbytesleft -= sizeof(wchar_t);
408 *inbuf += incount;
409 *inbytesleft -= incount;
410 result += res;
411 break;
414 incount++;
415 if (incount > *inbytesleft) {
416 /* Incomplete input. */
417 errno = EINVAL;
418 return -1;
422 return result;
425 static size_t wchar_to_loop_reset (iconv_t icd,
426 char* * outbuf, size_t *outbytesleft)
428 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
429 size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
430 if (res == (size_t)(-1))
431 return res;
432 memset(&wcd->state,0,sizeof(mbstate_t));
433 return 0;

#endif


/* From wchar_t to wchar_t. */

441 static size_t wchar_id_loop_convert (iconv_t icd,
442 const char* * inbuf, size_t *inbytesleft,
443 char* * outbuf, size_t *outbytesleft)
445 struct conv_struct * cd = (struct conv_struct *) icd;
446 const wchar_t* inptr = (const wchar_t*) *inbuf;
447 size_t inleft = *inbytesleft / sizeof(wchar_t);
448 wchar_t* outptr = (wchar_t*) *outbuf;
449 size_t outleft = *outbytesleft / sizeof(wchar_t);
450 size_t count = (inleft <= outleft ? inleft : outleft);
451 if (count > 0) {
452 *inbytesleft -= count * sizeof(wchar_t);
453 *outbytesleft -= count * sizeof(wchar_t);
454 do {
455 wchar_t wc = *inptr++;
456 *outptr++ = wc;
457 #ifndef LIBICONV_PLUG
458 if (cd->hooks.wc_hook)
459 (*cd->hooks.wc_hook)(wc, cd->hooks.data);
460 #endif
461 } while (--count > 0);
462 *inbuf = (const char*) inptr;
463 *outbuf = (char*) outptr;
465 return 0;
/* Reset function for the identity loop.  The loop keeps no state and has
   nothing to flush, so this does nothing and returns 0. */
static size_t wchar_id_loop_reset (iconv_t icd,
                                   char* * outbuf, size_t *outbytesleft)
{
  return 0;
}