/* Source: libiconv.git, lib/loop_wchar.h
   (blob feacabe4117c59bd718951e2b041750c36a0f5bf).
   Commit subject: "CP1258 handles combining characters."  */
/*
 * Copyright (C) 2000-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
/* This file defines three conversion loops:
     - from wchar_t to anything else,
     - from anything else to wchar_t,
     - from wchar_t to wchar_t.
 */
/* Wide-character support glue.  <wchar.h> is only included when the
   configuration reports at least one of wcrtomb() / mbrtowc().  */
#if HAVE_WCRTOMB || HAVE_MBRTOWC
# include <wchar.h>
  /* Size of the scratch buffers holding multibyte sequences;
     assumes MB_LEN_MAX <= 64.  */
# define BUF_SIZE 64
  /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  extern size_t mbrtowc ();
# ifdef mbstate_t
   /* mbstate_t is a macro here (presumably supplied by the build
      configuration for such systems -- confirm).  mbrtowc() is then always
      called with a null state pointer, and mbsinit() always reports the
      initial state.  */
#  define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0)
#  define mbsinit(ps) 1
# endif
#else
  /* No restartable conversion functions: provide a placeholder type so that
     declarations using mbstate_t still compile.  */
# ifndef mbstate_t
   typedef int mbstate_t;
# endif
#endif
43 * The first two conversion loops have an extended conversion descriptor.
45 struct wchar_conv_struct {
46 struct conv_struct parent;
47 mbstate_t state;
#if HAVE_WCRTOMB
53 /* From wchar_t to anything else. */
55 static size_t wchar_from_loop_convert (iconv_t icd,
56 const char* * inbuf, size_t *inbytesleft,
57 char* * outbuf, size_t *outbytesleft)
59 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
60 size_t result = 0;
61 while (*inbytesleft >= sizeof(wchar_t)) {
62 const wchar_t * inptr = (const wchar_t *) *inbuf;
63 size_t inleft = *inbytesleft;
64 char buf[BUF_SIZE];
65 mbstate_t state = wcd->state;
66 size_t bufcount = 0;
67 while (inleft >= sizeof(wchar_t)) {
68 /* Convert one wchar_t to multibyte representation. */
69 size_t count = wcrtomb(buf+bufcount,*inptr,&state);
70 if (count == (size_t)(-1)) {
71 /* Invalid input. */
72 errno = EILSEQ;
73 return -1;
75 inptr++;
76 inleft -= sizeof(wchar_t);
77 bufcount += count;
78 if (count == 0) {
79 /* Continue, append next wchar_t. */
80 } else {
81 /* Attempt to convert the accumulated multibyte representations
82 to the target encoding. */
83 const char* bufptr = buf;
84 size_t bufleft = bufcount;
85 char* outptr = *outbuf;
86 size_t outleft = *outbytesleft;
87 size_t res = unicode_loop_convert(&wcd->parent,
88 &bufptr,&bufleft,
89 &outptr,&outleft);
90 if (res == (size_t)(-1)) {
91 if (errno == EILSEQ)
92 /* Invalid input. */
93 return -1;
94 else if (errno == E2BIG)
95 /* Output buffer too small. */
96 return -1;
97 else if (errno == EINVAL) {
98 /* Continue, append next wchar_t, but avoid buffer overrun. */
99 if (bufcount + MB_CUR_MAX > BUF_SIZE)
100 abort();
101 } else
102 abort();
103 } else {
104 /* Successful conversion. */
105 wcd->state = state;
106 *inbuf = (const char *) inptr;
107 *inbytesleft = inleft;
108 *outbuf = outptr;
109 *outbytesleft = outleft;
110 result += res;
111 break;
116 return result;
119 static size_t wchar_from_loop_reset (iconv_t icd,
120 char* * outbuf, size_t *outbytesleft)
122 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
123 if (outbuf == NULL || *outbuf == NULL) {
124 /* Reset the states. */
125 memset(&wcd->state,'\0',sizeof(mbstate_t));
126 return unicode_loop_reset(&wcd->parent,NULL,NULL);
127 } else {
128 if (!mbsinit(&wcd->state)) {
129 mbstate_t state = wcd->state;
130 char buf[BUF_SIZE];
131 size_t bufcount = wcrtomb(buf,(wchar_t)0,&state);
132 if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0')
133 abort();
134 else {
135 const char* bufptr = buf;
136 size_t bufleft = bufcount-1;
137 char* outptr = *outbuf;
138 size_t outleft = *outbytesleft;
139 size_t res = unicode_loop_convert(&wcd->parent,
140 &bufptr,&bufleft,
141 &outptr,&outleft);
142 if (res == (size_t)(-1)) {
143 if (errno == E2BIG)
144 return -1;
145 else
146 abort();
147 } else {
148 res = unicode_loop_reset(&wcd->parent,&outptr,&outleft);
149 if (res == (size_t)(-1))
150 return res;
151 else {
152 /* Successful. */
153 wcd->state = state;
154 *outbuf = outptr;
155 *outbytesleft = outleft;
156 return 0;
160 } else
161 return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
#endif


#if HAVE_MBRTOWC
170 /* From anything else to wchar_t. */
172 static size_t wchar_to_loop_convert (iconv_t icd,
173 const char* * inbuf, size_t *inbytesleft,
174 char* * outbuf, size_t *outbytesleft)
176 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
177 size_t result = 0;
178 while (*inbytesleft > 0) {
179 size_t try;
180 for (try = 1; try <= *inbytesleft; try++) {
181 char buf[BUF_SIZE];
182 const char* inptr = *inbuf;
183 size_t inleft = try;
184 char* bufptr = buf;
185 size_t bufleft = BUF_SIZE;
186 size_t res = unicode_loop_convert(&wcd->parent,
187 &inptr,&inleft,
188 &bufptr,&bufleft);
189 if (res == (size_t)(-1)) {
190 if (errno == EILSEQ)
191 /* Invalid input. */
192 return -1;
193 else if (errno == EINVAL) {
194 /* Incomplete input. Next try with one more input byte. */
195 } else
196 /* E2BIG shouldn't occur. */
197 abort();
198 } else {
199 /* Successful conversion. */
200 size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */
201 mbstate_t state = wcd->state;
202 wchar_t wc;
203 res = mbrtowc(&wc,buf,bufcount,&state);
204 if (res == (size_t)(-2)) {
205 /* Next try with one more input byte. */
206 } else if (res == (size_t)(-1)) {
207 /* Invalid input. */
208 return -1;
209 } else {
210 if (*outbytesleft < sizeof(wchar_t)) {
211 errno = E2BIG;
212 return -1;
214 *(wchar_t*) *outbuf = wc;
215 *outbuf += sizeof(wchar_t);
216 *outbytesleft -= sizeof(wchar_t);
217 *inbuf += try;
218 *inbytesleft -= try;
219 result += res;
220 break;
225 return result;
228 static size_t wchar_to_loop_reset (iconv_t icd,
229 char* * outbuf, size_t *outbytesleft)
231 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd;
232 size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft);
233 if (res == (size_t)(-1))
234 return res;
235 memset(&wcd->state,0,sizeof(mbstate_t));
236 return 0;
#endif
/* From wchar_t to wchar_t. */

/*
 * Identity conversion: copies whole wchar_t units from *inbuf to *outbuf.
 *
 * The number of units copied is the smaller of the whole units available in
 * the input and the whole units that fit in the output.  *inbuf, *outbuf,
 * *inbytesleft and *outbytesleft are advanced by exactly the bytes copied.
 * Leftover input (a partial unit, or units that did not fit) is left in
 * place.  The descriptor is not used.
 *
 * Always returns 0.
 */
static size_t wchar_id_loop_convert (iconv_t icd,
                                     const char* * inbuf, size_t *inbytesleft,
                                     char* * outbuf, size_t *outbytesleft)
{
  const wchar_t* inptr = (const wchar_t*) *inbuf;
  size_t inleft = *inbytesleft / sizeof(wchar_t);
  wchar_t* outptr = (wchar_t*) *outbuf;
  size_t outleft = *outbytesleft / sizeof(wchar_t);
  size_t count = (inleft <= outleft ? inleft : outleft);
  (void) icd;
  if (count > 0) {
    *inbytesleft -= count * sizeof(wchar_t);
    *outbytesleft -= count * sizeof(wchar_t);
    /* Copy all count units, not just the first one. */
    do {
      *outptr++ = *inptr++;
    } while (--count > 0);
    *inbuf = (const char*) inptr;
    *outbuf = (char*) outptr;
  }
  return 0;
}
/*
 * Reset function for the wchar_t -> wchar_t identity loop.
 * It does nothing with its arguments and always returns 0.
 */
static size_t wchar_id_loop_reset (iconv_t icd,
                                   char* * outbuf, size_t *outbytesleft)
{
  (void) icd;
  (void) outbuf;
  (void) outbytesleft;
  return 0;
}