Decouple the mbtowc and wctomb calling conventions.
[libiconv.git] / lib / loop_unicode.h
blobcbe80f4bb89466afb93e3b5dab03e3f2dca67c02
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
/* This file defines the conversion loop via Unicode as a pivot encoding. */
23 static size_t unicode_loop_convert (iconv_t icd,
24 const char* * inbuf, size_t *inbytesleft,
25 char* * outbuf, size_t *outbytesleft)
27 conv_t cd = (conv_t) icd;
28 size_t result = 0;
29 const unsigned char* inptr = (const unsigned char*) *inbuf;
30 size_t inleft = *inbytesleft;
31 unsigned char* outptr = (unsigned char*) *outbuf;
32 size_t outleft = *outbytesleft;
33 while (inleft > 0) {
34 ucs4_t wc;
35 int incount;
36 int outcount;
37 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
38 if (incount < 0) {
39 if (incount == RET_ILSEQ) {
40 /* Case 1: invalid input */
41 errno = EILSEQ;
42 result = -1;
43 break;
45 if (incount == RET_TOOFEW(0)) {
46 /* Case 2: not enough bytes available to detect anything */
47 errno = EINVAL;
48 result = -1;
49 break;
51 /* Case 3: k bytes read, but only a shift sequence */
52 incount = -2-incount;
53 } else {
54 /* Case 4: k bytes read, making up a wide character */
55 if (outleft == 0) {
56 errno = E2BIG;
57 result = -1;
58 break;
60 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
61 if (outcount != RET_ILUNI)
62 goto outcount_ok;
63 /* Try transliteration. */
64 result++;
65 if (cd->transliterate) {
66 if (cd->oflags & HAVE_HANGUL_JAMO) {
67 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
68 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
69 (contained in Unicode only). */
70 ucs4_t buf[3];
71 int ret = johab_hangul_decompose(cd,buf,wc);
72 if (ret != RET_ILUNI) {
73 /* we know 1 <= ret <= 3 */
74 state_t backup_state = cd->ostate;
75 unsigned char* backup_outptr = outptr;
76 size_t backup_outleft = outleft;
77 int i, sub_outcount;
78 for (i = 0; i < ret; i++) {
79 if (outleft == 0) {
80 sub_outcount = RET_TOOSMALL;
81 goto johab_hangul_failed;
83 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
84 if (sub_outcount <= 0)
85 goto johab_hangul_failed;
86 if (!(sub_outcount <= outleft)) abort();
87 outptr += sub_outcount; outleft -= sub_outcount;
89 goto char_done;
90 johab_hangul_failed:
91 cd->ostate = backup_state;
92 outptr = backup_outptr;
93 outleft = backup_outleft;
94 if (sub_outcount < 0) {
95 errno = E2BIG;
96 result = -1;
97 break;
102 /* Try to use a variant, but postfix it with
103 U+303E IDEOGRAPHIC VARIATION INDICATOR
104 (cf. Ken Lunde's "CJKV information processing", p. 188). */
105 int indx = -1;
106 if (wc == 0x3006)
107 indx = 0;
108 else if (wc == 0x30f6)
109 indx = 1;
110 else if (wc >= 0x4e00 && wc < 0xa000)
111 indx = cjk_variants_indx[wc-0x4e00];
112 if (indx >= 0) {
113 for (;; indx++) {
114 ucs4_t buf[2];
115 unsigned short variant = cjk_variants[indx];
116 unsigned short last = variant & 0x8000;
117 variant &= 0x7fff;
118 variant += 0x3000;
119 buf[0] = variant; buf[1] = 0x303e;
121 state_t backup_state = cd->ostate;
122 unsigned char* backup_outptr = outptr;
123 size_t backup_outleft = outleft;
124 int i, sub_outcount;
125 for (i = 0; i < 2; i++) {
126 if (outleft == 0) {
127 sub_outcount = RET_TOOSMALL;
128 goto variant_failed;
130 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
131 if (sub_outcount <= 0)
132 goto variant_failed;
133 if (!(sub_outcount <= outleft)) abort();
134 outptr += sub_outcount; outleft -= sub_outcount;
136 goto char_done;
137 variant_failed:
138 cd->ostate = backup_state;
139 outptr = backup_outptr;
140 outleft = backup_outleft;
141 if (sub_outcount < 0) {
142 errno = E2BIG;
143 result = -1;
144 break;
147 if (last)
148 break;
152 if (wc >= 0x2018 && wc <= 0x201a) {
153 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
154 ucs4_t substitute =
155 (cd->oflags & HAVE_QUOTATION_MARKS
156 ? (wc == 0x201a ? 0x2018 : wc)
157 : (cd->oflags & HAVE_ACCENTS
158 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
159 : 0x0027 /* use apostrophe */
160 ) );
161 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
162 if (outcount != 0)
163 goto outcount_ok;
166 /* Use the transliteration table. */
167 int indx = translit_index(wc);
168 if (indx >= 0) {
169 const unsigned short * cp = &translit_data[indx];
170 unsigned int num = *cp++;
171 state_t backup_state = cd->ostate;
172 unsigned char* backup_outptr = outptr;
173 size_t backup_outleft = outleft;
174 unsigned int i;
175 int sub_outcount;
176 for (i = 0; i < num; i++) {
177 if (outleft == 0) {
178 sub_outcount = RET_TOOSMALL;
179 goto translit_failed;
181 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
182 if (sub_outcount <= 0)
183 goto translit_failed;
184 if (!(sub_outcount <= outleft)) abort();
185 outptr += sub_outcount; outleft -= sub_outcount;
187 goto char_done;
188 translit_failed:
189 cd->ostate = backup_state;
190 outptr = backup_outptr;
191 outleft = backup_outleft;
192 if (sub_outcount < 0) {
193 errno = E2BIG;
194 result = -1;
195 break;
200 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
201 if (outcount != 0)
202 goto outcount_ok;
203 errno = EILSEQ;
204 result = -1;
205 break;
206 outcount_ok:
207 if (outcount < 0) {
208 errno = E2BIG;
209 result = -1;
210 break;
212 if (!(outcount <= outleft)) abort();
213 outptr += outcount; outleft -= outcount;
214 char_done:
217 if (!(incount <= inleft)) abort();
218 inptr += incount; inleft -= incount;
220 *inbuf = (const char*) inptr;
221 *inbytesleft = inleft;
222 *outbuf = (char*) outptr;
223 *outbytesleft = outleft;
224 return result;
227 static size_t unicode_loop_reset (iconv_t icd,
228 char* * outbuf, size_t *outbytesleft)
230 conv_t cd = (conv_t) icd;
231 if (outbuf == NULL || *outbuf == NULL) {
232 /* Reset the states. */
233 memset(&cd->istate,'\0',sizeof(state_t));
234 memset(&cd->ostate,'\0',sizeof(state_t));
235 return 0;
236 } else {
237 if (cd->ofuncs.xxx_reset) {
238 int outcount =
239 cd->ofuncs.xxx_reset(cd, (unsigned char *) *outbuf, *outbytesleft);
240 if (outcount < 0) {
241 errno = E2BIG;
242 return -1;
244 *outbuf += outcount; *outbytesleft -= outcount;
246 memset(&cd->istate,'\0',sizeof(state_t));
247 memset(&cd->ostate,'\0',sizeof(state_t));
248 return 0;