moved common stuff to CVSROOT/cvsignore
[libiconv.git] / lib / loop_unicode.h
blob74166abb42586feb270a741f39c8264dfc962350
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */
/* This file defines the conversion loop via Unicode as a pivot encoding. */
23 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24 static int unicode_transliterate (conv_t cd, ucs4_t wc,
25 unsigned char* outptr, size_t outleft)
27 if (cd->oflags & HAVE_HANGUL_JAMO) {
28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30 (contained in Unicode only). */
31 ucs4_t buf[3];
32 int ret = johab_hangul_decompose(cd,buf,wc);
33 if (ret != RET_ILUNI) {
34 /* we know 1 <= ret <= 3 */
35 state_t backup_state = cd->ostate;
36 unsigned char* backup_outptr = outptr;
37 size_t backup_outleft = outleft;
38 int i, sub_outcount;
39 for (i = 0; i < ret; i++) {
40 if (outleft == 0) {
41 sub_outcount = RET_TOOSMALL;
42 goto johab_hangul_failed;
44 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
45 if (sub_outcount <= RET_ILUNI)
46 goto johab_hangul_failed;
47 if (!(sub_outcount <= outleft)) abort();
48 outptr += sub_outcount; outleft -= sub_outcount;
50 return outptr-backup_outptr;
51 johab_hangul_failed:
52 cd->ostate = backup_state;
53 outptr = backup_outptr;
54 outleft = backup_outleft;
55 if (sub_outcount < 0)
56 return RET_TOOSMALL;
60 /* Try to use a variant, but postfix it with
61 U+303E IDEOGRAPHIC VARIATION INDICATOR
62 (cf. Ken Lunde's "CJKV information processing", p. 188). */
63 int indx = -1;
64 if (wc == 0x3006)
65 indx = 0;
66 else if (wc == 0x30f6)
67 indx = 1;
68 else if (wc >= 0x4e00 && wc < 0xa000)
69 indx = cjk_variants_indx[wc-0x4e00];
70 if (indx >= 0) {
71 for (;; indx++) {
72 ucs4_t buf[2];
73 unsigned short variant = cjk_variants[indx];
74 unsigned short last = variant & 0x8000;
75 variant &= 0x7fff;
76 variant += 0x3000;
77 buf[0] = variant; buf[1] = 0x303e;
79 state_t backup_state = cd->ostate;
80 unsigned char* backup_outptr = outptr;
81 size_t backup_outleft = outleft;
82 int i, sub_outcount;
83 for (i = 0; i < 2; i++) {
84 if (outleft == 0) {
85 sub_outcount = RET_TOOSMALL;
86 goto variant_failed;
88 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,buf[i],outleft);
89 if (sub_outcount <= RET_ILUNI)
90 goto variant_failed;
91 if (!(sub_outcount <= outleft)) abort();
92 outptr += sub_outcount; outleft -= sub_outcount;
94 return outptr-backup_outptr;
95 variant_failed:
96 cd->ostate = backup_state;
97 outptr = backup_outptr;
98 outleft = backup_outleft;
99 if (sub_outcount < 0)
100 return RET_TOOSMALL;
102 if (last)
103 break;
107 if (wc >= 0x2018 && wc <= 0x201a) {
108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
109 ucs4_t substitute =
110 (cd->oflags & HAVE_QUOTATION_MARKS
111 ? (wc == 0x201a ? 0x2018 : wc)
112 : (cd->oflags & HAVE_ACCENTS
113 ? (wc==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114 : 0x0027 /* use apostrophe */
115 ) );
116 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,substitute,outleft);
117 if (outcount != RET_ILUNI)
118 return outcount;
121 /* Use the transliteration table. */
122 int indx = translit_index(wc);
123 if (indx >= 0) {
124 const unsigned short * cp = &translit_data[indx];
125 unsigned int num = *cp++;
126 state_t backup_state = cd->ostate;
127 unsigned char* backup_outptr = outptr;
128 size_t backup_outleft = outleft;
129 unsigned int i;
130 int sub_outcount;
131 for (i = 0; i < num; i++) {
132 if (outleft == 0) {
133 sub_outcount = RET_TOOSMALL;
134 goto translit_failed;
136 sub_outcount = cd->ofuncs.xxx_wctomb(cd,outptr,cp[i],outleft);
137 if (sub_outcount <= RET_ILUNI)
138 goto translit_failed;
139 if (!(sub_outcount <= outleft)) abort();
140 outptr += sub_outcount; outleft -= sub_outcount;
142 return outptr-backup_outptr;
143 translit_failed:
144 cd->ostate = backup_state;
145 outptr = backup_outptr;
146 outleft = backup_outleft;
147 if (sub_outcount < 0)
148 return RET_TOOSMALL;
151 return RET_ILUNI;
154 static size_t unicode_loop_convert (iconv_t icd,
155 const char* * inbuf, size_t *inbytesleft,
156 char* * outbuf, size_t *outbytesleft)
158 conv_t cd = (conv_t) icd;
159 size_t result = 0;
160 const unsigned char* inptr = (const unsigned char*) *inbuf;
161 size_t inleft = *inbytesleft;
162 unsigned char* outptr = (unsigned char*) *outbuf;
163 size_t outleft = *outbytesleft;
164 while (inleft > 0) {
165 ucs4_t wc;
166 int incount;
167 int outcount;
168 incount = cd->ifuncs.xxx_mbtowc(cd,&wc,inptr,inleft);
169 if (incount < 0) {
170 if (incount == RET_ILSEQ) {
171 /* Case 1: invalid input */
172 errno = EILSEQ;
173 result = -1;
174 break;
176 if (incount == RET_TOOFEW(0)) {
177 /* Case 2: not enough bytes available to detect anything */
178 errno = EINVAL;
179 result = -1;
180 break;
182 /* Case 3: k bytes read, but only a shift sequence */
183 incount = -2-incount;
184 } else {
185 /* Case 4: k bytes read, making up a wide character */
186 if (outleft == 0) {
187 errno = E2BIG;
188 result = -1;
189 break;
191 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
192 if (outcount != RET_ILUNI)
193 goto outcount_ok;
194 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
195 if ((wc >> 7) == (0xe0000 >> 7))
196 goto outcount_zero;
197 /* Try transliteration. */
198 result++;
199 if (cd->transliterate) {
200 outcount = unicode_transliterate(cd,wc,outptr,outleft);
201 if (outcount != RET_ILUNI)
202 goto outcount_ok;
204 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
205 if (outcount != RET_ILUNI)
206 goto outcount_ok;
207 errno = EILSEQ;
208 result = -1;
209 break;
210 outcount_ok:
211 if (outcount < 0) {
212 errno = E2BIG;
213 result = -1;
214 break;
216 if (!(outcount <= outleft)) abort();
217 outptr += outcount; outleft -= outcount;
218 outcount_zero: ;
220 if (!(incount <= inleft)) abort();
221 inptr += incount; inleft -= incount;
223 *inbuf = (const char*) inptr;
224 *inbytesleft = inleft;
225 *outbuf = (char*) outptr;
226 *outbytesleft = outleft;
227 return result;
230 static size_t unicode_loop_reset (iconv_t icd,
231 char* * outbuf, size_t *outbytesleft)
233 conv_t cd = (conv_t) icd;
234 if (outbuf == NULL || *outbuf == NULL) {
235 /* Reset the states. */
236 memset(&cd->istate,'\0',sizeof(state_t));
237 memset(&cd->ostate,'\0',sizeof(state_t));
238 return 0;
239 } else {
240 size_t result = 0;
241 if (cd->ifuncs.xxx_flushwc) {
242 ucs4_t wc;
243 if (cd->ifuncs.xxx_flushwc(cd, &wc)) {
244 unsigned char* outptr = (unsigned char*) *outbuf;
245 size_t outleft = *outbytesleft;
246 int outcount = cd->ofuncs.xxx_wctomb(cd,outptr,wc,outleft);
247 if (outcount != RET_ILUNI)
248 goto outcount_ok;
249 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
250 if ((wc >> 7) == (0xe0000 >> 7))
251 goto outcount_zero;
252 /* Try transliteration. */
253 result++;
254 if (cd->transliterate) {
255 outcount = unicode_transliterate(cd,wc,outptr,outleft);
256 if (outcount != RET_ILUNI)
257 goto outcount_ok;
259 outcount = cd->ofuncs.xxx_wctomb(cd,outptr,0xFFFD,outleft);
260 if (outcount != RET_ILUNI)
261 goto outcount_ok;
262 errno = EILSEQ;
263 return -1;
264 outcount_ok:
265 if (outcount < 0) {
266 errno = E2BIG;
267 return -1;
269 if (!(outcount <= outleft)) abort();
270 outptr += outcount;
271 outleft -= outcount;
272 outcount_zero:
273 *outbuf = (char*) outptr;
274 *outbytesleft = outleft;
277 if (cd->ofuncs.xxx_reset) {
278 unsigned char* outptr = (unsigned char*) *outbuf;
279 size_t outleft = *outbytesleft;
280 int outcount = cd->ofuncs.xxx_reset(cd,outptr,outleft);
281 if (outcount < 0) {
282 errno = E2BIG;
283 return -1;
285 if (!(outcount <= outleft)) abort();
286 *outbuf = (char*) (outptr + outcount);
287 *outbytesleft = outleft - outcount;
289 memset(&cd->istate,'\0',sizeof(state_t));
290 memset(&cd->ostate,'\0',sizeof(state_t));
291 return result;