/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */

/* This file defines the conversion loop via Unicode as a pivot encoding. */
23 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
/* The visible statements try these fallbacks in order:
   1. Hangul decomposition via johab_hangul_decompose (only when
      cd->oflags has HAVE_HANGUL_JAMO set),
   2. a CJK variant taken from cjk_variants, followed by U+303E,
   3. a substitute for the quotation marks 0x2018..0x201a,
   4. the replacement sequence found through translit_index/translit_data.
   Output is written at outptr, using at most outleft bytes.
   Each multi-character attempt first saves cd->ostate, outptr and outleft
   and restores them when an element cannot be converted.
   NOTE(review): this listing is missing lines (braces, declarations,
   labels, some conditions and return statements); the comments below only
   describe the statements that are visible. */
24 static int unicode_transliterate (conv_t cd
, ucs4_t wc
,
25 unsigned char* outptr
, size_t outleft
)
27 if (cd
->oflags
& HAVE_HANGUL_JAMO
) {
28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30 (contained in Unicode only). */
32 int ret
= johab_hangul_decompose(cd
,buf
,wc
);
33 if (ret
!= RET_ILUNI
) {
34 /* we know 1 <= ret <= 3 */
/* Remember the output state and position so the attempt can be undone. */
35 state_t backup_state
= cd
->ostate
;
36 unsigned char* backup_outptr
= outptr
;
37 size_t backup_outleft
= outleft
;
/* Convert each of the ret decomposed elements in turn. */
39 for (i
= 0; i
< ret
; i
++) {
/* NOTE(review): the condition guarding the next two statements is not
   visible in this listing. */
41 sub_outcount
= RET_TOOSMALL
;
42 goto johab_hangul_failed
;
/* Encode buf[i] with the output encoding's converter. */
44 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* A result <= RET_ILUNI abandons the whole attempt. */
45 if (sub_outcount
<= RET_ILUNI
)
46 goto johab_hangul_failed
;
/* Sanity check: the converter must not report more bytes than were
   available. */
47 if (!(sub_outcount
<= outleft
)) abort();
/* Advance past the bytes just written. */
48 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* All elements converted: return the number of bytes written. */
50 return outptr
-backup_outptr
;
/* Undo the partial output.
   NOTE(review): the johab_hangul_failed label targeted by the gotos above
   is not visible here; presumably it precedes these statements. */
52 cd
->ostate
= backup_state
;
53 outptr
= backup_outptr
;
54 outleft
= backup_outleft
;
60 /* Try to use a variant, but postfix it with
61 U+303E IDEOGRAPHIC VARIATION INDICATOR
62 (cf. Ken Lunde's "CJKV information processing", p. 188). */
/* NOTE(review): the start of this if/else chain and the values assigned
   to indx in its first branches are not visible in this listing. */
66 else if (wc
== 0x30f6)
/* For wc in 0x4e00..0x9fff the index comes from cjk_variants_indx. */
68 else if (wc
>= 0x4e00 && wc
< 0xa000)
69 indx
= cjk_variants_indx
[wc
-0x4e00];
/* Fetch the table entry; its top bit is kept separately in 'last'. */
73 unsigned short variant
= cjk_variants
[indx
];
74 unsigned short last
= variant
& 0x8000;
/* Candidate output: the variant followed by U+303E.
   NOTE(review): any adjustment of 'variant' between the lines above and
   this one is not visible. */
77 buf
[0] = variant
; buf
[1] = 0x303e;
/* Remember the output state and position so the attempt can be undone. */
79 state_t backup_state
= cd
->ostate
;
80 unsigned char* backup_outptr
= outptr
;
81 size_t backup_outleft
= outleft
;
/* Convert both elements of buf. */
83 for (i
= 0; i
< 2; i
++) {
/* NOTE(review): the condition guarding this assignment is not visible. */
85 sub_outcount
= RET_TOOSMALL
;
88 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* NOTE(review): the statement controlled by the following test is not
   visible in this listing. */
89 if (sub_outcount
<= RET_ILUNI
)
/* Sanity check, then advance past the bytes just written. */
91 if (!(sub_outcount
<= outleft
)) abort();
92 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Both elements converted: return the number of bytes written. */
94 return outptr
-backup_outptr
;
/* Restore the saved output state and position. */
96 cd
->ostate
= backup_state
;
97 outptr
= backup_outptr
;
98 outleft
= backup_outleft
;
107 if (wc
>= 0x2018 && wc
<= 0x201a) {
108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Substitute selection: with HAVE_QUOTATION_MARKS, 0x201a becomes 0x2018
   and the other two stay unchanged; otherwise, with HAVE_ACCENTS, 0x2019
   becomes 0x00b4 and the rest 0x0060; otherwise 0x0027 is used.
   NOTE(review): the declaration that receives this value is not visible. */
110 (cd
->oflags
& HAVE_QUOTATION_MARKS
111 ? (wc
== 0x201a ? 0x2018 : wc
)
112 : (cd
->oflags
& HAVE_ACCENTS
113 ? (wc
==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114 : 0x0027 /* use apostrophe */
/* Try to encode the substitute. */
116 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,substitute
,outleft
);
/* NOTE(review): the statement controlled by the following test is not
   visible in this listing. */
117 if (outcount
!= RET_ILUNI
)
121 /* Use the transliteration table. */
122 int indx
= translit_index(wc
);
/* The table entry at indx holds the number of replacement characters,
   followed by the replacement characters themselves. */
124 const unsigned short * cp
= &translit_data
[indx
];
125 unsigned int num
= *cp
++;
/* Remember the output state and position so the attempt can be undone. */
126 state_t backup_state
= cd
->ostate
;
127 unsigned char* backup_outptr
= outptr
;
128 size_t backup_outleft
= outleft
;
/* Convert the num replacement characters one by one. */
131 for (i
= 0; i
< num
; i
++) {
/* NOTE(review): the condition guarding the next two statements is not
   visible in this listing. */
133 sub_outcount
= RET_TOOSMALL
;
134 goto translit_failed
;
136 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,cp
[i
],outleft
);
/* A result <= RET_ILUNI abandons the whole attempt. */
137 if (sub_outcount
<= RET_ILUNI
)
138 goto translit_failed
;
/* Sanity check, then advance past the bytes just written. */
139 if (!(sub_outcount
<= outleft
)) abort();
140 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Whole replacement converted: return the number of bytes written. */
142 return outptr
-backup_outptr
;
/* Restore the saved output state and position.
   NOTE(review): the translit_failed label is not visible here. */
144 cd
->ostate
= backup_state
;
145 outptr
= backup_outptr
;
146 outleft
= backup_outleft
;
/* NOTE(review): the statement controlled by this test, and the end of the
   function, are not visible in this listing. */
147 if (sub_outcount
< 0)
/* Conversion loop: each character is decoded with cd->ifuncs.xxx_mbtowc
   and encoded with cd->ofuncs.xxx_wctomb.  When direct encoding yields
   RET_ILUNI, the visible fallbacks are the Unicode tag character check,
   unicode_transliterate (only if cd->transliterate is set) and U+FFFD.
   At the end, *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
   from the local positions.
   NOTE(review): the loop header, local declarations, error handling,
   labels and the return statement are not visible in this listing. */
154 static size_t unicode_loop_convert (iconv_t icd
,
155 const char* * inbuf
, size_t *inbytesleft
,
156 char* * outbuf
, size_t *outbytesleft
)
158 conv_t cd
= (conv_t
) icd
;
/* Work on local copies of the caller's buffer positions and counts. */
160 const unsigned char* inptr
= (const unsigned char*) *inbuf
;
161 size_t inleft
= *inbytesleft
;
162 unsigned char* outptr
= (unsigned char*) *outbuf
;
163 size_t outleft
= *outbytesleft
;
/* Decode one character from the input into wc. */
168 incount
= cd
->ifuncs
.xxx_mbtowc(cd
,&wc
,inptr
,inleft
);
/* NOTE(review): the handling inside cases 1 and 2 below is not visible. */
170 if (incount
== RET_ILSEQ
) {
171 /* Case 1: invalid input */
176 if (incount
== RET_TOOFEW(0)) {
177 /* Case 2: not enough bytes available to detect anything */
182 /* Case 3: k bytes read, but only a shift sequence */
/* Turn the negative return code into a byte count; presumably this
   inverts the RET_TOOFEW(k) encoding -- confirm against its definition. */
183 incount
= -2-incount
;
185 /* Case 4: k bytes read, making up a wide character */
/* Try to encode wc directly. */
191 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statements controlled by the tests on outcount and
   on the tag character range below are not visible in this listing. */
192 if (outcount
!= RET_ILUNI
)
194 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
195 if ((wc
>> 7) == (0xe0000 >> 7))
197 /* Try transliteration. */
/* Transliteration is attempted only when enabled on the descriptor. */
199 if (cd
->transliterate
) {
200 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
201 if (outcount
!= RET_ILUNI
)
/* Next fallback: try to encode U+FFFD instead of wc. */
204 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
205 if (outcount
!= RET_ILUNI
)
/* Sanity check: the converter must not report more bytes than were
   available; then advance the output position. */
216 if (!(outcount
<= outleft
)) abort();
217 outptr
+= outcount
; outleft
-= outcount
;
/* Same check for the input side; then advance the input position. */
220 if (!(incount
<= inleft
)) abort();
221 inptr
+= incount
; inleft
-= incount
;
/* Hand the updated positions and counts back to the caller. */
223 *inbuf
= (const char*) inptr
;
224 *inbytesleft
= inleft
;
225 *outbuf
= (char*) outptr
;
226 *outbytesleft
= outleft
;
/* Reset handling.  When outbuf or *outbuf is NULL, the input and output
   states are cleared with memset.  The remaining visible statements flush
   a pending character through cd->ifuncs.xxx_flushwc (if provided), encode
   it with the same fallbacks as the conversion loop, call
   cd->ofuncs.xxx_reset (if provided) and clear both states again.
   NOTE(review): braces, the else structure, declarations, error handling
   and return statements are not visible in this listing, so how these
   parts are nested is presumed rather than shown. */
230 static size_t unicode_loop_reset (iconv_t icd
,
231 char* * outbuf
, size_t *outbytesleft
)
233 conv_t cd
= (conv_t
) icd
;
234 if (outbuf
== NULL
|| *outbuf
== NULL
) {
235 /* Reset the states. */
236 memset(&cd
->istate
,'\0',sizeof(state_t
));
237 memset(&cd
->ostate
,'\0',sizeof(state_t
));
/* The flush step only applies when the input encoding provides
   xxx_flushwc. */
241 if (cd
->ifuncs
.xxx_flushwc
) {
/* A nonzero result means a character was stored in wc and is to be
   written out. */
243 if (cd
->ifuncs
.xxx_flushwc(cd
, &wc
)) {
244 unsigned char* outptr
= (unsigned char*) *outbuf
;
245 size_t outleft
= *outbytesleft
;
/* Try to encode wc directly. */
246 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statements controlled by the tests on outcount and
   on the tag character range below are not visible in this listing. */
247 if (outcount
!= RET_ILUNI
)
249 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
250 if ((wc
>> 7) == (0xe0000 >> 7))
252 /* Try transliteration. */
/* Transliteration is attempted only when enabled on the descriptor. */
254 if (cd
->transliterate
) {
255 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
256 if (outcount
!= RET_ILUNI
)
/* Next fallback: try to encode U+FFFD instead of wc. */
259 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
260 if (outcount
!= RET_ILUNI
)
/* Sanity check: the converter must not report more bytes than were
   available. */
269 if (!(outcount
<= outleft
)) abort();
/* Hand the output position and count back to the caller.
   NOTE(review): no statement advancing outptr/outleft by outcount is
   visible between the check above and this write-back; confirm against
   the complete file that the advance is present. */
273 *outbuf
= (char*) outptr
;
274 *outbytesleft
= outleft
;
/* The output reset step only applies when the output encoding provides
   xxx_reset. */
277 if (cd
->ofuncs
.xxx_reset
) {
278 unsigned char* outptr
= (unsigned char*) *outbuf
;
279 size_t outleft
= *outbytesleft
;
280 int outcount
= cd
->ofuncs
.xxx_reset(cd
,outptr
,outleft
);
/* NOTE(review): any handling of a negative outcount is not visible. */
/* Sanity check, then account for the bytes written by xxx_reset. */
285 if (!(outcount
<= outleft
)) abort();
286 *outbuf
= (char*) (outptr
+ outcount
);
287 *outbytesleft
= outleft
- outcount
;
/* Clear both conversion states. */
289 memset(&cd
->istate
,'\0',sizeof(state_t
));
290 memset(&cd
->ostate
,'\0',sizeof(state_t
));