2 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
21 /* This file defines the conversion loop via Unicode as a pivot encoding. */
/*
 * unicode_loop_convert: conversion loop using a wide character (wc) as pivot.
 *
 * Visible behavior:
 *   - decodes from the input cursor with cd->ifuncs.xxx_mbtowc into wc;
 *   - encodes wc at the output cursor with cd->ofuncs.xxx_wctomb;
 *   - when cd->transliterate is set, the visible fallbacks are, in source
 *     order: Hangul-to-Jamo decomposition (johab_hangul_decompose), a CJK
 *     variant followed by U+303E, a substitute for the quotation marks
 *     0x2018..0x201a, and the translit_data table;
 *   - a later call encodes 0xFFFD;
 *   - finally the local cursors are stored back through inbuf, inbytesleft,
 *     outbuf and outbytesleft.
 *
 * NOTE(review): this listing carries embedded line numbers with gaps and has
 * statements split across lines; braces, labels, several declarations and
 * some controlling conditions are not present. Confirm exact control flow
 * and the return value against the upstream file.
 */
23 static size_t unicode_loop_convert (iconv_t icd
,
24 const char* * inbuf
, size_t *inbytesleft
,
25 char* * outbuf
, size_t *outbytesleft
)
/* Cast the iconv_t handle to conv_t so its fields can be accessed. */
27 conv_t cd
= (conv_t
) icd
;
/* Local cursors copied from the caller's parameters; they are written
   back through the same pointers at the end of this block. */
29 const unsigned char* inptr
= (const unsigned char*) *inbuf
;
30 size_t inleft
= *inbytesleft
;
31 unsigned char* outptr
= (unsigned char*) *outbuf
;
32 size_t outleft
= *outbytesleft
;
/* Decode from inptr (inleft bytes available) into wc. incount is compared
   against RET_ILSEQ and RET_TOOFEW(0) below and is later used as the number
   of input bytes to skip. */
37 incount
= cd
->ifuncs
.xxx_mbtowc(cd
,&wc
,inptr
,inleft
);
39 if (incount
== RET_ILSEQ
) {
40 /* Case 1: invalid input */
45 if (incount
== RET_TOOFEW(0)) {
46 /* Case 2: not enough bytes available to detect anything */
51 /* Case 3: k bytes read, but only a shift sequence */
54 /* Case 4: k bytes read, making up a wide character */
/* Encode wc at outptr with outleft bytes available. */
60 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statement controlled by this test is not present in
   this listing. */
61 if (outcount
!= RET_ILUNI
)
63 /* Try transliteration. */
65 if (cd
->transliterate
) {
66 if (cd
->oflags
& HAVE_HANGUL_JAMO
) {
67 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
68 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
69 (contained in Unicode only). */
71 int ret
= johab_hangul_decompose(cd
,buf
,wc
);
72 if (ret
!= RET_ILUNI
) {
73 /* we know 1 <= ret <= 3 */
/* Save the encoder state and the output cursor; they are restored after
   the loop (see the assignments from backup_* below). */
74 state_t backup_state
= cd
->ostate
;
75 unsigned char* backup_outptr
= outptr
;
76 size_t backup_outleft
= outleft
;
/* Encode each of the ret decomposed code points in buf. */
78 for (i
= 0; i
< ret
; i
++) {
/* NOTE(review): the condition guarding this RET_TOOSMALL exit is not
   present in this listing. */
80 sub_outcount
= RET_TOOSMALL
;
81 goto johab_hangul_failed
;
83 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* A zero or negative result abandons the decomposition. */
84 if (sub_outcount
<= 0)
85 goto johab_hangul_failed
;
/* Sanity check: the encoder must not report more bytes than were
   available. */
86 if (!(sub_outcount
<= outleft
)) abort();
87 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Roll back the encoder state and the output cursor to the saved values. */
91 cd
->ostate
= backup_state
;
92 outptr
= backup_outptr
;
93 outleft
= backup_outleft
;
94 if (sub_outcount
< 0) {
102 /* Try to use a variant, but postfix it with
103 U+303E IDEOGRAPHIC VARIATION INDICATOR
104 (cf. Ken Lunde's "CJKV information processing", p. 188). */
108 else if (wc
== 0x30f6)
/* For wc in [0x4e00, 0xa000) the index comes from cjk_variants_indx. */
110 else if (wc
>= 0x4e00 && wc
< 0xa000)
111 indx
= cjk_variants_indx
[wc
-0x4e00];
115 unsigned short variant
= cjk_variants
[indx
];
/* Bit 0x8000 of the table entry is extracted into 'last' (presumably an
   end-of-list marker; its use is not visible here). */
116 unsigned short last
= variant
& 0x8000;
/* Two code points to emit: the variant, then U+303E. */
119 buf
[0] = variant
; buf
[1] = 0x303e;
/* Save the encoder state and the output cursor before attempting the pair. */
121 state_t backup_state
= cd
->ostate
;
122 unsigned char* backup_outptr
= outptr
;
123 size_t backup_outleft
= outleft
;
125 for (i
= 0; i
< 2; i
++) {
127 sub_outcount
= RET_TOOSMALL
;
130 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* NOTE(review): the statement controlled by this test is not present in
   this listing. */
131 if (sub_outcount
<= 0)
133 if (!(sub_outcount
<= outleft
)) abort();
134 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Roll back the encoder state and the output cursor to the saved values. */
138 cd
->ostate
= backup_state
;
139 outptr
= backup_outptr
;
140 outleft
= backup_outleft
;
141 if (sub_outcount
< 0) {
152 if (wc
>= 0x2018 && wc
<= 0x201a) {
153 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Substitute selection by cd->oflags:
     HAVE_QUOTATION_MARKS: 0x201a becomes 0x2018, other values stay as wc;
     else HAVE_ACCENTS:    0x2019 becomes 0x00b4, other values 0x0060;
     else:                 0x0027. */
155 (cd
->oflags
& HAVE_QUOTATION_MARKS
156 ? (wc
== 0x201a ? 0x2018 : wc
)
157 : (cd
->oflags
& HAVE_ACCENTS
158 ? (wc
==0x2019 ? 0x00b4 : 0x0060) /* use accents */
159 : 0x0027 /* use apostrophe */
/* Encode the chosen substitute in place of wc. */
161 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,substitute
,outleft
);
166 /* Use the transliteration table. */
167 int indx
= translit_index(wc
);
/* The entry at translit_data[indx] starts with a count (num); the next num
   elements are the replacement code units encoded by the loop below. */
169 const unsigned short * cp
= &translit_data
[indx
];
170 unsigned int num
= *cp
++;
/* Save the encoder state and the output cursor before emitting the
   replacement sequence. */
171 state_t backup_state
= cd
->ostate
;
172 unsigned char* backup_outptr
= outptr
;
173 size_t backup_outleft
= outleft
;
176 for (i
= 0; i
< num
; i
++) {
/* NOTE(review): the condition guarding this RET_TOOSMALL exit is not
   present in this listing. */
178 sub_outcount
= RET_TOOSMALL
;
179 goto translit_failed
;
181 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,cp
[i
],outleft
);
/* A zero or negative result abandons the table replacement. */
182 if (sub_outcount
<= 0)
183 goto translit_failed
;
184 if (!(sub_outcount
<= outleft
)) abort();
185 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Roll back the encoder state and the output cursor to the saved values. */
189 cd
->ostate
= backup_state
;
190 outptr
= backup_outptr
;
191 outleft
= backup_outleft
;
192 if (sub_outcount
< 0) {
/* Encode 0xFFFD at the output cursor. */
200 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
/* Advance the output cursor by outcount, after checking that it does not
   exceed the space that was available. */
212 if (!(outcount
<= outleft
)) abort();
213 outptr
+= outcount
; outleft
-= outcount
;
/* Advance the input cursor by incount, after checking that it does not
   exceed the bytes that were available. */
217 if (!(incount
<= inleft
)) abort();
218 inptr
+= incount
; inleft
-= incount
;
/* Store the final cursors back through the caller's pointers. */
220 *inbuf
= (const char*) inptr
;
221 *inbytesleft
= inleft
;
222 *outbuf
= (char*) outptr
;
223 *outbytesleft
= outleft
;
/*
 * unicode_loop_reset: reset handling for the conversion descriptor.
 *
 * Visible behavior:
 *   - when outbuf is NULL or *outbuf is NULL, cd->istate and cd->ostate are
 *     zero-filled;
 *   - in the remaining code, if cd->ofuncs.xxx_reset is non-null it is
 *     called with *outbuf and *outbytesleft, *outbuf and *outbytesleft are
 *     adjusted by outcount, and both states are zero-filled.
 *
 * NOTE(review): this listing carries embedded line numbers with gaps; the
 * braces, the else branch structure, the declaration/assignment of outcount
 * and the return statement are not present. Confirm against the upstream
 * file.
 */
227 static size_t unicode_loop_reset (iconv_t icd
,
228 char* * outbuf
, size_t *outbytesleft
)
/* Cast the iconv_t handle to conv_t so its fields can be accessed. */
230 conv_t cd
= (conv_t
) icd
;
/* No output buffer supplied: only the states are cleared. */
231 if (outbuf
== NULL
|| *outbuf
== NULL
) {
232 /* Reset the states. */
233 memset(&cd
->istate
,'\0',sizeof(state_t
));
234 memset(&cd
->ostate
,'\0',sizeof(state_t
));
/* The reset hook is optional; it is only invoked when the pointer is set. */
237 if (cd
->ofuncs
.xxx_reset
) {
/* Call the reset hook with the caller's output buffer and remaining size.
   NOTE(review): presumably its result is stored in outcount; that
   assignment is not present in this listing. */
239 cd
->ofuncs
.xxx_reset(cd
, (unsigned char *) *outbuf
, *outbytesleft
);
/* Advance the caller's output pointer and shrink the remaining size by
   outcount. */
244 *outbuf
+= outcount
; *outbytesleft
-= outcount
;
/* Zero-fill both conversion states. */
246 memset(&cd
->istate
,'\0',sizeof(state_t
));
247 memset(&cd
->ostate
,'\0',sizeof(state_t
));