/*
 * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/* This file defines the conversion loop via Unicode as a pivot encoding. */

23 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
24 static int unicode_transliterate (conv_t cd
, ucs4_t wc
,
25 unsigned char* outptr
, size_t outleft
)
27 if (cd
->oflags
& HAVE_HANGUL_JAMO
) {
28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30 (contained in Unicode only). */
32 int ret
= johab_hangul_decompose(cd
,buf
,wc
);
33 if (ret
!= RET_ILUNI
) {
34 /* we know 1 <= ret <= 3 */
35 state_t backup_state
= cd
->ostate
;
36 unsigned char* backup_outptr
= outptr
;
37 size_t backup_outleft
= outleft
;
39 for (i
= 0; i
< ret
; i
++) {
41 sub_outcount
= RET_TOOSMALL
;
42 goto johab_hangul_failed
;
44 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
45 if (sub_outcount
<= RET_ILUNI
)
46 goto johab_hangul_failed
;
47 if (!(sub_outcount
<= outleft
)) abort();
48 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
50 return outptr
-backup_outptr
;
52 cd
->ostate
= backup_state
;
53 outptr
= backup_outptr
;
54 outleft
= backup_outleft
;
55 if (sub_outcount
!= RET_ILUNI
)
60 /* Try to use a variant, but postfix it with
61 U+303E IDEOGRAPHIC VARIATION INDICATOR
62 (cf. Ken Lunde's "CJKV information processing", p. 188). */
66 else if (wc
== 0x30f6)
68 else if (wc
>= 0x4e00 && wc
< 0xa000)
69 indx
= cjk_variants_indx
[wc
-0x4e00];
73 unsigned short variant
= cjk_variants
[indx
];
74 unsigned short last
= variant
& 0x8000;
77 buf
[0] = variant
; buf
[1] = 0x303e;
79 state_t backup_state
= cd
->ostate
;
80 unsigned char* backup_outptr
= outptr
;
81 size_t backup_outleft
= outleft
;
83 for (i
= 0; i
< 2; i
++) {
85 sub_outcount
= RET_TOOSMALL
;
88 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
89 if (sub_outcount
<= RET_ILUNI
)
91 if (!(sub_outcount
<= outleft
)) abort();
92 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
94 return outptr
-backup_outptr
;
96 cd
->ostate
= backup_state
;
97 outptr
= backup_outptr
;
98 outleft
= backup_outleft
;
99 if (sub_outcount
!= RET_ILUNI
)
107 if (wc
>= 0x2018 && wc
<= 0x201a) {
108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
110 (cd
->oflags
& HAVE_QUOTATION_MARKS
111 ? (wc
== 0x201a ? 0x2018 : wc
)
112 : (cd
->oflags
& HAVE_ACCENTS
113 ? (wc
==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114 : 0x0027 /* use apostrophe */
116 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,substitute
,outleft
);
117 if (outcount
!= RET_ILUNI
)
121 /* Use the transliteration table. */
122 int indx
= translit_index(wc
);
124 const unsigned int * cp
= &translit_data
[indx
];
125 unsigned int num
= *cp
++;
126 state_t backup_state
= cd
->ostate
;
127 unsigned char* backup_outptr
= outptr
;
128 size_t backup_outleft
= outleft
;
131 for (i
= 0; i
< num
; i
++) {
133 sub_outcount
= RET_TOOSMALL
;
134 goto translit_failed
;
136 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,cp
[i
],outleft
);
137 if (sub_outcount
== RET_ILUNI
)
138 /* Recursive transliteration. */
139 sub_outcount
= unicode_transliterate(cd
,cp
[i
],outptr
,outleft
);
140 if (sub_outcount
<= RET_ILUNI
)
141 goto translit_failed
;
142 if (!(sub_outcount
<= outleft
)) abort();
143 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
145 return outptr
-backup_outptr
;
147 cd
->ostate
= backup_state
;
148 outptr
= backup_outptr
;
149 outleft
= backup_outleft
;
150 if (sub_outcount
!= RET_ILUNI
)
#ifndef LIBICONV_PLUG

/* State shared between a conversion routine and
   uc_to_mb_write_replacement while a uc_to_mb fallback handler runs. */
struct uc_to_mb_fallback_locals {
  unsigned char* l_outbuf;      /* next free position in the output buffer */
  size_t l_outbytesleft;        /* number of bytes still free at l_outbuf */
  int l_errno;                  /* 0, or the errno value of the first failure */
};

/* Callback handed to the uc_to_mb fallback handler: appends the BUFLEN
   bytes at BUF to the output buffer described by CALLBACK_ARG, which must
   point to a struct uc_to_mb_fallback_locals.
   If the bytes do not fit, nothing is copied and l_errno is set to E2BIG.
   Once l_errno is nonzero, all further calls are ignored.

   NOTE(review): the callback_arg parameter, the l_errno member and the
   else branch were missing from the damaged copy and have been restored
   from their uses; verify against a pristine copy of the file. */
static void uc_to_mb_write_replacement (const char *buf, size_t buflen,
                                        void* callback_arg)
{
  struct uc_to_mb_fallback_locals * plocals =
    (struct uc_to_mb_fallback_locals *) callback_arg;
  /* Do nothing if already encountered an error in a previous call. */
  if (plocals->l_errno == 0) {
    /* Attempt to copy the passed buffer to the output buffer. */
    if (plocals->l_outbytesleft < buflen)
      plocals->l_errno = E2BIG;
    else {
      memcpy(plocals->l_outbuf, buf, buflen);
      plocals->l_outbuf += buflen;
      plocals->l_outbytesleft -= buflen;
    }
  }
}

183 struct mb_to_uc_fallback_locals
{
185 unsigned char* l_outbuf
;
186 size_t l_outbytesleft
;
190 static void mb_to_uc_write_replacement (const unsigned int *buf
, size_t buflen
,
193 struct mb_to_uc_fallback_locals
* plocals
=
194 (struct mb_to_uc_fallback_locals
*) callback_arg
;
195 /* Do nothing if already encountered an error in a previous call. */
196 if (plocals
->l_errno
== 0) {
197 /* Attempt to convert the passed buffer to the target encoding. */
198 conv_t cd
= plocals
->l_cd
;
199 unsigned char* outptr
= plocals
->l_outbuf
;
200 size_t outleft
= plocals
->l_outbytesleft
;
201 for (; buflen
> 0; buf
++, buflen
--) {
205 plocals
->l_errno
= E2BIG
;
208 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
209 if (outcount
!= RET_ILUNI
)
211 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
212 if ((wc
>> 7) == (0xe0000 >> 7))
214 /* Try transliteration. */
215 if (cd
->transliterate
) {
216 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
217 if (outcount
!= RET_ILUNI
)
220 if (cd
->discard_ilseq
) {
224 #ifndef LIBICONV_PLUG
225 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
226 struct uc_to_mb_fallback_locals locals
;
227 locals
.l_outbuf
= outptr
;
228 locals
.l_outbytesleft
= outleft
;
230 cd
->fallbacks
.uc_to_mb_fallback(wc
,
231 uc_to_mb_write_replacement
,
234 if (locals
.l_errno
!= 0) {
235 plocals
->l_errno
= locals
.l_errno
;
238 outptr
= locals
.l_outbuf
;
239 outleft
= locals
.l_outbytesleft
;
244 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
245 if (outcount
!= RET_ILUNI
)
247 plocals
->l_errno
= EILSEQ
;
251 plocals
->l_errno
= E2BIG
;
254 #ifndef LIBICONV_PLUG
255 if (cd
->hooks
.uc_hook
)
256 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
258 if (!(outcount
<= outleft
)) abort();
259 outptr
+= outcount
; outleft
-= outcount
;
262 plocals
->l_outbuf
= outptr
;
263 plocals
->l_outbytesleft
= outleft
;
#endif /* !LIBICONV_PLUG */

269 static size_t unicode_loop_convert (iconv_t icd
,
270 const char* * inbuf
, size_t *inbytesleft
,
271 char* * outbuf
, size_t *outbytesleft
)
273 conv_t cd
= (conv_t
) icd
;
275 const unsigned char* inptr
= (const unsigned char*) *inbuf
;
276 size_t inleft
= *inbytesleft
;
277 unsigned char* outptr
= (unsigned char*) *outbuf
;
278 size_t outleft
= *outbytesleft
;
280 state_t last_istate
= cd
->istate
;
284 incount
= cd
->ifuncs
.xxx_mbtowc(cd
,&wc
,inptr
,inleft
);
286 if ((unsigned int)(-1-incount
) % 2 == (unsigned int)(-1-RET_ILSEQ
) % 2) {
287 /* Case 1: invalid input, possibly after a shift sequence */
288 incount
= DECODE_SHIFT_ILSEQ(incount
);
289 if (cd
->discard_ilseq
) {
290 switch (cd
->iindex
) {
291 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
292 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
293 case ei_ucs4internal
: case ei_ucs4swapped
:
295 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
296 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
297 case ei_ucs2internal
: case ei_ucs2swapped
:
304 #ifndef LIBICONV_PLUG
305 else if (cd
->fallbacks
.mb_to_uc_fallback
!= NULL
) {
306 unsigned int incount2
;
307 struct mb_to_uc_fallback_locals locals
;
308 switch (cd
->iindex
) {
309 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
310 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
311 case ei_ucs4internal
: case ei_ucs4swapped
:
313 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
314 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
315 case ei_ucs2internal
: case ei_ucs2swapped
:
321 locals
.l_outbuf
= outptr
;
322 locals
.l_outbytesleft
= outleft
;
324 cd
->fallbacks
.mb_to_uc_fallback((const char*)inptr
+incount
, incount2
,
325 mb_to_uc_write_replacement
,
328 if (locals
.l_errno
!= 0) {
329 inptr
+= incount
; inleft
-= incount
;
330 errno
= locals
.l_errno
;
335 outptr
= locals
.l_outbuf
;
336 outleft
= locals
.l_outbytesleft
;
341 inptr
+= incount
; inleft
-= incount
;
346 if (incount
== RET_TOOFEW(0)) {
347 /* Case 2: not enough bytes available to detect anything */
352 /* Case 3: k bytes read, but only a shift sequence */
353 incount
= DECODE_TOOFEW(incount
);
355 /* Case 4: k bytes read, making up a wide character */
357 cd
->istate
= last_istate
;
362 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
363 if (outcount
!= RET_ILUNI
)
365 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
366 if ((wc
>> 7) == (0xe0000 >> 7))
368 /* Try transliteration. */
370 if (cd
->transliterate
) {
371 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
372 if (outcount
!= RET_ILUNI
)
375 if (cd
->discard_ilseq
) {
379 #ifndef LIBICONV_PLUG
380 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
381 struct uc_to_mb_fallback_locals locals
;
382 locals
.l_outbuf
= outptr
;
383 locals
.l_outbytesleft
= outleft
;
385 cd
->fallbacks
.uc_to_mb_fallback(wc
,
386 uc_to_mb_write_replacement
,
389 if (locals
.l_errno
!= 0) {
390 cd
->istate
= last_istate
;
391 errno
= locals
.l_errno
;
394 outptr
= locals
.l_outbuf
;
395 outleft
= locals
.l_outbytesleft
;
400 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
401 if (outcount
!= RET_ILUNI
)
403 cd
->istate
= last_istate
;
409 cd
->istate
= last_istate
;
414 #ifndef LIBICONV_PLUG
415 if (cd
->hooks
.uc_hook
)
416 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
418 if (!(outcount
<= outleft
)) abort();
419 outptr
+= outcount
; outleft
-= outcount
;
422 if (!(incount
<= inleft
)) abort();
423 inptr
+= incount
; inleft
-= incount
;
425 *inbuf
= (const char*) inptr
;
426 *inbytesleft
= inleft
;
427 *outbuf
= (char*) outptr
;
428 *outbytesleft
= outleft
;
432 static size_t unicode_loop_reset (iconv_t icd
,
433 char* * outbuf
, size_t *outbytesleft
)
435 conv_t cd
= (conv_t
) icd
;
436 if (outbuf
== NULL
|| *outbuf
== NULL
) {
437 /* Reset the states. */
438 memset(&cd
->istate
,'\0',sizeof(state_t
));
439 memset(&cd
->ostate
,'\0',sizeof(state_t
));
443 if (cd
->ifuncs
.xxx_flushwc
) {
444 state_t last_istate
= cd
->istate
;
446 if (cd
->ifuncs
.xxx_flushwc(cd
, &wc
)) {
447 unsigned char* outptr
= (unsigned char*) *outbuf
;
448 size_t outleft
= *outbytesleft
;
449 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
450 if (outcount
!= RET_ILUNI
)
452 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
453 if ((wc
>> 7) == (0xe0000 >> 7))
455 /* Try transliteration. */
457 if (cd
->transliterate
) {
458 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
459 if (outcount
!= RET_ILUNI
)
462 if (cd
->discard_ilseq
) {
466 #ifndef LIBICONV_PLUG
467 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
468 struct uc_to_mb_fallback_locals locals
;
469 locals
.l_outbuf
= outptr
;
470 locals
.l_outbytesleft
= outleft
;
472 cd
->fallbacks
.uc_to_mb_fallback(wc
,
473 uc_to_mb_write_replacement
,
476 if (locals
.l_errno
!= 0) {
477 cd
->istate
= last_istate
;
478 errno
= locals
.l_errno
;
481 outptr
= locals
.l_outbuf
;
482 outleft
= locals
.l_outbytesleft
;
487 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
488 if (outcount
!= RET_ILUNI
)
490 cd
->istate
= last_istate
;
495 cd
->istate
= last_istate
;
499 #ifndef LIBICONV_PLUG
500 if (cd
->hooks
.uc_hook
)
501 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
503 if (!(outcount
<= outleft
)) abort();
507 *outbuf
= (char*) outptr
;
508 *outbytesleft
= outleft
;
511 if (cd
->ofuncs
.xxx_reset
) {
512 unsigned char* outptr
= (unsigned char*) *outbuf
;
513 size_t outleft
= *outbytesleft
;
514 int outcount
= cd
->ofuncs
.xxx_reset(cd
,outptr
,outleft
);
519 if (!(outcount
<= outleft
)) abort();
520 *outbuf
= (char*) (outptr
+ outcount
);
521 *outbytesleft
= outleft
- outcount
;
523 memset(&cd
->istate
,'\0',sizeof(state_t
));
524 memset(&cd
->ostate
,'\0',sizeof(state_t
));