2 * Copyright (C) 1999-2003, 2005-2006 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
21 /* This file defines the conversion loop via Unicode as a pivot encoding. */
23 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
/* unicode_transliterate: try to write a substitute for wc at outptr (at most
   outleft bytes) using the output converter cd->ofuncs.xxx_wctomb.
   Four strategies appear below, in this order: Hangul-to-Jamo decomposition,
   a CJK variant followed by U+303E, a quotation-mark substitute, and the
   transliteration table.  The multi-character strategies snapshot cd->ostate,
   outptr and outleft before writing and restore them when a piece fails.
   On success the visible return statements yield outptr-backup_outptr.
   NOTE(review): this listing has lost lines (braces, the declarations of
   buf/i/sub_outcount/substitute, several labels and return statements); the
   comments added here describe only the statements that are still visible. */
24 static int unicode_transliterate (conv_t cd
, ucs4_t wc
,
25 unsigned char* outptr
, size_t outleft
)
/* Strategy 1: attempted only when the output flags include HAVE_HANGUL_JAMO. */
27 if (cd
->oflags
& HAVE_HANGUL_JAMO
) {
28 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
29 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
30 (contained in Unicode only). */
32 int ret
= johab_hangul_decompose(cd
,buf
,wc
);
/* Any result other than RET_ILUNI is used as the number of entries in buf. */
33 if (ret
!= RET_ILUNI
) {
34 /* we know 1 <= ret <= 3 */
/* Snapshot the output state so a failed piece can be undone. */
35 state_t backup_state
= cd
->ostate
;
36 unsigned char* backup_outptr
= outptr
;
37 size_t backup_outleft
= outleft
;
39 for (i
= 0; i
< ret
; i
++) {
/* NOTE(review): the condition guarding this RET_TOOSMALL branch is not
   visible in this listing. */
41 sub_outcount
= RET_TOOSMALL
;
42 goto johab_hangul_failed
;
/* Convert buf[i] with the output converter. */
44 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* RET_ILUNI and anything below it is treated as failure. */
45 if (sub_outcount
<= RET_ILUNI
)
46 goto johab_hangul_failed
;
/* A converter claiming more bytes than were available is fatal. */
47 if (!(sub_outcount
<= outleft
)) abort();
48 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Report the number of bytes written since the snapshot. */
50 return outptr
-backup_outptr
;
/* Roll back to the snapshot.  NOTE(review): the johab_hangul_failed label
   line itself is missing from this listing. */
52 cd
->ostate
= backup_state
;
53 outptr
= backup_outptr
;
54 outleft
= backup_outleft
;
/* NOTE(review): the statement controlled by this test is not visible. */
55 if (sub_outcount
!= RET_ILUNI
)
/* Strategy 2: CJK variant plus variation indicator. */
60 /* Try to use a variant, but postfix it with
61 U+303E IDEOGRAPHIC VARIATION INDICATOR
62 (cf. Ken Lunde's "CJKV information processing", p. 188). */
/* Choose the starting index into cjk_variants.  NOTE(review): the first
   branch of this chain and the assignment made for 0x30f6 are not visible. */
66 else if (wc
== 0x30f6)
68 else if (wc
>= 0x4e00 && wc
< 0xa000)
69 indx
= cjk_variants_indx
[wc
-0x4e00];
/* Read one table entry; bit 0x8000 is extracted into `last`.
   NOTE(review): the code that consumes `last`, and any adjustment of
   `variant` before it is stored, is missing from this listing. */
73 unsigned short variant
= cjk_variants
[indx
];
74 unsigned short last
= variant
& 0x8000;
/* The candidate output is the variant followed by U+303E. */
77 buf
[0] = variant
; buf
[1] = 0x303e;
/* Snapshot the output state before writing the two code points. */
79 state_t backup_state
= cd
->ostate
;
80 unsigned char* backup_outptr
= outptr
;
81 size_t backup_outleft
= outleft
;
83 for (i
= 0; i
< 2; i
++) {
/* NOTE(review): the guarding condition and the following jump are not
   visible in this listing. */
85 sub_outcount
= RET_TOOSMALL
;
88 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
/* NOTE(review): the statement controlled by this test is not visible. */
89 if (sub_outcount
<= RET_ILUNI
)
91 if (!(sub_outcount
<= outleft
)) abort();
92 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Report the number of bytes written since the snapshot. */
94 return outptr
-backup_outptr
;
/* Roll back to the snapshot (the label line is missing from this listing). */
96 cd
->ostate
= backup_state
;
97 outptr
= backup_outptr
;
98 outleft
= backup_outleft
;
/* NOTE(review): the statement controlled by this test is not visible. */
99 if (sub_outcount
!= RET_ILUNI
)
/* Strategy 3: single-character substitute for U+2018..U+201A. */
107 if (wc
>= 0x2018 && wc
<= 0x201a) {
108 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Substitute selection: with HAVE_QUOTATION_MARKS, 0x201a becomes 0x2018 and
   the other two are kept; otherwise with HAVE_ACCENTS, 0x2019 becomes 0x00b4
   and the rest 0x0060; otherwise 0x0027.
   NOTE(review): the declaration receiving this expression and its closing
   parentheses are missing from this listing. */
110 (cd
->oflags
& HAVE_QUOTATION_MARKS
111 ? (wc
== 0x201a ? 0x2018 : wc
)
112 : (cd
->oflags
& HAVE_ACCENTS
113 ? (wc
==0x2019 ? 0x00b4 : 0x0060) /* use accents */
114 : 0x0027 /* use apostrophe */
116 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,substitute
,outleft
);
/* NOTE(review): the statement controlled by this test is not visible. */
117 if (outcount
!= RET_ILUNI
)
/* Strategy 4: table-driven replacement sequence. */
121 /* Use the transliteration table. */
122 int indx
= translit_index(wc
);
/* The entry at translit_data[indx] is read as a count (num); cp is then
   advanced past it and cp[0..num-1] are the code points to emit.
   NOTE(review): any validity check on indx is not visible in this listing. */
124 const unsigned int * cp
= &translit_data
[indx
];
125 unsigned int num
= *cp
++;
/* Snapshot the output state before writing the sequence. */
126 state_t backup_state
= cd
->ostate
;
127 unsigned char* backup_outptr
= outptr
;
128 size_t backup_outleft
= outleft
;
131 for (i
= 0; i
< num
; i
++) {
/* NOTE(review): the condition guarding this RET_TOOSMALL branch is not
   visible in this listing. */
133 sub_outcount
= RET_TOOSMALL
;
134 goto translit_failed
;
136 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,cp
[i
],outleft
);
/* A replacement code point that is itself unrepresentable is passed back
   through this function. */
137 if (sub_outcount
== RET_ILUNI
)
138 /* Recursive transliteration. */
139 sub_outcount
= unicode_transliterate(cd
,cp
[i
],outptr
,outleft
);
140 if (sub_outcount
<= RET_ILUNI
)
141 goto translit_failed
;
142 if (!(sub_outcount
<= outleft
)) abort();
143 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Report the number of bytes written since the snapshot. */
145 return outptr
-backup_outptr
;
/* Roll back to the snapshot.  NOTE(review): the translit_failed label line
   itself is missing from this listing. */
147 cd
->ostate
= backup_state
;
148 outptr
= backup_outptr
;
149 outleft
= backup_outleft
;
/* NOTE(review): the statement controlled by this test, and the function's
   final return, are not visible in this listing. */
150 if (sub_outcount
!= RET_ILUNI
)
157 #ifndef LIBICONV_PLUG
/* Output cursor handed to uc_to_mb_write_replacement through its callback
   argument: l_outbuf is the next write position and l_outbytesleft the space
   remaining.
   NOTE(review): code in this file also accesses an l_errno member of this
   struct; its declaration and the closing brace are missing from this
   listing. */
159 struct uc_to_mb_fallback_locals
{
160 unsigned char* l_outbuf
;
161 size_t l_outbytesleft
;
/* uc_to_mb_write_replacement: callback that appends buflen bytes from buf to
   the output cursor kept in a struct uc_to_mb_fallback_locals.
   Once l_errno is nonzero the callback does nothing; when the remaining
   space is smaller than buflen it records E2BIG.
   NOTE(review): the parameter line declaring callback_arg and the lines
   separating the E2BIG branch from the copy are missing from this listing. */
165 static void uc_to_mb_write_replacement (const char *buf
, size_t buflen
,
/* Recover the typed cursor from the opaque callback argument. */
168 struct uc_to_mb_fallback_locals
* plocals
=
169 (struct uc_to_mb_fallback_locals
*) callback_arg
;
170 /* Do nothing if already encountered an error in a previous call. */
171 if (plocals
->l_errno
== 0) {
172 /* Attempt to copy the passed buffer to the output buffer. */
173 if (plocals
->l_outbytesleft
< buflen
)
174 plocals
->l_errno
= E2BIG
;
/* Copy the bytes, then advance the cursor and shrink the remaining space by
   the same amount. */
176 memcpy(plocals
->l_outbuf
, buf
, buflen
);
177 plocals
->l_outbuf
+= buflen
;
178 plocals
->l_outbytesleft
-= buflen
;
/* Output cursor handed to mb_to_uc_write_replacement through its callback
   argument: l_outbuf is the next write position and l_outbytesleft the space
   remaining.
   NOTE(review): code in this file also accesses l_cd and l_errno members of
   this struct; their declarations and the closing brace are missing from
   this listing. */
183 struct mb_to_uc_fallback_locals
{
185 unsigned char* l_outbuf
;
186 size_t l_outbytesleft
;
/* mb_to_uc_write_replacement: callback that converts buflen code points from
   buf to the target encoding, writing at the cursor kept in a
   struct mb_to_uc_fallback_locals.
   For each code point the visible code calls the output converter, then
   (if enabled) unicode_transliterate, then the discard_ilseq test, the
   uc_to_mb fallback and a conversion of 0xFFFD; failures are recorded in
   plocals->l_errno (E2BIG or EILSEQ, or the nested fallback's error).
   NOTE(review): many lines are missing from this listing (the callback_arg
   parameter, the declarations of wc and outcount, the guarding conditions
   and the jumps between the steps), so the exact control flow cannot be
   read from here. */
190 static void mb_to_uc_write_replacement (const unsigned int *buf
, size_t buflen
,
/* Recover the typed cursor from the opaque callback argument. */
193 struct mb_to_uc_fallback_locals
* plocals
=
194 (struct mb_to_uc_fallback_locals
*) callback_arg
;
195 /* Do nothing if already encountered an error in a previous call. */
196 if (plocals
->l_errno
== 0) {
197 /* Attempt to convert the passed buffer to the target encoding. */
/* Work on local copies of the conversion descriptor and output cursor. */
198 conv_t cd
= plocals
->l_cd
;
199 unsigned char* outptr
= plocals
->l_outbuf
;
200 size_t outleft
= plocals
->l_outbytesleft
;
/* One iteration per code point in buf. */
201 for (; buflen
> 0; buf
++, buflen
--) {
/* NOTE(review): the condition guarding this E2BIG assignment is not
   visible in this listing. */
205 plocals
->l_errno
= E2BIG
;
/* First attempt: direct conversion of wc by the output converter. */
208 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statement controlled by this test is not visible. */
209 if (outcount
!= RET_ILUNI
)
211 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* The shift by 7 compares everything above the low seven bits, so the test
   matches exactly the 128 code points of that range. */
212 if ((wc
>> 7) == (0xe0000 >> 7))
214 /* Try transliteration. */
215 if (cd
->transliterate
) {
216 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
217 if (outcount
!= RET_ILUNI
)
/* NOTE(review): the body of the discard_ilseq branch is not visible. */
220 if (cd
->discard_ilseq
) {
224 #ifndef LIBICONV_PLUG
/* A user-supplied fallback gets its own cursor, seeded from the current
   output position. */
225 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
226 struct uc_to_mb_fallback_locals locals
;
227 locals
.l_outbuf
= outptr
;
228 locals
.l_outbytesleft
= outleft
;
/* NOTE(review): the remaining arguments of this call are missing from the
   listing. */
230 cd
->fallbacks
.uc_to_mb_fallback(wc
,
231 uc_to_mb_write_replacement
,
/* Propagate an error reported by the nested callback. */
234 if (locals
.l_errno
!= 0) {
235 plocals
->l_errno
= locals
.l_errno
;
/* Adopt the cursor as advanced by the nested callback. */
238 outptr
= locals
.l_outbuf
;
239 outleft
= locals
.l_outbytesleft
;
/* Attempt to write U+FFFD instead. */
244 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
245 if (outcount
!= RET_ILUNI
)
247 plocals
->l_errno
= EILSEQ
;
/* NOTE(review): the condition guarding this E2BIG assignment is not
   visible in this listing. */
251 plocals
->l_errno
= E2BIG
;
254 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook with the code point and the
   caller's hook data. */
255 if (cd
->hooks
.uc_hook
)
256 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* A converter claiming more bytes than were available is fatal. */
258 if (!(outcount
<= outleft
)) abort();
259 outptr
+= outcount
; outleft
-= outcount
;
/* Publish the advanced cursor back to the shared state. */
262 plocals
->l_outbuf
= outptr
;
263 plocals
->l_outbytesleft
= outleft
;
267 #endif /* !LIBICONV_PLUG */
/* unicode_loop_convert: conversion routine that decodes input with
   cd->ifuncs.xxx_mbtowc into a code point wc and encodes it with
   cd->ofuncs.xxx_wctomb.
   The four pointer arguments are read into locals on entry and written back
   at the end (see the final four assignments).  The decoder result is
   classified into the cases labelled 1-4 in the comments below; encoding
   failures go through tag-character handling, transliteration,
   discard_ilseq, the uc_to_mb fallback and a U+FFFD attempt.
   cd->istate is restored from last_istate on the visible error paths.
   NOTE(review): this listing has lost the loop header, the declarations of
   wc/incount/outcount/result, most branch bodies, the labels and the return
   statements; the comments added here describe only what is visible. */
269 static size_t unicode_loop_convert (iconv_t icd
,
270 const char* * inbuf
, size_t *inbytesleft
,
271 char* * outbuf
, size_t *outbytesleft
)
273 conv_t cd
= (conv_t
) icd
;
/* Local cursors over the caller's input and output buffers. */
275 const unsigned char* inptr
= (const unsigned char*) *inbuf
;
276 size_t inleft
= *inbytesleft
;
277 unsigned char* outptr
= (unsigned char*) *outbuf
;
278 size_t outleft
= *outbytesleft
;
/* Remember the input state so it can be restored if this character cannot
   be completed. */
280 state_t last_istate
= cd
->istate
;
/* Decode one unit of input into wc; incount is the decoder's result code. */
284 incount
= cd
->ifuncs
.xxx_mbtowc(cd
,&wc
,inptr
,inleft
);
286 if (incount
== RET_ILSEQ
) {
287 /* Case 1: invalid input */
288 if (cd
->discard_ilseq
) {
/* The input encodings are grouped into a UCS-4/UTF-32 family and a
   UCS-2/UTF-16 family.  NOTE(review): the statements attached to each group
   and the default case are not visible in this listing. */
289 switch (cd
->iindex
) {
290 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
291 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
292 case ei_ucs4internal
: case ei_ucs4swapped
:
294 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
295 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
296 case ei_ucs2internal
: case ei_ucs2swapped
:
303 #ifndef LIBICONV_PLUG
/* Otherwise hand the invalid bytes to the user's mb_to_uc fallback. */
304 else if (cd
->fallbacks
.mb_to_uc_fallback
!= NULL
) {
305 struct mb_to_uc_fallback_locals locals
;
/* Same grouping of input encodings as above; the per-group statements are
   again missing from this listing. */
306 switch (cd
->iindex
) {
307 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
308 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
309 case ei_ucs4internal
: case ei_ucs4swapped
:
311 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
312 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
313 case ei_ucs2internal
: case ei_ucs2swapped
:
/* Seed the callback's cursor from the current output position. */
319 locals
.l_outbuf
= outptr
;
320 locals
.l_outbytesleft
= outleft
;
/* NOTE(review): the remaining arguments of this call are missing from the
   listing. */
322 cd
->fallbacks
.mb_to_uc_fallback(inptr
, incount
,
323 mb_to_uc_write_replacement
,
/* An error recorded by the callback is reported through errno. */
326 if (locals
.l_errno
!= 0) {
327 errno
= locals
.l_errno
;
/* Adopt the cursor as advanced by the callback. */
331 outptr
= locals
.l_outbuf
;
332 outleft
= locals
.l_outbytesleft
;
341 if (incount
== RET_TOOFEW(0)) {
342 /* Case 2: not enough bytes available to detect anything */
347 /* Case 3: k bytes read, but only a shift sequence */
/* Turn the decoder's encoded result into a byte count. */
348 incount
= -2-incount
;
350 /* Case 4: k bytes read, making up a wide character */
/* NOTE(review): the condition under which the input state is restored here
   is not visible in this listing. */
352 cd
->istate
= last_istate
;
/* First attempt: direct conversion of wc by the output converter. */
357 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statement controlled by this test is not visible. */
358 if (outcount
!= RET_ILUNI
)
360 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* The shift by 7 compares everything above the low seven bits, so the test
   matches exactly the 128 code points of that range. */
361 if ((wc
>> 7) == (0xe0000 >> 7))
363 /* Try transliteration. */
365 if (cd
->transliterate
) {
366 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
367 if (outcount
!= RET_ILUNI
)
/* NOTE(review): the body of the discard_ilseq branch is not visible. */
370 if (cd
->discard_ilseq
) {
374 #ifndef LIBICONV_PLUG
/* A user-supplied fallback gets its own cursor, seeded from the current
   output position. */
375 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
376 struct uc_to_mb_fallback_locals locals
;
377 locals
.l_outbuf
= outptr
;
378 locals
.l_outbytesleft
= outleft
;
/* NOTE(review): the remaining arguments of this call are missing from the
   listing. */
380 cd
->fallbacks
.uc_to_mb_fallback(wc
,
381 uc_to_mb_write_replacement
,
/* On a callback error, undo the input-state change and report via errno. */
384 if (locals
.l_errno
!= 0) {
385 cd
->istate
= last_istate
;
386 errno
= locals
.l_errno
;
/* Adopt the cursor as advanced by the callback. */
389 outptr
= locals
.l_outbuf
;
390 outleft
= locals
.l_outbytesleft
;
/* Attempt to write U+FFFD instead. */
395 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
396 if (outcount
!= RET_ILUNI
)
/* The two restores below sit on error paths whose surrounding lines (errno
   assignment, jumps) are missing from this listing. */
398 cd
->istate
= last_istate
;
404 cd
->istate
= last_istate
;
409 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook with the code point and the
   caller's hook data. */
410 if (cd
->hooks
.uc_hook
)
411 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* A converter claiming more bytes than were available is fatal. */
413 if (!(outcount
<= outleft
)) abort();
414 outptr
+= outcount
; outleft
-= outcount
;
/* Consume the input bytes; a count beyond the available input is fatal. */
417 if (!(incount
<= inleft
)) abort();
418 inptr
+= incount
; inleft
-= incount
;
/* Publish the advanced cursors back to the caller. */
420 *inbuf
= (const char*) inptr
;
421 *inbytesleft
= inleft
;
422 *outbuf
= (char*) outptr
;
423 *outbytesleft
= outleft
;
/* unicode_loop_reset: reset routine for the Unicode-pivot loop.
   With outbuf NULL or *outbuf NULL it only zeroes cd->istate and cd->ostate.
   Otherwise the visible code (a) asks the input converter's optional
   xxx_flushwc for a pending code point and writes it using the same fallback
   chain as the conversion loop, (b) calls the output converter's optional
   xxx_reset and advances *outbuf / *outbytesleft by its result, and
   (c) zeroes both states.
   NOTE(review): braces, the declaration of wc, the advance of outptr after
   the flush, the error handling after xxx_reset and all return statements
   are missing from this listing; only visible statements are described. */
427 static size_t unicode_loop_reset (iconv_t icd
,
428 char* * outbuf
, size_t *outbytesleft
)
430 conv_t cd
= (conv_t
) icd
;
/* No output buffer supplied: nothing can be written. */
431 if (outbuf
== NULL
|| *outbuf
== NULL
) {
432 /* Reset the states. */
433 memset(&cd
->istate
,'\0',sizeof(state_t
));
434 memset(&cd
->ostate
,'\0',sizeof(state_t
));
/* xxx_flushwc is optional; it is only called when the pointer is set. */
438 if (cd
->ifuncs
.xxx_flushwc
) {
/* Remember the input state so it can be restored on the error paths. */
439 state_t last_istate
= cd
->istate
;
/* A nonzero result means wc was filled in and must be written out. */
441 if (cd
->ifuncs
.xxx_flushwc(cd
, &wc
)) {
442 unsigned char* outptr
= (unsigned char*) *outbuf
;
443 size_t outleft
= *outbytesleft
;
/* First attempt: direct conversion of wc by the output converter. */
444 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
/* NOTE(review): the statement controlled by this test is not visible. */
445 if (outcount
!= RET_ILUNI
)
447 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
/* The shift by 7 compares everything above the low seven bits, so the test
   matches exactly the 128 code points of that range. */
448 if ((wc
>> 7) == (0xe0000 >> 7))
450 /* Try transliteration. */
452 if (cd
->transliterate
) {
453 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
454 if (outcount
!= RET_ILUNI
)
/* NOTE(review): the body of the discard_ilseq branch is not visible. */
457 if (cd
->discard_ilseq
) {
461 #ifndef LIBICONV_PLUG
/* A user-supplied fallback gets its own cursor, seeded from the current
   output position. */
462 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
463 struct uc_to_mb_fallback_locals locals
;
464 locals
.l_outbuf
= outptr
;
465 locals
.l_outbytesleft
= outleft
;
/* NOTE(review): the remaining arguments of this call are missing from the
   listing. */
467 cd
->fallbacks
.uc_to_mb_fallback(wc
,
468 uc_to_mb_write_replacement
,
/* On a callback error, undo the input-state change and report via errno. */
471 if (locals
.l_errno
!= 0) {
472 cd
->istate
= last_istate
;
473 errno
= locals
.l_errno
;
/* Adopt the cursor as advanced by the callback. */
476 outptr
= locals
.l_outbuf
;
477 outleft
= locals
.l_outbytesleft
;
/* Attempt to write U+FFFD instead. */
482 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
483 if (outcount
!= RET_ILUNI
)
/* The two restores below sit on error paths whose surrounding lines are
   missing from this listing. */
485 cd
->istate
= last_istate
;
490 cd
->istate
= last_istate
;
494 #ifndef LIBICONV_PLUG
/* Notify the optional per-character hook with the code point and the
   caller's hook data. */
495 if (cd
->hooks
.uc_hook
)
496 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* A converter claiming more bytes than were available is fatal. */
498 if (!(outcount
<= outleft
)) abort();
/* Publish the output cursor back to the caller. */
502 *outbuf
= (char*) outptr
;
503 *outbytesleft
= outleft
;
/* xxx_reset is optional; it is only called when the pointer is set. */
506 if (cd
->ofuncs
.xxx_reset
) {
507 unsigned char* outptr
= (unsigned char*) *outbuf
;
508 size_t outleft
= *outbytesleft
;
509 int outcount
= cd
->ofuncs
.xxx_reset(cd
,outptr
,outleft
);
/* NOTE(review): the handling of a negative or error result from xxx_reset
   is not visible in this listing. */
514 if (!(outcount
<= outleft
)) abort();
/* Advance the caller's cursor by the number of bytes xxx_reset reported. */
515 *outbuf
= (char*) (outptr
+ outcount
);
516 *outbytesleft
= outleft
- outcount
;
/* Finally zero both conversion states. */
518 memset(&cd
->istate
,'\0',sizeof(state_t
));
519 memset(&cd
->ostate
,'\0',sizeof(state_t
));