2 * Copyright (C) 1999-2003, 2005-2006, 2008 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
20 /* This file defines the conversion loop via Unicode as a pivot encoding. */
22 /* Attempt to transliterate wc. Return code as in xxx_wctomb. */
/* Substitution strategies are tried in the order they appear below:
     1. Hangul syllable -> Jamo sequence via johab_hangul_decompose, only
        when cd->oflags has HAVE_HANGUL_JAMO set;
     2. a CJK variant character followed by U+303E;
     3. a stand-in for the quotation marks U+2018..U+201A, chosen from
        cd->oflags (HAVE_QUOTATION_MARKS, HAVE_ACCENTS);
     4. the replacement sequence found through translit_index() in
        translit_data, recursing for replacement characters that the
        output converter reports as RET_ILUNI.
   The multi-character strategies snapshot cd->ostate, outptr and outleft
   before writing and restore all three on failure.
   Parameters:
     cd       conversion descriptor; its ostate may be advanced on success
     wc       the character to transliterate
     outptr   where output bytes are written
     outleft  number of bytes available at outptr
   On success the value returned is the number of bytes written
   (outptr - backup_outptr). */
23 static int unicode_transliterate (conv_t cd
, ucs4_t wc
,
24 unsigned char* outptr
, size_t outleft
)
26 if (cd
->oflags
& HAVE_HANGUL_JAMO
) {
27 /* Decompose Hangul into Jamo. Use double-width Jamo (contained
28 in all Korean encodings and ISO-2022-JP-2), not half-width Jamo
29 (contained in Unicode only). */
31 int ret
= johab_hangul_decompose(cd
,buf
,wc
);
32 if (ret
!= RET_ILUNI
) {
33 /* we know 1 <= ret <= 3 */
/* Snapshot the output state so a partially written decomposition can be
   rolled back. */
34 state_t backup_state
= cd
->ostate
;
35 unsigned char* backup_outptr
= outptr
;
36 size_t backup_outleft
= outleft
;
38 for (i
= 0; i
< ret
; i
++) {
40 sub_outcount
= RET_TOOSMALL
;
41 goto johab_hangul_failed
;
43 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
44 if (sub_outcount
<= RET_ILUNI
)
45 goto johab_hangul_failed
;
/* Sanity check: the converter must not report more bytes than were
   available. */
46 if (!(sub_outcount
<= outleft
)) abort();
47 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
/* Total bytes emitted for the whole decomposition. */
49 return outptr
-backup_outptr
;
/* Roll back: restore converter state and output position to the snapshot. */
51 cd
->ostate
= backup_state
;
52 outptr
= backup_outptr
;
53 outleft
= backup_outleft
;
54 if (sub_outcount
!= RET_ILUNI
)
59 /* Try to use a variant, but postfix it with
60 U+303E IDEOGRAPHIC VARIATION INDICATOR
61 (cf. Ken Lunde's "CJKV information processing", p. 188). */
65 else if (wc
== 0x30f6)
/* Characters in [0x4e00, 0xa000) are looked up in cjk_variants_indx,
   indexed relative to 0x4e00. */
67 else if (wc
>= 0x4e00 && wc
< 0xa000)
68 indx
= cjk_variants_indx
[wc
-0x4e00];
72 unsigned short variant
= cjk_variants
[indx
];
/* Bit 0x8000 of the table entry is extracted as a flag named "last". */
73 unsigned short last
= variant
& 0x8000;
/* Candidate output: the variant followed by U+303E. */
76 buf
[0] = variant
; buf
[1] = 0x303e;
/* Snapshot the output state before attempting this candidate. */
78 state_t backup_state
= cd
->ostate
;
79 unsigned char* backup_outptr
= outptr
;
80 size_t backup_outleft
= outleft
;
82 for (i
= 0; i
< 2; i
++) {
84 sub_outcount
= RET_TOOSMALL
;
87 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,buf
[i
],outleft
);
88 if (sub_outcount
<= RET_ILUNI
)
90 if (!(sub_outcount
<= outleft
)) abort();
91 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
93 return outptr
-backup_outptr
;
/* Roll back the failed candidate. */
95 cd
->ostate
= backup_state
;
96 outptr
= backup_outptr
;
97 outleft
= backup_outleft
;
98 if (sub_outcount
!= RET_ILUNI
)
106 if (wc
>= 0x2018 && wc
<= 0x201a) {
107 /* Special case for quotation marks 0x2018, 0x2019, 0x201a */
/* Substitute selection:
     HAVE_QUOTATION_MARKS set: 0x201a becomes 0x2018, others stay as is;
     else HAVE_ACCENTS set:    0x2019 becomes 0x00b4, others 0x0060;
     else:                     0x0027. */
109 (cd
->oflags
& HAVE_QUOTATION_MARKS
110 ? (wc
== 0x201a ? 0x2018 : wc
)
111 : (cd
->oflags
& HAVE_ACCENTS
112 ? (wc
==0x2019 ? 0x00b4 : 0x0060) /* use accents */
113 : 0x0027 /* use apostrophe */
115 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,substitute
,outleft
);
116 if (outcount
!= RET_ILUNI
)
120 /* Use the transliteration table. */
121 int indx
= translit_index(wc
);
123 const unsigned int * cp
= &translit_data
[indx
];
/* The entry at translit_data[indx] is the length of the replacement
   sequence; after the increment cp points at the replacement characters. */
124 unsigned int num
= *cp
++;
/* Snapshot the output state before writing the replacement sequence. */
125 state_t backup_state
= cd
->ostate
;
126 unsigned char* backup_outptr
= outptr
;
127 size_t backup_outleft
= outleft
;
130 for (i
= 0; i
< num
; i
++) {
132 sub_outcount
= RET_TOOSMALL
;
133 goto translit_failed
;
135 sub_outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,cp
[i
],outleft
);
136 if (sub_outcount
== RET_ILUNI
)
137 /* Recursive transliteration. */
138 sub_outcount
= unicode_transliterate(cd
,cp
[i
],outptr
,outleft
);
139 if (sub_outcount
<= RET_ILUNI
)
140 goto translit_failed
;
141 if (!(sub_outcount
<= outleft
)) abort();
142 outptr
+= sub_outcount
; outleft
-= sub_outcount
;
144 return outptr
-backup_outptr
;
/* Roll back the partially written replacement sequence. */
146 cd
->ostate
= backup_state
;
147 outptr
= backup_outptr
;
148 outleft
= backup_outleft
;
149 if (sub_outcount
!= RET_ILUNI
)
156 #ifndef LIBICONV_PLUG
/* Bookkeeping passed as callback_arg to uc_to_mb_write_replacement:
   l_outbuf is the current write position and l_outbytesleft the number of
   bytes still free there; both are advanced by the callback. */
158 struct uc_to_mb_fallback_locals
{
159 unsigned char* l_outbuf
;
160 size_t l_outbytesleft
;
/* Write callback for replacement bytes: appends the buflen bytes at buf to
   the output buffer tracked by callback_arg, which is interpreted as a
   struct uc_to_mb_fallback_locals *.
   Once l_errno is non-zero the callback does nothing.  When fewer than
   buflen bytes remain, l_errno is set to E2BIG.  A copy advances l_outbuf
   and reduces l_outbytesleft by buflen. */
164 static void uc_to_mb_write_replacement (const char *buf
, size_t buflen
,
167 struct uc_to_mb_fallback_locals
* plocals
=
168 (struct uc_to_mb_fallback_locals
*) callback_arg
;
169 /* Do nothing if already encountered an error in a previous call. */
170 if (plocals
->l_errno
== 0) {
171 /* Attempt to copy the passed buffer to the output buffer. */
172 if (plocals
->l_outbytesleft
< buflen
)
173 plocals
->l_errno
= E2BIG
;
175 memcpy(plocals
->l_outbuf
, buf
, buflen
);
/* Account for the bytes just written. */
176 plocals
->l_outbuf
+= buflen
;
177 plocals
->l_outbytesleft
-= buflen
;
/* Bookkeeping passed as callback_arg to mb_to_uc_write_replacement:
   l_outbuf is the current write position and l_outbytesleft the number of
   bytes still free there; the callback stores updated values back when it
   finishes. */
182 struct mb_to_uc_fallback_locals
{
184 unsigned char* l_outbuf
;
185 size_t l_outbytesleft
;
/* Write callback for replacement characters: encodes the buflen Unicode
   characters at buf into the output buffer tracked by callback_arg, which
   is interpreted as a struct mb_to_uc_fallback_locals *.
   Once l_errno is non-zero the callback does nothing.  For each character
   the steps visible below are, in order: the output converter's
   xxx_wctomb; a check for Unicode tag characters; unicode_transliterate
   when cd->transliterate is set; the cd->discard_ilseq check; the
   uc_to_mb_fallback callback when one is installed (not under
   LIBICONV_PLUG); and finally encoding U+FFFD.  Failures are reported
   through l_errno as EILSEQ or E2BIG.  The final output position and
   remaining space are stored back into the locals structure. */
189 static void mb_to_uc_write_replacement (const unsigned int *buf
, size_t buflen
,
192 struct mb_to_uc_fallback_locals
* plocals
=
193 (struct mb_to_uc_fallback_locals
*) callback_arg
;
194 /* Do nothing if already encountered an error in a previous call. */
195 if (plocals
->l_errno
== 0) {
196 /* Attempt to convert the passed buffer to the target encoding. */
197 conv_t cd
= plocals
->l_cd
;
/* Work on local copies of the output cursor; they are written back to
   plocals after the loop. */
198 unsigned char* outptr
= plocals
->l_outbuf
;
199 size_t outleft
= plocals
->l_outbytesleft
;
200 for (; buflen
> 0; buf
++, buflen
--) {
204 plocals
->l_errno
= E2BIG
;
/* First choice: encode the character directly. */
207 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
208 if (outcount
!= RET_ILUNI
)
210 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
211 if ((wc
>> 7) == (0xe0000 >> 7))
213 /* Try transliteration. */
214 if (cd
->transliterate
) {
215 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
216 if (outcount
!= RET_ILUNI
)
219 if (cd
->discard_ilseq
) {
223 #ifndef LIBICONV_PLUG
224 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
/* Let the installed fallback emit replacement bytes through
   uc_to_mb_write_replacement, starting at the current output cursor. */
225 struct uc_to_mb_fallback_locals locals
;
226 locals
.l_outbuf
= outptr
;
227 locals
.l_outbytesleft
= outleft
;
229 cd
->fallbacks
.uc_to_mb_fallback(wc
,
230 uc_to_mb_write_replacement
,
/* Propagate an error recorded by the nested callback to our own caller. */
233 if (locals
.l_errno
!= 0) {
234 plocals
->l_errno
= locals
.l_errno
;
/* Adopt the cursor as advanced by the nested callback. */
237 outptr
= locals
.l_outbuf
;
238 outleft
= locals
.l_outbytesleft
;
/* Last resort: try to encode U+FFFD in place of the character. */
243 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
244 if (outcount
!= RET_ILUNI
)
246 plocals
->l_errno
= EILSEQ
;
250 plocals
->l_errno
= E2BIG
;
253 #ifndef LIBICONV_PLUG
/* Notify the per-character hook, if one is installed. */
254 if (cd
->hooks
.uc_hook
)
255 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* Sanity check: the converter must not report more bytes than were
   available. */
257 if (!(outcount
<= outleft
)) abort();
258 outptr
+= outcount
; outleft
-= outcount
;
/* Publish the advanced output cursor back to the shared locals. */
261 plocals
->l_outbuf
= outptr
;
262 plocals
->l_outbytesleft
= outleft
;
266 #endif /* !LIBICONV_PLUG */
/* Conversion loop using Unicode as the pivot: each input character is
   decoded with cd->ifuncs.xxx_mbtowc and re-encoded with
   cd->ofuncs.xxx_wctomb.
   Parameters:
     icd           conversion descriptor (cast to conv_t)
     inbuf         in/out: current input position
     inbytesleft   in/out: input bytes remaining
     outbuf        in/out: current output position
     outbytesleft  in/out: output bytes remaining
   The decoder's result is classified into the four cases labelled below
   (invalid input, too few bytes, shift sequence only, complete character).
   When a character cannot be encoded directly, the steps visible below
   are, in order: the Unicode tag character check, unicode_transliterate
   when cd->transliterate is set, the cd->discard_ilseq check, the
   uc_to_mb_fallback callback (not under LIBICONV_PLUG), and encoding
   U+FFFD.  On the failure paths cd->istate is restored to the value it had
   before the decode.  The four in/out arguments are updated from the local
   cursors before returning. */
268 static size_t unicode_loop_convert (iconv_t icd
,
269 const char* * inbuf
, size_t *inbytesleft
,
270 char* * outbuf
, size_t *outbytesleft
)
272 conv_t cd
= (conv_t
) icd
;
/* Local cursors over the caller's buffers; published back at the end. */
274 const unsigned char* inptr
= (const unsigned char*) *inbuf
;
275 size_t inleft
= *inbytesleft
;
276 unsigned char* outptr
= (unsigned char*) *outbuf
;
277 size_t outleft
= *outbytesleft
;
/* Remember the input state so it can be restored if this character
   cannot be completed. */
279 state_t last_istate
= cd
->istate
;
/* Decode one character; incount carries either a byte count or one of the
   conditions distinguished in the cases below. */
283 incount
= cd
->ifuncs
.xxx_mbtowc(cd
,&wc
,inptr
,inleft
);
285 if ((unsigned int)(-1-incount
) % 2 == (unsigned int)(-1-RET_ILSEQ
) % 2) {
286 /* Case 1: invalid input, possibly after a shift sequence */
287 incount
= DECODE_SHIFT_ILSEQ(incount
);
288 if (cd
->discard_ilseq
) {
289 switch (cd
->iindex
) {
290 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
291 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
292 case ei_ucs4internal
: case ei_ucs4swapped
:
294 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
295 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
296 case ei_ucs2internal
: case ei_ucs2swapped
:
303 #ifndef LIBICONV_PLUG
304 else if (cd
->fallbacks
.mb_to_uc_fallback
!= NULL
) {
305 unsigned int incount2
;
306 struct mb_to_uc_fallback_locals locals
;
307 switch (cd
->iindex
) {
308 case ei_ucs4
: case ei_ucs4be
: case ei_ucs4le
:
309 case ei_utf32
: case ei_utf32be
: case ei_utf32le
:
310 case ei_ucs4internal
: case ei_ucs4swapped
:
312 case ei_ucs2
: case ei_ucs2be
: case ei_ucs2le
:
313 case ei_utf16
: case ei_utf16be
: case ei_utf16le
:
314 case ei_ucs2internal
: case ei_ucs2swapped
:
/* Hand the fallback the current output cursor; it writes replacement
   characters through mb_to_uc_write_replacement. */
320 locals
.l_outbuf
= outptr
;
321 locals
.l_outbytesleft
= outleft
;
/* The fallback receives incount2 bytes starting incount bytes past
   inptr. */
323 cd
->fallbacks
.mb_to_uc_fallback((const char*)inptr
+incount
, incount2
,
324 mb_to_uc_write_replacement
,
/* The callback reported an error: advance the input past the first
   incount bytes and surface the error through errno. */
327 if (locals
.l_errno
!= 0) {
328 inptr
+= incount
; inleft
-= incount
;
329 errno
= locals
.l_errno
;
/* Adopt the output cursor as advanced by the callback. */
334 outptr
= locals
.l_outbuf
;
335 outleft
= locals
.l_outbytesleft
;
340 inptr
+= incount
; inleft
-= incount
;
345 if (incount
== RET_TOOFEW(0)) {
346 /* Case 2: not enough bytes available to detect anything */
351 /* Case 3: k bytes read, but only a shift sequence */
352 incount
= DECODE_TOOFEW(incount
);
354 /* Case 4: k bytes read, making up a wide character */
/* Restore the input state captured before the decode. */
356 cd
->istate
= last_istate
;
/* First choice: encode the character directly. */
361 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
362 if (outcount
!= RET_ILUNI
)
364 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
365 if ((wc
>> 7) == (0xe0000 >> 7))
367 /* Try transliteration. */
369 if (cd
->transliterate
) {
370 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
371 if (outcount
!= RET_ILUNI
)
374 if (cd
->discard_ilseq
) {
378 #ifndef LIBICONV_PLUG
379 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
/* Let the installed fallback emit replacement bytes through
   uc_to_mb_write_replacement, starting at the current output cursor. */
380 struct uc_to_mb_fallback_locals locals
;
381 locals
.l_outbuf
= outptr
;
382 locals
.l_outbytesleft
= outleft
;
384 cd
->fallbacks
.uc_to_mb_fallback(wc
,
385 uc_to_mb_write_replacement
,
/* The callback reported an error: restore the input state and surface
   the error through errno. */
388 if (locals
.l_errno
!= 0) {
389 cd
->istate
= last_istate
;
390 errno
= locals
.l_errno
;
/* Adopt the output cursor as advanced by the callback. */
393 outptr
= locals
.l_outbuf
;
394 outleft
= locals
.l_outbytesleft
;
/* Last resort: try to encode U+FFFD in place of the character. */
399 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
400 if (outcount
!= RET_ILUNI
)
402 cd
->istate
= last_istate
;
408 cd
->istate
= last_istate
;
413 #ifndef LIBICONV_PLUG
/* Notify the per-character hook, if one is installed. */
414 if (cd
->hooks
.uc_hook
)
415 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* Sanity check: the encoder must not report more bytes than were
   available. */
417 if (!(outcount
<= outleft
)) abort();
418 outptr
+= outcount
; outleft
-= outcount
;
/* Sanity check: the decoder must not consume more bytes than were
   supplied. */
421 if (!(incount
<= inleft
)) abort();
422 inptr
+= incount
; inleft
-= incount
;
/* Publish the final cursor positions back to the caller. */
424 *inbuf
= (const char*) inptr
;
425 *inbytesleft
= inleft
;
426 *outbuf
= (char*) outptr
;
427 *outbytesleft
= outleft
;
/* Reset entry point for the Unicode-pivot conversion.
   Parameters:
     icd           conversion descriptor (cast to conv_t)
     outbuf        in/out: current output position, or NULL / pointer to NULL
     outbytesleft  in/out: output bytes remaining
   When outbuf or *outbuf is NULL, cd->istate and cd->ostate are zeroed.
   The remaining code dereferences *outbuf: if the input converter provides
   xxx_flushwc and it yields a character, that character is encoded with
   the same sequence of attempts as in the conversion loop (xxx_wctomb, tag
   character check, transliteration, discard_ilseq, uc_to_mb_fallback,
   U+FFFD), restoring cd->istate on the failure paths.  If the output
   converter provides xxx_reset, it is called and *outbuf / *outbytesleft
   are advanced by the count it returns.  Both states are then zeroed. */
431 static size_t unicode_loop_reset (iconv_t icd
,
432 char* * outbuf
, size_t *outbytesleft
)
434 conv_t cd
= (conv_t
) icd
;
435 if (outbuf
== NULL
|| *outbuf
== NULL
) {
436 /* Reset the states. */
437 memset(&cd
->istate
,'\0',sizeof(state_t
));
438 memset(&cd
->ostate
,'\0',sizeof(state_t
));
442 if (cd
->ifuncs
.xxx_flushwc
) {
/* Remember the input state so it can be restored if the flushed character
   cannot be written. */
443 state_t last_istate
= cd
->istate
;
445 if (cd
->ifuncs
.xxx_flushwc(cd
, &wc
)) {
446 unsigned char* outptr
= (unsigned char*) *outbuf
;
447 size_t outleft
= *outbytesleft
;
/* First choice: encode the flushed character directly. */
448 int outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,wc
,outleft
);
449 if (outcount
!= RET_ILUNI
)
451 /* Handle Unicode tag characters (range U+E0000..U+E007F). */
452 if ((wc
>> 7) == (0xe0000 >> 7))
454 /* Try transliteration. */
456 if (cd
->transliterate
) {
457 outcount
= unicode_transliterate(cd
,wc
,outptr
,outleft
);
458 if (outcount
!= RET_ILUNI
)
461 if (cd
->discard_ilseq
) {
465 #ifndef LIBICONV_PLUG
466 else if (cd
->fallbacks
.uc_to_mb_fallback
!= NULL
) {
/* Let the installed fallback emit replacement bytes through
   uc_to_mb_write_replacement, starting at the current output cursor. */
467 struct uc_to_mb_fallback_locals locals
;
468 locals
.l_outbuf
= outptr
;
469 locals
.l_outbytesleft
= outleft
;
471 cd
->fallbacks
.uc_to_mb_fallback(wc
,
472 uc_to_mb_write_replacement
,
/* The callback reported an error: restore the input state and surface
   the error through errno. */
475 if (locals
.l_errno
!= 0) {
476 cd
->istate
= last_istate
;
477 errno
= locals
.l_errno
;
/* Adopt the output cursor as advanced by the callback. */
480 outptr
= locals
.l_outbuf
;
481 outleft
= locals
.l_outbytesleft
;
/* Last resort: try to encode U+FFFD in place of the character. */
486 outcount
= cd
->ofuncs
.xxx_wctomb(cd
,outptr
,0xFFFD,outleft
);
487 if (outcount
!= RET_ILUNI
)
489 cd
->istate
= last_istate
;
494 cd
->istate
= last_istate
;
498 #ifndef LIBICONV_PLUG
/* Notify the per-character hook, if one is installed. */
499 if (cd
->hooks
.uc_hook
)
500 (*cd
->hooks
.uc_hook
)(wc
, cd
->hooks
.data
);
/* Sanity check: the encoder must not report more bytes than were
   available. */
502 if (!(outcount
<= outleft
)) abort();
/* Publish the output cursor back to the caller. */
506 *outbuf
= (char*) outptr
;
507 *outbytesleft
= outleft
;
510 if (cd
->ofuncs
.xxx_reset
) {
511 unsigned char* outptr
= (unsigned char*) *outbuf
;
512 size_t outleft
= *outbytesleft
;
/* Ask the output converter to emit whatever its reset requires. */
513 int outcount
= cd
->ofuncs
.xxx_reset(cd
,outptr
,outleft
);
518 if (!(outcount
<= outleft
)) abort();
519 *outbuf
= (char*) (outptr
+ outcount
);
520 *outbytesleft
= outleft
- outcount
;
/* Return both converter states to their initial, all-zero value. */
522 memset(&cd
->istate
,'\0',sizeof(state_t
));
523 memset(&cd
->ostate
,'\0',sizeof(state_t
));