4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
34 #include <sys/sunddi.h>
35 #include <sys/byteorder.h>
36 #include <sys/errno.h>
37 #include <sys/modctl.h>
38 #include <sys/u8_textprep.h>
39 #include <sys/kiconv.h>
40 #include <sys/kiconv_cck_common.h>
41 #include <sys/kiconv_tc.h>
42 #include <sys/kiconv_big5_utf8.h>
43 #include <sys/kiconv_euctw_utf8.h>
44 #include <sys/kiconv_hkscs_utf8.h>
45 #include <sys/kiconv_cp950hkscs_utf8.h>
46 #include <sys/kiconv_utf8_big5.h>
47 #include <sys/kiconv_utf8_euctw.h>
48 #include <sys/kiconv_utf8_cp950hkscs.h>
49 #include <sys/kiconv_utf8_hkscs.h>
51 /* 4 HKSCS-2004 code points each map to a sequence of 2 Unicode code points. */
52 static uchar_t hkscs_special_sequence
[][4] = {
53 { 0xc3, 0x8a, 0xcc, 0x84 }, /* 0x8862 */
54 { 0xc3, 0x8a, 0xcc, 0x8c }, /* 0x8864 */
55 { 0xc3, 0xaa, 0xcc, 0x84 }, /* 0x88a3 */
56 { 0xc3, 0xaa, 0xcc, 0x8c } /* 0x88a5 */
59 /* 4 Unicode code point pairs each map to 1 HKSCS-2004 code point. */
60 static uint32_t ucs_special_sequence
[] = {
62 0x8862, /* U+00ca U+0304 */
63 0x8864, /* U+00ca U+030c */
65 0x88a3, /* U+00ea U+0304 */
66 0x88a5 /* U+00ea U+030c */
69 typedef int8_t (*kiconv_big5toutf8_t
)(uint32_t value
, uchar_t
*ob
,
70 uchar_t
*obtail
, size_t *ret_val
);
72 static int8_t utf8_to_big5(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
73 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
74 static int8_t utf8_to_euctw(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
75 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
76 static int8_t utf8_to_cp950hkscs(uint32_t utf8
, uchar_t
**inbuf
,
77 uchar_t
*ibtail
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
78 static int8_t utf8_to_big5hkscs(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
79 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
80 static int8_t big5_to_utf8(uint32_t big5_val
, uchar_t
*ob
, uchar_t
*obtail
,
82 static int8_t big5hkscs_to_utf8(uint32_t hkscs_val
, uchar_t
*ob
,
83 uchar_t
*obtail
, size_t *ret_val
);
84 static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val
, uchar_t
*ob
,
85 uchar_t
*obtail
, size_t *ret_val
);
86 static int8_t euctw_to_utf8(size_t plane_no
, uint32_t euctw_val
,
87 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
88 static uint32_t get_unicode_from_UDA(size_t plane_no
, uchar_t byte1
,
91 #define KICONV_TC_BIG5 (0x01)
92 #define KICONV_TC_BIG5HKSCS (0x02)
93 #define KICONV_TC_CP950HKSCS (0x03)
94 #define KICONV_TC_EUCTW (0x04)
95 #define KICONV_TC_MAX_MAGIC_ID (0x04)
100 return ((void *)KICONV_TC_BIG5
);
106 return ((void *)KICONV_TC_BIG5HKSCS
);
112 return ((void *)KICONV_TC_CP950HKSCS
);
118 return ((void *)KICONV_TC_EUCTW
);
124 if ((uintptr_t)s
> KICONV_TC_MAX_MAGIC_ID
)
131 * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
134 kiconv_fr_big5_common(void *kcd
, char **inbuf
, size_t *inbytesleft
,
135 char **outbuf
, size_t *outbytesleft
, int *errno
,
136 kiconv_big5toutf8_t ptr_big5touf8
)
146 /* Check on the kiconv code conversion descriptor. */
147 if (kcd
== NULL
|| kcd
== (void *)-1) {
152 /* If this is a state reset request, process and return. */
153 if (inbuf
== NULL
|| *inbuf
== NULL
) {
158 ib
= (uchar_t
*)*inbuf
;
159 ob
= (uchar_t
*)*outbuf
;
160 ibtail
= ib
+ *inbytesleft
;
161 obtail
= ob
+ *outbytesleft
;
163 while (ib
< ibtail
) {
164 if (KICONV_IS_ASCII(*ib
)) {
166 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
174 * Issue EILSEQ error if the first byte is not a
175 * valid BIG5/HKSCS leading byte.
177 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib
)) {
178 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
182 * Issue EINVAL error if input buffer has an incomplete
183 * character at the end of the buffer.
185 if (ibtail
- ib
< 2) {
186 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
190 * Issue EILSEQ error if the second byte is not
191 * a valid BIG5/HKSCS trailing byte.
193 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib
+ 1))) {
194 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
197 /* Now we have a valid BIG5/HKSCS character. */
198 big5_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
199 sz
= ptr_big5touf8(big5_val
, ob
, obtail
, &ret_val
);
202 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
210 *inbytesleft
= ibtail
- ib
;
211 *outbuf
= (char *)ob
;
212 *outbytesleft
= obtail
- ob
;
218 * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
222 kiconvstr_fr_big5_common(uchar_t
*ib
, size_t *inlen
, uchar_t
*ob
,
223 size_t *outlen
, int flag
, int *errno
,
224 kiconv_big5toutf8_t ptr_big5touf8
)
232 boolean_t do_not_ignore_null
;
235 ibtail
= ib
+ *inlen
;
236 obtail
= ob
+ *outlen
;
237 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
239 while (ib
< ibtail
) {
240 if (*ib
== '\0' && do_not_ignore_null
)
243 if (KICONV_IS_ASCII(*ib
)) {
245 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
254 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib
)) {
255 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
258 if (ibtail
- ib
< 2) {
259 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL
);
262 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib
+ 1))) {
263 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
267 big5_val
= (big5_val
<< 8) | *ib
++;
268 sz
= ptr_big5touf8(big5_val
, ob
, obtail
, &ret_val
);
272 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
279 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
281 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
284 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
285 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
286 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
290 *inlen
= ibtail
- ib
;
291 *outlen
= obtail
- ob
;
297 * Encoding convertor from BIG5 to UTF-8.
300 kiconv_fr_big5(void *kcd
, char **inbuf
, size_t *inbytesleft
, char **outbuf
,
301 size_t *outbytesleft
, int *errno
)
303 return (kiconv_fr_big5_common(kcd
, inbuf
, inbytesleft
, outbuf
,
304 outbytesleft
, errno
, big5_to_utf8
));
308 * String based encoding convertor from BIG5 to UTF-8.
311 kiconvstr_fr_big5(char *inarray
, size_t *inlen
, char *outarray
,
312 size_t *outlen
, int flag
, int *errno
)
314 return (kiconvstr_fr_big5_common((uchar_t
*)inarray
, inlen
,
315 (uchar_t
*)outarray
, outlen
, flag
, errno
,
320 * Encoding convertor from BIG5-HKSCS to UTF-8.
323 kiconv_fr_big5hkscs(void *kcd
, char **inbuf
, size_t *inbytesleft
,
324 char **outbuf
, size_t *outbytesleft
, int *errno
)
326 return kiconv_fr_big5_common(kcd
, inbuf
, inbytesleft
, outbuf
,
327 outbytesleft
, errno
, big5hkscs_to_utf8
);
331 * String based encoding convertor from BIG5-HKSCS to UTF-8.
334 kiconvstr_fr_big5hkscs(char *inarray
, size_t *inlen
, char *outarray
,
335 size_t *outlen
, int flag
, int *errno
)
337 return kiconvstr_fr_big5_common((uchar_t
*)inarray
, inlen
,
338 (uchar_t
*)outarray
, outlen
, flag
, errno
, big5hkscs_to_utf8
);
342 * Encoding convertor from CP950-HKSCS to UTF-8.
345 kiconv_fr_cp950hkscs(void *kcd
, char **inbuf
, size_t *inbytesleft
,
346 char **outbuf
, size_t *outbytesleft
, int *errno
)
348 return kiconv_fr_big5_common(kcd
, inbuf
, inbytesleft
, outbuf
,
349 outbytesleft
, errno
, cp950hkscs_to_utf8
);
353 * String based encoding convertor from CP950-HKSCS to UTF-8.
356 kiconvstr_fr_cp950hkscs(char *inarray
, size_t *inlen
, char *outarray
,
357 size_t *outlen
, int flag
, int *errno
)
359 return kiconvstr_fr_big5_common((uchar_t
*)inarray
, inlen
,
360 (uchar_t
*)outarray
, outlen
, flag
, errno
, cp950hkscs_to_utf8
);
364 * Encoding convertor from EUC-TW to UTF-8.
367 kiconv_fr_euctw(void *kcd
, char **inbuf
, size_t *inbytesleft
,
368 char **outbuf
, size_t *outbytesleft
, int *errno
)
381 /* Check on the kiconv code conversion descriptor. */
382 if (kcd
== NULL
|| kcd
== (void *)-1) {
387 /* If this is a state reset request, process and return. */
388 if (inbuf
== NULL
|| *inbuf
== NULL
) {
393 ib
= (uchar_t
*)*inbuf
;
394 ob
= (uchar_t
*)*outbuf
;
395 ibtail
= ib
+ *inbytesleft
;
396 obtail
= ob
+ *outbytesleft
;
398 while (ib
< ibtail
) {
399 if (KICONV_IS_ASCII(*ib
)) {
401 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
409 * Issue EILSEQ error if the first byte is not a
410 * valid EUC-TW leading byte.
412 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib
)) {
413 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
416 isplane1
= (*ib
== KICONV_TC_EUCTW_MBYTE
) ?
420 * Issue EINVAL error if input buffer has an incomplete
421 * character at the end of the buffer.
423 if (ibtail
- ib
< (isplane1
? 2 : 4)) {
424 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
428 plane_no
= isplane1
? 1 : *(ib
+ 1) - KICONV_TC_EUCTW_PMASK
;
431 * Issue EILSEQ error if the remaining bytes are not
432 * valid EUC-TW bytes.
434 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib
)) {
435 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
441 /* Now we have a valid EUC-TW character. */
443 euctw_val
= (euctw_val
<< 8) | *ib
++;
444 sz
= euctw_to_utf8(plane_no
, euctw_val
, ob
, obtail
, &ret_val
);
448 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
455 *inbytesleft
= ibtail
- ib
;
456 *outbuf
= (char *)ob
;
457 *outbytesleft
= obtail
- ob
;
463 * String based encoding convertor from EUC-TW to UTF-8.
466 kiconvstr_fr_euctw(char *inarray
, size_t *inlen
, char *outarray
,
467 size_t *outlen
, int flag
, int *errno
)
479 boolean_t do_not_ignore_null
;
482 ib
= (uchar_t
*)inarray
;
483 ob
= (uchar_t
*)outarray
;
484 ibtail
= ib
+ *inlen
;
485 obtail
= ob
+ *outlen
;
486 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
488 while (ib
< ibtail
) {
489 if (*ib
== '\0' && do_not_ignore_null
)
492 if (KICONV_IS_ASCII(*ib
)) {
494 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
503 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib
)) {
504 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
507 isplane1
= (*ib
== KICONV_TC_EUCTW_MBYTE
) ?
510 if (ibtail
- ib
< (isplane1
? 2 : 4)) {
511 if (flag
& KICONV_REPLACE_INVALID
) {
513 goto REPLACE_INVALID
;
516 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
519 plane_no
= isplane1
? 1 : *(ib
+ 1) - KICONV_TC_EUCTW_PMASK
;
521 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib
)) {
522 KICONV_SET_ERRNO_WITH_FLAG(isplane1
? 2 : 4, EILSEQ
);
529 euctw_val
= (euctw_val
<< 8) | *ib
++;
530 sz
= euctw_to_utf8(plane_no
, euctw_val
, ob
, obtail
, &ret_val
);
534 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
541 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
543 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
546 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
547 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
548 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
552 *inlen
= ibtail
- ib
;
553 *outlen
= obtail
- ob
;
559 * Encoding convertor from UTF-8 to BIG5.
562 kiconv_to_big5(void *kcd
, char **inbuf
, size_t *inbytesleft
,
563 char **outbuf
, size_t *outbytesleft
, int *errno
)
565 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
566 outbytesleft
, errno
, utf8_to_big5
);
570 * String based encoding convertor from UTF-8 to BIG5.
573 kiconvstr_to_big5(char *inarray
, size_t *inlen
, char *outarray
,
574 size_t *outlen
, int flag
, int *errno
)
576 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
577 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_big5
);
581 * Encoding convertor from UTF-8 to EUC-TW.
584 kiconv_to_euctw(void *kcd
, char **inbuf
, size_t *inbytesleft
,
585 char **outbuf
, size_t *outbytesleft
, int *errno
)
587 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
588 outbytesleft
, errno
, utf8_to_euctw
);
592 * String based encoding convertor from UTF-8 to EUC-TW.
595 kiconvstr_to_euctw(char *inarray
, size_t *inlen
, char *outarray
,
596 size_t *outlen
, int flag
, int *errno
)
598 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
599 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_euctw
);
603 * Encoding convertor from UTF-8 to CP950HKSCS.
606 kiconv_to_cp950hkscs(void *kcd
, char **inbuf
, size_t *inbytesleft
,
607 char **outbuf
, size_t *outbytesleft
, int *errno
)
609 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
610 outbytesleft
, errno
, utf8_to_cp950hkscs
);
614 * String based encoding convertor from UTF-8 to CP950HKSCS.
617 kiconvstr_to_cp950hkscs(char *inarray
, size_t *inlen
, char *outarray
,
618 size_t *outlen
, int flag
, int *errno
)
620 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
621 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_cp950hkscs
);
625 * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
628 kiconv_to_big5hkscs(void *kcd
, char **inbuf
, size_t *inbytesleft
,
629 char **outbuf
, size_t *outbytesleft
, int *errno
)
631 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
632 outbytesleft
, errno
, utf8_to_big5hkscs
);
636 * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
639 kiconvstr_to_big5hkscs(char *inarray
, size_t *inlen
, char *outarray
,
640 size_t *outlen
, int flag
, int *errno
)
642 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
643 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_big5hkscs
);
647 * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
648 * Return: > 0 - Converted successfully
652 big5_to_utf8_common(uint32_t big5_val
, uchar_t
*ob
, uchar_t
*obtail
,
653 size_t *ret_val
, kiconv_table_array_t
*table
, size_t nitems
)
659 index
= kiconv_binsearch(big5_val
, table
, nitems
);
660 u8
= table
[index
].u8
;
661 sz
= u8_number_of_bytes
[u8
[0]];
663 if (obtail
- ob
< sz
) {
664 *ret_val
= (size_t)-1;
669 (*ret_val
)++; /* Non-identical conversion */
671 for (index
= 0; index
< sz
; index
++)
678 * Convert single BIG5 character to UTF-8.
681 big5_to_utf8(uint32_t big5_val
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
683 return (big5_to_utf8_common(big5_val
, ob
, obtail
, ret_val
,
684 kiconv_big5_utf8
, KICONV_BIG5_UTF8_MAX
));
688 * Convert single CP950-HKSCS character to UTF-8.
691 cp950hkscs_to_utf8(uint32_t hkscs_val
, uchar_t
*ob
, uchar_t
*obtail
,
694 return (big5_to_utf8_common(hkscs_val
, ob
, obtail
, ret_val
,
695 kiconv_cp950hkscs_utf8
, KICONV_CP950HKSCS_UTF8_MAX
));
699 * Calculate unicode value for some CNS planes which fall in Unicode
703 get_unicode_from_UDA(size_t plane_no
, uchar_t b1
, uchar_t b2
)
706 * CNS Plane 15 is pre-allocated, so Plane 16 needs to be moved back to 15
707 * to compute the Unicode value.
712 /* 0xF0000 + (plane_no - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1) */
713 return (8836 * plane_no
+ 94 * b1
+ b2
+ 0xD2611);
717 * Convert single EUC-TW character to UTF-8.
718 * Return: > 0 - Converted successfully
722 euctw_to_utf8(size_t plane_no
, uint32_t euctw_val
, uchar_t
*ob
,
723 uchar_t
*obtail
, size_t *ret_val
)
733 index
= kiconv_binsearch(euctw_val
, kiconv_cns1_utf8
,
734 KICONV_CNS1_UTF8_MAX
);
735 u8
= kiconv_cns1_utf8
[index
].u8
;
738 index
= kiconv_binsearch(euctw_val
, kiconv_cns2_utf8
,
739 KICONV_CNS2_UTF8_MAX
);
740 u8
= kiconv_cns2_utf8
[index
].u8
;
743 index
= kiconv_binsearch(euctw_val
, kiconv_cns3_utf8
,
744 KICONV_CNS3_UTF8_MAX
);
745 u8
= kiconv_cns3_utf8
[index
].u8
;
748 index
= kiconv_binsearch(euctw_val
, kiconv_cns4_utf8
,
749 KICONV_CNS4_UTF8_MAX
);
750 u8
= kiconv_cns4_utf8
[index
].u8
;
753 index
= kiconv_binsearch(euctw_val
, kiconv_cns5_utf8
,
754 KICONV_CNS5_UTF8_MAX
);
755 u8
= kiconv_cns5_utf8
[index
].u8
;
758 index
= kiconv_binsearch(euctw_val
, kiconv_cns6_utf8
,
759 KICONV_CNS6_UTF8_MAX
);
760 u8
= kiconv_cns6_utf8
[index
].u8
;
763 index
= kiconv_binsearch(euctw_val
, kiconv_cns7_utf8
,
764 KICONV_CNS7_UTF8_MAX
);
765 u8
= kiconv_cns7_utf8
[index
].u8
;
771 u32
= get_unicode_from_UDA(plane_no
,
772 (euctw_val
& 0xFF00) >> 8, euctw_val
& 0xFF);
774 * Since U+F0000 <= u32 <= U+F8A0F, its UTF-8 sequence
775 * will occupy 4 bytes.
778 udc
[1] = (uchar_t
)(0x80 | (u32
& 0x03F000) >> 12);
779 udc
[2] = (uchar_t
)(0x80 | (u32
& 0x000FC0) >> 6);
780 udc
[3] = (uchar_t
)(0x80 | (u32
& 0x00003F));
785 index
= kiconv_binsearch(euctw_val
, kiconv_cns15_utf8
,
786 KICONV_CNS15_UTF8_MAX
);
787 u8
= kiconv_cns15_utf8
[index
].u8
;
791 u8
= kiconv_cns1_utf8
[index
].u8
;
794 sz
= u8_number_of_bytes
[u8
[0]];
795 if (obtail
- ob
< sz
) {
796 *ret_val
= (size_t)-1;
803 for (index
= 0; index
< sz
; index
++)
810 * Convert single HKSCS character to UTF-8.
811 * Return: > 0 - Converted successfully
815 big5hkscs_to_utf8(uint32_t hkscs_val
, uchar_t
*ob
, uchar_t
*obtail
,
822 index
= kiconv_binsearch(hkscs_val
, kiconv_hkscs_utf8
,
823 KICONV_HKSCS_UTF8_MAX
);
824 u8
= kiconv_hkscs_utf8
[index
].u8
;
827 * Single HKSCS-2004 character may map to 2 Unicode
831 u8
= hkscs_special_sequence
[u8
[1]];
834 sz
= u8_number_of_bytes
[u8
[0]];
837 if (obtail
- ob
< sz
) {
838 *ret_val
= (size_t)-1;
843 (*ret_val
)++; /* Non-identical conversion. */
845 for (index
= 0; index
< sz
; index
++)
852 * Convert single UTF-8 character to EUC-TW.
853 * Return: > 0 - Converted successfully
858 utf8_to_euctw(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
859 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
866 if (utf8
>= KICONV_TC_UDA_UTF8_START
&&
867 utf8
<= KICONV_TC_UDA_UTF8_END
) {
869 * Calculate EUC-TW code if utf8 is in Unicode
872 index
= (((utf8
& 0x7000000) >> 6) | ((utf8
& 0x3F0000) >> 4) |
873 ((utf8
& 0x3F00) >> 2) | (utf8
& 0x3F)) -
874 KICONV_TC_UDA_UCS4_START
;
875 plane_no
= 12 + index
/ 8836;
876 byte1
= 0xA1 + (index
% 8836) / 94;
877 byte2
= 0xA1 + index
% 94;
879 /* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
885 index
= kiconv_binsearch(utf8
, kiconv_utf8_euctw
,
886 KICONV_UTF8_EUCTW_MAX
);
890 *ret_val
= (size_t)-1;
894 *ob
++ = KICONV_ASCII_REPLACEMENT_CHAR
;
900 euctw_val
= kiconv_utf8_euctw
[index
].value
;
901 byte1
= (euctw_val
& 0xFF00) >> 8;
902 byte2
= euctw_val
& 0xFF;
903 plane_no
= euctw_val
>> 16;
906 if (obtail
- ob
< (plane_no
== 1 ? 2 : 4)) {
907 *ret_val
= (size_t)-1;
912 *ob
++ = KICONV_TC_EUCTW_MBYTE
;
913 *ob
++ = KICONV_TC_EUCTW_PMASK
+ plane_no
;
919 return (plane_no
== 1 ? 2 : 4);
923 * Convert single UTF-8 character to BIG5-HKSCS
924 * Return: > 0 - Converted successfully
928 utf8_to_big5hkscs(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
929 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
934 boolean_t special_sequence
= B_FALSE
;
936 index
= kiconv_binsearch(utf8
, kiconv_utf8_hkscs
,
937 KICONV_UTF8_HKSCS_MAX
);
938 hkscscode
= kiconv_utf8_hkscs
[index
].value
;
941 * There are 4 special code points in HKSCS-2004 which are mapped
942 * to 2 Unicode code points each.
944 if ((int32_t)hkscscode
< 0) {
945 size_t special_index
= (-(int32_t)hkscscode
- 1) * 3;
947 /* Check the following 2 bytes. */
948 if (ibtail
- *inbuf
>= 2 && **inbuf
== 0xcc &&
949 (*(*inbuf
+ 1) == 0x84 || *(*inbuf
+ 1) == 0x8c)) {
950 special_index
+= (*(*inbuf
+ 1) == 0x84 ? 1 : 2);
951 special_sequence
= B_TRUE
;
954 hkscscode
= ucs_special_sequence
[special_index
];
957 hkscslen
= (hkscscode
<= 0xFF) ? 1 : 2;
958 if (obtail
- ob
< hkscslen
) {
959 *ret_val
= (size_t)-1;
967 *ob
++ = (uchar_t
)(hkscscode
>> 8);
968 *ob
= (uchar_t
)(hkscscode
& 0xFF);
970 if (special_sequence
) { /* Advance for special sequence */
978 * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
979 * Return: > 0 - Converted successfully
983 utf8_to_big5_common(uint32_t utf8
, uchar_t
*ob
, uchar_t
*obtail
,
984 size_t *ret_val
, kiconv_table_t
*table
, size_t nitems
)
990 index
= kiconv_binsearch(utf8
, table
, nitems
);
991 big5code
= table
[index
].value
;
992 big5len
= (big5code
<= 0xFF) ? 1 : 2;
994 if (obtail
- ob
< big5len
) {
995 *ret_val
= (size_t)-1;
1003 *ob
++ = (uchar_t
)(big5code
>> 8);
1004 *ob
= (uchar_t
)(big5code
& 0xFF);
1010 * Convert single UTF-8 character to BIG5.
1014 utf8_to_big5(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
1015 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
1017 return (utf8_to_big5_common(utf8
, ob
, obtail
, ret_val
,
1018 kiconv_utf8_big5
, KICONV_UTF8_BIG5_MAX
));
1022 * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1026 utf8_to_cp950hkscs(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
1027 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
1029 return (utf8_to_big5_common(utf8
, ob
, obtail
, ret_val
,
1030 kiconv_utf8_cp950hkscs
, KICONV_UTF8_CP950HKSCS
));
1033 static kiconv_ops_t kiconv_tc_ops_tbl
[] = {
1035 "big5", "utf-8", kiconv_open_to_cck
, kiconv_to_big5
,
1036 kiconv_close_to_cck
, kiconvstr_to_big5
1039 "utf-8", "big5", open_fr_big5
, kiconv_fr_big5
,
1040 close_fr_tc
, kiconvstr_fr_big5
1044 "big5-hkscs", "utf-8", kiconv_open_to_cck
, kiconv_to_big5hkscs
,
1045 kiconv_close_to_cck
, kiconvstr_to_big5hkscs
1048 "utf-8", "big5-hkscs", open_fr_big5hkscs
, kiconv_fr_big5hkscs
,
1049 close_fr_tc
, kiconvstr_fr_big5hkscs
1053 "euc-tw", "utf-8", kiconv_open_to_cck
, kiconv_to_euctw
,
1054 kiconv_close_to_cck
, kiconvstr_to_euctw
1057 "utf-8", "euc-tw", open_fr_euctw
, kiconv_fr_euctw
,
1058 close_fr_tc
, kiconvstr_fr_euctw
1062 "cp950-hkscs", "utf-8", kiconv_open_to_cck
,
1063 kiconv_to_cp950hkscs
, kiconv_close_to_cck
,
1064 kiconvstr_to_cp950hkscs
1067 "utf-8", "cp950-hkscs", open_fr_cp950hkscs
,
1068 kiconv_fr_cp950hkscs
, close_fr_tc
, kiconvstr_fr_cp950hkscs
1072 static kiconv_module_info_t kiconv_tc_info
= {
1073 "kiconv_tc", /* module name */
1074 sizeof (kiconv_tc_ops_tbl
) / sizeof (kiconv_tc_ops_tbl
[0]),
1082 static struct modlkiconv modlkiconv_tc
= {
1084 "kiconv Traditional Chinese module 1.0",
1088 static struct modlinkage modlinkage
= {
1090 (void *)&modlkiconv_tc
,
1099 err
= mod_install(&modlinkage
);
1101 cmn_err(CE_WARN
, "kiconv_tc: failed to load kernel module");
1112 * If this module is being used, then we cannot remove the module.
1113 * The following check will catch pretty much all the usual cases.
1115 * Any remaining cases will be caught by kiconv_unregister_module()
1116 * during mod_remove() below.
1118 if (kiconv_module_ref_count(KICONV_MODULE_ID_TC
))
1121 err
= mod_remove(&modlinkage
);
1123 cmn_err(CE_WARN
, "kiconv_tc: failed to remove kernel module");
1129 _info(struct modinfo
*modinfop
)
1131 return (mod_info(&modlinkage
, modinfop
));