4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
34 #include <sys/sunddi.h>
35 #include <sys/byteorder.h>
36 #include <sys/errno.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
39 #include <sys/u8_textprep.h>
40 #include <sys/kiconv_cck_common.h>
41 #include <sys/kiconv_sc.h>
42 #include <sys/kiconv_gb18030_utf8.h>
43 #include <sys/kiconv_gb2312_utf8.h>
44 #include <sys/kiconv_utf8_gb18030.h>
45 #include <sys/kiconv_utf8_gb2312.h>
47 static int8_t gb2312_to_utf8(uchar_t byte1
, uchar_t byte2
, uchar_t
*ob
,
48 uchar_t
*obtail
, size_t *ret_val
);
49 static int8_t gbk_to_utf8(uint32_t gbk_val
, uchar_t
*ob
, uchar_t
*obtail
,
50 size_t *ret_val
, boolean_t isgbk4
);
51 static int8_t utf8_to_gb2312(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
52 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
);
53 static int8_t utf8_to_gbk(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
54 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
);
55 static int8_t utf8_to_gb18030(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
56 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
);
/*
 * Magic IDs for the supported source codesets.  Elsewhere in this file they
 * are returned cast to (void *) and a pointer value is rejected when it
 * exceeds KICONV_SC_MAX_MAGIC_ID, so the maximum must track the largest ID.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
#define	KICONV_SC_MAX_MAGIC_ID		(0x03)
66 return ((void *)KICONV_SC_GB18030
);
72 return ((void *)KICONV_SC_GBK
);
78 return ((void *)KICONV_SC_EUCCN
);
84 if ((uintptr_t)s
> KICONV_SC_MAX_MAGIC_ID
)
91 * Encoding convertor from UTF-8 to GB18030.
94 kiconv_to_gb18030(void *kcd
, char **inbuf
, size_t *inbytesleft
,
95 char **outbuf
, size_t *outbytesleft
, int *errno
)
98 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
99 outbytesleft
, errno
, utf8_to_gb18030
);
103 * String based encoding convertor from UTF-8 to GB18030.
106 kiconvstr_to_gb18030(char *inarray
, size_t *inlen
, char *outarray
,
107 size_t *outlen
, int flag
, int *errno
)
109 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
110 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_gb18030
);
114 * Encoding convertor from GB18030 to UTF-8.
117 kiconv_fr_gb18030(void *kcd
, char **inbuf
, size_t *inbytesleft
,
118 char **outbuf
, size_t *outbytesleft
, int *errno
)
129 /* Check on the kiconv code conversion descriptor. */
130 if (kcd
== NULL
|| kcd
== (void *)-1) {
135 /* If this is a state reset request, process and return. */
136 if (inbuf
== NULL
|| *inbuf
== NULL
) {
141 ib
= (uchar_t
*)*inbuf
;
142 ob
= (uchar_t
*)*outbuf
;
143 ibtail
= ib
+ *inbytesleft
;
144 obtail
= ob
+ *outbytesleft
;
146 while (ib
< ibtail
) {
147 if (KICONV_IS_ASCII(*ib
)) {
149 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
157 * Issue EILSEQ error if the first byte is not a
158 * valid GB18030 leading byte.
160 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib
)) {
161 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
164 isgbk4
= (ibtail
- ib
< 2) ? B_FALSE
:
165 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib
+ 1));
168 if (ibtail
- ib
< 4) {
169 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
172 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib
+ 1)) &&
173 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib
+ 2)) &&
174 KICONV_SC_IS_GB18030_4th_BYTE(*(ib
+ 3)))) {
175 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
178 gb_val
= (uint32_t)(*ib
) << 24 |
179 (uint32_t)(*(ib
+ 1)) << 16 |
180 (uint32_t)(*(ib
+ 2)) << 8 | *(ib
+ 3);
182 if (ibtail
- ib
< 2) {
183 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
186 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib
+ 1))) {
187 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
190 gb_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
193 sz
= gbk_to_utf8(gb_val
, ob
, obtail
, &ret_val
, isgbk4
);
195 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
198 ib
+= isgbk4
? 4 : 2;
203 *inbytesleft
= ibtail
- ib
;
204 *outbuf
= (char *)ob
;
205 *outbytesleft
= obtail
- ob
;
211 * String based encoding convertor from GB18030 to UTF-8.
214 kiconvstr_fr_gb18030(char *inarray
, size_t *inlen
, char *outarray
,
215 size_t *outlen
, int flag
, int *errno
)
226 boolean_t do_not_ignore_null
;
229 ib
= (uchar_t
*)inarray
;
230 ob
= (uchar_t
*)outarray
;
231 ibtail
= ib
+ *inlen
;
232 obtail
= ob
+ *outlen
;
233 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
235 while (ib
< ibtail
) {
236 if (*ib
== '\0' && do_not_ignore_null
)
239 if (KICONV_IS_ASCII(*ib
)) {
241 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
250 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib
)) {
251 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
254 isgbk4
= (ibtail
- ib
< 2) ? B_FALSE
:
255 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib
+ 1));
258 if (ibtail
- ib
< 4) {
259 if (flag
& KICONV_REPLACE_INVALID
) {
261 goto REPLACE_INVALID
;
264 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
267 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib
+ 1)) &&
268 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib
+ 2)) &&
269 KICONV_SC_IS_GB18030_4th_BYTE(*(ib
+ 3)))) {
270 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ
);
273 gb_val
= (uint32_t)(*ib
) << 24 |
274 (uint32_t)(*(ib
+ 1)) << 16 |
275 (uint32_t)(*(ib
+ 2)) << 8 | *(ib
+ 3);
277 if (ibtail
- ib
< 2) {
278 if (flag
& KICONV_REPLACE_INVALID
) {
280 goto REPLACE_INVALID
;
283 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
286 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib
+ 1))) {
287 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
290 gb_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
293 sz
= gbk_to_utf8(gb_val
, ob
, obtail
, &ret_val
, isgbk4
);
295 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
298 ib
+= isgbk4
? 4 : 2;
303 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
305 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
308 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
309 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
310 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
314 *inlen
= ibtail
- ib
;
315 *outlen
= obtail
- ob
;
321 * Encoding convertor from UTF-8 to GBK.
324 kiconv_to_gbk(void *kcd
, char **inbuf
, size_t *inbytesleft
,
325 char **outbuf
, size_t *outbytesleft
, int *errno
)
328 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
329 outbytesleft
, errno
, utf8_to_gbk
);
333 * String based encoding convertor from UTF-8 to GBK.
336 kiconvstr_to_gbk(char *inarray
, size_t *inlen
, char *outarray
,
337 size_t *outlen
, int flag
, int *errno
)
339 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
340 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_gbk
);
344 * Encoding convertor from GBK to UTF-8.
347 kiconv_fr_gbk(void *kcd
, char **inbuf
, size_t *inbytesleft
,
348 char **outbuf
, size_t *outbytesleft
, int *errno
)
358 /* Check on the kiconv code conversion descriptor. */
359 if (kcd
== NULL
|| kcd
== (void *)-1) {
364 /* If this is a state reset request, process and return. */
365 if (inbuf
== NULL
|| *inbuf
== NULL
) {
370 ib
= (uchar_t
*)*inbuf
;
371 ob
= (uchar_t
*)*outbuf
;
372 ibtail
= ib
+ *inbytesleft
;
373 obtail
= ob
+ *outbytesleft
;
375 while (ib
< ibtail
) {
376 if (KICONV_IS_ASCII(*ib
)) {
378 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
386 * Issue EILSEQ error if the first byte is not a
387 * valid GBK leading byte.
389 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib
)) {
390 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
394 * Issue EINVAL error if input buffer has an incomplete
395 * character at the end of the buffer.
397 if (ibtail
- ib
< 2) {
398 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
402 * Issue EILSEQ error if the remaining byte is not
405 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib
+ 1))) {
406 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
409 /* Now we have a valid GBK character. */
410 gb_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
411 sz
= gbk_to_utf8(gb_val
, ob
, obtail
, &ret_val
, B_FALSE
);
414 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
422 *inbytesleft
= ibtail
- ib
;
423 *outbuf
= (char *)ob
;
424 *outbytesleft
= obtail
- ob
;
430 * String based encoding convertor from GBK to UTF-8.
433 kiconvstr_fr_gbk(char *inarray
, size_t *inlen
, char *outarray
,
434 size_t *outlen
, int flag
, int *errno
)
444 boolean_t do_not_ignore_null
;
447 ib
= (uchar_t
*)inarray
;
448 ob
= (uchar_t
*)outarray
;
449 ibtail
= ib
+ *inlen
;
450 obtail
= ob
+ *outlen
;
451 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
453 while (ib
< ibtail
) {
454 if (*ib
== '\0' && do_not_ignore_null
)
457 if (KICONV_IS_ASCII(*ib
)) {
459 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
468 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib
)) {
469 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
472 if (ibtail
- ib
< 2) {
473 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL
);
476 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib
+ 1))) {
477 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
480 gb_val
= (uint32_t)(*ib
<< 8) | *(ib
+ 1);
481 sz
= gbk_to_utf8(gb_val
, ob
, obtail
, &ret_val
, B_FALSE
);
484 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
492 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
494 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
497 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
498 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
499 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
503 *inlen
= ibtail
- ib
;
504 *outlen
= obtail
- ob
;
510 * Encoding convertor from UTF-8 to EUC-CN.
513 kiconv_to_euccn(void *kcd
, char **inbuf
, size_t *inbytesleft
,
514 char **outbuf
, size_t *outbytesleft
, int *errno
)
516 return kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
517 outbytesleft
, errno
, utf8_to_gb2312
);
521 * String based encoding convertor from UTF-8 to EUC-CN.
524 kiconvstr_to_euccn(char *inarray
, size_t *inlen
, char *outarray
,
525 size_t *outlen
, int flag
, int *errno
)
527 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
528 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_gb2312
);
532 * Encoding convertor from EUC-CN to UTF-8.
535 kiconv_fr_euccn(void *kcd
, char **inbuf
, size_t *inbytesleft
,
536 char **outbuf
, size_t *outbytesleft
, int *errno
)
545 /* Check on the kiconv code conversion descriptor. */
546 if (kcd
== NULL
|| kcd
== (void *)-1) {
551 /* If this is a state reset request, process and return. */
552 if (inbuf
== NULL
|| *inbuf
== NULL
) {
557 ib
= (uchar_t
*)*inbuf
;
558 ob
= (uchar_t
*)*outbuf
;
559 ibtail
= ib
+ *inbytesleft
;
560 obtail
= ob
+ *outbytesleft
;
562 while (ib
< ibtail
) {
563 if (KICONV_IS_ASCII(*ib
)) {
565 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
573 * Issue EILSEQ error if the first byte is not a
574 * valid GB2312 leading byte.
576 if (! KICONV_SC_IS_GB2312_BYTE(*ib
)) {
577 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
581 * Issue EINVAL error if input buffer has an incomplete
582 * character at the end of the buffer.
584 if (ibtail
- ib
< 2) {
585 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
589 * Issue EILSEQ error if the remaining byte is not
590 * a valid GB2312 byte.
592 if (! KICONV_SC_IS_GB2312_BYTE(*(ib
+ 1))) {
593 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
596 /* Now we have a valid GB2312 character */
597 sz
= gb2312_to_utf8(*ib
, *(ib
+ 1), ob
, obtail
, &ret_val
);
599 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
607 *inbytesleft
= ibtail
- ib
;
608 *outbuf
= (char *)ob
;
609 *outbytesleft
= obtail
- ob
;
615 * String based encoding convertor from EUC-CN to UTF-8.
618 kiconvstr_fr_euccn(char *inarray
, size_t *inlen
, char *outarray
,
619 size_t *outlen
, int flag
, int *errno
)
628 boolean_t do_not_ignore_null
;
631 ib
= (uchar_t
*)inarray
;
632 ob
= (uchar_t
*)outarray
;
633 ibtail
= ib
+ *inlen
;
634 obtail
= ob
+ *outlen
;
635 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
637 while (ib
< ibtail
) {
638 if (*ib
== '\0' && do_not_ignore_null
)
641 if (KICONV_IS_ASCII(*ib
)) {
643 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
652 if (! KICONV_SC_IS_GB2312_BYTE(*ib
)) {
653 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
656 if (ibtail
- ib
< 2) {
657 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL
);
660 if (! KICONV_SC_IS_GB2312_BYTE(*(ib
+ 1))) {
661 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
664 sz
= gb2312_to_utf8(*ib
, *(ib
+ 1), ob
, obtail
, &ret_val
);
666 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
674 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
676 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
679 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
680 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
681 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
685 *inlen
= ibtail
- ib
;
686 *outlen
= obtail
- ob
;
692 * Convert single GB2312 character to UTF-8.
693 * Return: > 0 - Converted successfully
697 gb2312_to_utf8(uchar_t b1
, uchar_t b2
, uchar_t
*ob
, uchar_t
*obtail
,
704 /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705 index
= b1
* 94 + b2
- 0x3BBF;
707 if (index
>= KICONV_GB2312_UTF8_MAX
)
708 index
= KICONV_GB2312_UTF8_MAX
- 1; /* Map to 0xEFBFBD */
710 u8
= kiconv_gb2312_utf8
[index
];
711 sz
= u8_number_of_bytes
[u8
[0]];
713 if (obtail
- ob
< sz
) {
714 *ret_val
= (size_t)-1;
718 for (index
= 0; index
< sz
; index
++)
722 * As kiconv_gb2312_utf8 contains multiple KICONV_UTF8_REPLACEMENT_CHAR
723 * elements, we need to check further.
725 if (sz
== KICONV_UTF8_REPLACEMENT_CHAR_LEN
&&
726 u8
[0] == KICONV_UTF8_REPLACEMENT_CHAR1
&&
727 u8
[1] == KICONV_UTF8_REPLACEMENT_CHAR2
&&
728 u8
[2] == KICONV_UTF8_REPLACEMENT_CHAR3
)
735 * Convert single GB18030 or GBK character to UTF-8.
736 * Return: > 0 - Converted successfully
740 gbk_to_utf8(uint32_t gbk_val
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
,
749 if (gbk_val
>= KICONV_SC_PLANE1_GB18030_START
) {
753 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754 * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755 * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756 * ((gbk_val & 0xFF) - 0x30) +
757 * KICONV_SC_PLANE1_UCS4_START;
759 u32
= (gbk_val
>> 24) * 12600 +
760 ((gbk_val
& 0xFF0000) >> 16) * 1260 +
761 ((gbk_val
& 0xFF00) >> 8) * 10 +
762 (gbk_val
& 0xFF) - 0x1BA0FA;
763 u8array
[0] = (uchar_t
)(0xF0 | ((u32
& 0x1C0000) >> 18));
764 u8array
[1] = (uchar_t
)(0x80 | ((u32
& 0x03F000) >> 12));
765 u8array
[2] = (uchar_t
)(0x80 | ((u32
& 0x000FC0) >> 6));
766 u8array
[3] = (uchar_t
)(0x80 | (u32
& 0x00003F));
770 index
= kiconv_binsearch(gbk_val
,
771 kiconv_gbk4_utf8
, KICONV_GBK4_UTF8_MAX
);
772 u8
= kiconv_gbk4_utf8
[index
].u8
;
775 index
= kiconv_binsearch(gbk_val
,
776 kiconv_gbk_utf8
, KICONV_GBK_UTF8_MAX
);
777 u8
= kiconv_gbk_utf8
[index
].u8
;
780 sz
= u8_number_of_bytes
[u8
[0]];
781 if (obtail
- ob
< sz
) {
782 *ret_val
= (size_t)-1;
787 (*ret_val
)++; /* Non-identical conversion */
789 for (index
= 0; index
< sz
; index
++)
796 * Convert single UTF-8 character to GB18030.
797 * Return: > 0 - Converted successfully
802 utf8_to_gb18030(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
803 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
)
809 if (utf8
>= KICONV_SC_PLANE1_UTF8_START
) {
810 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
813 u32
= (((utf8
& 0x07000000) >> 6) | ((utf8
& 0x3F0000) >> 4) |
814 ((utf8
& 0x3F00) >> 2) | (utf8
& 0x3F)) -
815 KICONV_SC_PLANE1_UCS4_START
;
816 gbkcode
= ((u32
/ 12600 + 0x90) << 24) |
817 (((u32
% 12600) / 1260 + 0x30) << 16) |
818 (((u32
% 1260) / 10 + 0x81) << 8) | (u32
% 10 + 0x30);
822 index
= kiconv_binsearch(utf8
, kiconv_utf8_gb18030
,
823 KICONV_UTF8_GB18030_MAX
);
824 gbkcode
= kiconv_utf8_gb18030
[index
].value
;
825 KICONV_SC_GET_GB_LEN(gbkcode
, gbklen
);
828 if (obtail
- ob
< gbklen
) {
834 (*ret
)++; /* Non-identical conversion */
837 *ob
++ = (uchar_t
)(gbkcode
>> 8);
838 } else if (gbklen
== 4) {
839 *ob
++ = (uchar_t
)(gbkcode
>> 24);
840 *ob
++ = (uchar_t
)(gbkcode
>> 16);
841 *ob
++ = (uchar_t
)(gbkcode
>> 8);
843 *ob
= (uchar_t
)(gbkcode
& 0xFF);
849 * Convert single UTF-8 character to GBK.
850 * Return: > 0 - Converted successfully
855 utf8_to_gbk(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
856 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
)
862 index
= kiconv_binsearch(utf8
, kiconv_utf8_gb18030
,
863 KICONV_UTF8_GB18030_MAX
);
864 gbkcode
= kiconv_utf8_gb18030
[index
].value
;
865 KICONV_SC_GET_GB_LEN(gbkcode
, gbklen
);
867 /* GBK and GB18030 share the same table, so check the length. */
870 gbkcode
= kiconv_utf8_gb18030
[index
].value
;
874 if (obtail
- ob
< gbklen
) {
880 (*ret
)++; /* Non-identical conversion */
883 *ob
++ = (uchar_t
)(gbkcode
>> 8);
884 *ob
= (uchar_t
)(gbkcode
& 0xFF);
890 * Convert single UTF-8 character to GB2312.
891 * Return: > 0 - Converted successfully
896 utf8_to_gb2312(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*intail
,
897 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret
)
903 index
= kiconv_binsearch(utf8
, kiconv_utf8_gb2312
,
904 KICONV_UTF8_GB2312_MAX
);
905 gbcode
= kiconv_utf8_gb2312
[index
].value
;
906 gblen
= (gbcode
<= 0xFF) ? 1 : 2;
908 if (obtail
- ob
< gblen
) {
917 *ob
++ = (uchar_t
)(gbcode
>> 8);
918 *ob
= (uchar_t
)(gbcode
& 0xFF);
923 static kiconv_ops_t kiconv_sc_ops_tbl
[] = {
925 "gb18030", "utf-8", kiconv_open_to_cck
, kiconv_to_gb18030
,
926 kiconv_close_to_cck
, kiconvstr_to_gb18030
929 "utf-8", "gb18030", open_fr_gb18030
, kiconv_fr_gb18030
,
930 close_fr_sc
, kiconvstr_fr_gb18030
933 "gbk", "utf-8", kiconv_open_to_cck
, kiconv_to_gbk
,
934 kiconv_close_to_cck
, kiconvstr_to_gbk
937 "utf-8", "gbk", open_fr_gbk
, kiconv_fr_gbk
,
938 close_fr_sc
, kiconvstr_fr_gbk
941 "euccn", "utf-8", kiconv_open_to_cck
, kiconv_to_euccn
,
942 kiconv_close_to_cck
, kiconvstr_to_euccn
945 "utf-8", "euccn", open_fr_euccn
, kiconv_fr_euccn
,
946 close_fr_sc
, kiconvstr_fr_euccn
950 static kiconv_module_info_t kiconv_sc_info
= {
951 "kiconv_sc", /* module name */
952 sizeof (kiconv_sc_ops_tbl
) / sizeof (kiconv_sc_ops_tbl
[0]),
960 static struct modlkiconv modlkiconv_sc
= {
962 "kiconv Simplified Chinese module 1.0",
966 static struct modlinkage modlinkage
= {
968 (void *)&modlkiconv_sc
,
977 err
= mod_install(&modlinkage
);
979 cmn_err(CE_WARN
, "kiconv_sc: failed to load kernel module");
990 * If this module is being used, then we cannot remove the module.
991 * The following check will catch pretty much all of the usual cases.
993 * Any remaining cases will be caught by kiconv_unregister_module()
994 * during mod_remove() below.
996 if (kiconv_module_ref_count(KICONV_MODULE_ID_SC
))
999 err
= mod_remove(&modlinkage
);
1001 cmn_err(CE_WARN
, "kiconv_sc: failed to remove kernel module");
1007 _info(struct modinfo
*modinfop
)
1009 return (mod_info(&modlinkage
, modinfop
));