Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / kiconv / kiconv_sc / kiconv_sc.c
blobecbdd5cb3e192a821cb1ebb3cf93d2e65ac9a8a0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
33 #include <sys/kmem.h>
34 #include <sys/sunddi.h>
35 #include <sys/byteorder.h>
36 #include <sys/errno.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
39 #include <sys/u8_textprep.h>
40 #include <sys/kiconv_cck_common.h>
41 #include <sys/kiconv_sc.h>
42 #include <sys/kiconv_gb18030_utf8.h>
43 #include <sys/kiconv_gb2312_utf8.h>
44 #include <sys/kiconv_utf8_gb18030.h>
45 #include <sys/kiconv_utf8_gb2312.h>
47 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
48 uchar_t *obtail, size_t *ret_val);
49 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
50 size_t *ret_val, boolean_t isgbk4);
51 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
52 uchar_t *ob, uchar_t *obtail, size_t *ret);
53 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
54 uchar_t *ob, uchar_t *obtail, size_t *ret);
55 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
56 uchar_t *ob, uchar_t *obtail, size_t *ret);
/*
 * Magic ids handed out as the conversion descriptor by the open_fr_*()
 * routines.  close_fr_sc() rejects any descriptor whose value is larger
 * than KICONV_SC_MAX_MAGIC_ID.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
#define	KICONV_SC_MAX_MAGIC_ID		(0x03)
63 static void *
64 open_fr_gb18030()
66 return ((void *)KICONV_SC_GB18030);
69 static void *
70 open_fr_gbk()
72 return ((void *)KICONV_SC_GBK);
75 static void *
76 open_fr_euccn()
78 return ((void *)KICONV_SC_EUCCN);
81 static int
82 close_fr_sc(void *s)
84 if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
85 return (EBADF);
87 return (0);
91 * Encoding convertor from UTF-8 to GB18030.
93 size_t
94 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
95 char **outbuf, size_t *outbytesleft, int *errno)
98 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
99 outbytesleft, errno, utf8_to_gb18030);
103 * String based encoding convertor from UTF-8 to GB18030.
105 size_t
106 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
107 size_t *outlen, int flag, int *errno)
109 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
110 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
114 * Encoding convertor from GB18030 to UTF-8.
116 size_t
117 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
118 char **outbuf, size_t *outbytesleft, int *errno)
120 uchar_t *ib;
121 uchar_t *ob;
122 uchar_t *ibtail;
123 uchar_t *obtail;
124 size_t ret_val;
125 int8_t sz;
126 uint32_t gb_val;
127 boolean_t isgbk4;
129 /* Check on the kiconv code conversion descriptor. */
130 if (kcd == NULL || kcd == (void *)-1) {
131 *errno = EBADF;
132 return ((size_t)-1);
135 /* If this is a state reset request, process and return. */
136 if (inbuf == NULL || *inbuf == NULL) {
137 return (0);
140 ret_val = 0;
141 ib = (uchar_t *)*inbuf;
142 ob = (uchar_t *)*outbuf;
143 ibtail = ib + *inbytesleft;
144 obtail = ob + *outbytesleft;
146 while (ib < ibtail) {
147 if (KICONV_IS_ASCII(*ib)) {
148 if (ob >= obtail) {
149 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
152 *ob++ = *ib++;
153 continue;
157 * Issue EILSEQ error if the first byte is not a
158 * valid GB18030 leading byte.
160 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
161 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
164 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
165 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
167 if (isgbk4) {
168 if (ibtail - ib < 4) {
169 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
172 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
173 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
174 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
175 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
178 gb_val = (uint32_t)(*ib) << 24 |
179 (uint32_t)(*(ib + 1)) << 16 |
180 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
181 } else {
182 if (ibtail - ib < 2) {
183 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
186 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
187 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
190 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
193 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
194 if (sz < 0) {
195 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
198 ib += isgbk4 ? 4 : 2;
199 ob += sz;
202 *inbuf = (char *)ib;
203 *inbytesleft = ibtail - ib;
204 *outbuf = (char *)ob;
205 *outbytesleft = obtail - ob;
207 return (ret_val);
211 * String based encoding convertor from GB18030 to UTF-8.
213 size_t
214 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
215 size_t *outlen, int flag, int *errno)
217 uchar_t *ib;
218 uchar_t *ob;
219 uchar_t *ibtail;
220 uchar_t *obtail;
221 uchar_t *oldib;
222 size_t ret_val;
223 int8_t sz;
224 uint32_t gb_val;
225 boolean_t isgbk4;
226 boolean_t do_not_ignore_null;
228 ret_val = 0;
229 ib = (uchar_t *)inarray;
230 ob = (uchar_t *)outarray;
231 ibtail = ib + *inlen;
232 obtail = ob + *outlen;
233 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
235 while (ib < ibtail) {
236 if (*ib == '\0' && do_not_ignore_null)
237 break;
239 if (KICONV_IS_ASCII(*ib)) {
240 if (ob >= obtail) {
241 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
244 *ob++ = *ib++;
245 continue;
248 oldib = ib;
250 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
251 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
254 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
255 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
257 if (isgbk4) {
258 if (ibtail - ib < 4) {
259 if (flag & KICONV_REPLACE_INVALID) {
260 ib = ibtail;
261 goto REPLACE_INVALID;
264 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
267 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
268 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
269 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
270 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
273 gb_val = (uint32_t)(*ib) << 24 |
274 (uint32_t)(*(ib + 1)) << 16 |
275 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
276 } else {
277 if (ibtail - ib < 2) {
278 if (flag & KICONV_REPLACE_INVALID) {
279 ib = ibtail;
280 goto REPLACE_INVALID;
283 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
286 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
287 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
290 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
293 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
294 if (sz < 0) {
295 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
298 ib += isgbk4 ? 4 : 2;
299 ob += sz;
300 continue;
302 REPLACE_INVALID:
303 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
304 ib = oldib;
305 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
308 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
309 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
310 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
311 ret_val++;
314 *inlen = ibtail - ib;
315 *outlen = obtail - ob;
317 return (ret_val);
321 * Encoding convertor from UTF-8 to GBK.
323 size_t
324 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
325 char **outbuf, size_t *outbytesleft, int *errno)
328 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
329 outbytesleft, errno, utf8_to_gbk);
333 * String based encoding convertor from UTF-8 to GBK.
335 size_t
336 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
337 size_t *outlen, int flag, int *errno)
339 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
340 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
344 * Encoding convertor from GBK to UTF-8.
346 size_t
347 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
348 char **outbuf, size_t *outbytesleft, int *errno)
350 uchar_t *ib;
351 uchar_t *ob;
352 uchar_t *ibtail;
353 uchar_t *obtail;
354 size_t ret_val;
355 int8_t sz;
356 uint32_t gb_val;
358 /* Check on the kiconv code conversion descriptor. */
359 if (kcd == NULL || kcd == (void *)-1) {
360 *errno = EBADF;
361 return ((size_t)-1);
364 /* If this is a state reset request, process and return. */
365 if (inbuf == NULL || *inbuf == NULL) {
366 return (0);
369 ret_val = 0;
370 ib = (uchar_t *)*inbuf;
371 ob = (uchar_t *)*outbuf;
372 ibtail = ib + *inbytesleft;
373 obtail = ob + *outbytesleft;
375 while (ib < ibtail) {
376 if (KICONV_IS_ASCII(*ib)) {
377 if (ob >= obtail) {
378 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
381 *ob++ = *ib++;
382 continue;
386 * Issue EILSEQ error if the first byte is not a
387 * valid GBK leading byte.
389 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
390 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
394 * Issue EINVAL error if input buffer has an incomplete
395 * character at the end of the buffer.
397 if (ibtail - ib < 2) {
398 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
402 * Issue EILSEQ error if the remaining byte is not
403 * a valid GBK byte.
405 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
406 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
409 /* Now we have a valid GBK character. */
410 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
411 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
413 if (sz < 0) {
414 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
417 ib += 2;
418 ob += sz;
421 *inbuf = (char *)ib;
422 *inbytesleft = ibtail - ib;
423 *outbuf = (char *)ob;
424 *outbytesleft = obtail - ob;
426 return (ret_val);
430 * String based encoding convertor from GBK to UTF-8.
432 size_t
433 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
434 size_t *outlen, int flag, int *errno)
436 uchar_t *ib;
437 uchar_t *ob;
438 uchar_t *ibtail;
439 uchar_t *obtail;
440 uchar_t *oldib;
441 size_t ret_val;
442 int8_t sz;
443 uint32_t gb_val;
444 boolean_t do_not_ignore_null;
446 ret_val = 0;
447 ib = (uchar_t *)inarray;
448 ob = (uchar_t *)outarray;
449 ibtail = ib + *inlen;
450 obtail = ob + *outlen;
451 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
453 while (ib < ibtail) {
454 if (*ib == '\0' && do_not_ignore_null)
455 break;
457 if (KICONV_IS_ASCII(*ib)) {
458 if (ob >= obtail) {
459 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
462 *ob++ = *ib++;
463 continue;
466 oldib = ib;
468 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
469 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
472 if (ibtail - ib < 2) {
473 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
476 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
477 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
480 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
481 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
483 if (sz < 0) {
484 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
487 ib += 2;
488 ob += sz;
489 continue;
491 REPLACE_INVALID:
492 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
493 ib = oldib;
494 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
497 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
498 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
499 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
500 ret_val++;
503 *inlen = ibtail - ib;
504 *outlen = obtail - ob;
506 return (ret_val);
510 * Encoding convertor from UTF-8 to EUC-CN.
512 size_t
513 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
514 char **outbuf, size_t *outbytesleft, int *errno)
516 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
517 outbytesleft, errno, utf8_to_gb2312);
521 * String based encoding convertor from UTF-8 to EUC-CN.
523 size_t
524 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
525 size_t *outlen, int flag, int *errno)
527 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
528 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
532 * Encoding converto from EUC-CN to UTF-8 code.
534 size_t
535 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
536 char **outbuf, size_t *outbytesleft, int *errno)
538 uchar_t *ib;
539 uchar_t *ob;
540 uchar_t *ibtail;
541 uchar_t *obtail;
542 size_t ret_val;
543 int8_t sz;
545 /* Check on the kiconv code conversion descriptor. */
546 if (kcd == NULL || kcd == (void *)-1) {
547 *errno = EBADF;
548 return ((size_t)-1);
551 /* If this is a state reset request, process and return. */
552 if (inbuf == NULL || *inbuf == NULL) {
553 return (0);
556 ret_val = 0;
557 ib = (uchar_t *)*inbuf;
558 ob = (uchar_t *)*outbuf;
559 ibtail = ib + *inbytesleft;
560 obtail = ob + *outbytesleft;
562 while (ib < ibtail) {
563 if (KICONV_IS_ASCII(*ib)) {
564 if (ob >= obtail) {
565 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
568 *ob++ = *ib++;
569 continue;
573 * Issue EILSEQ error if the first byte is not a
574 * valid GB2312 leading byte.
576 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
577 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
581 * Issue EINVAL error if input buffer has an incomplete
582 * character at the end of the buffer.
584 if (ibtail - ib < 2) {
585 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
589 * Issue EILSEQ error if the remaining byte is not
590 * a valid GB2312 byte.
592 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
593 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
596 /* Now we have a valid GB2312 character */
597 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
598 if (sz < 0) {
599 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
602 ib += 2;
603 ob += sz;
606 *inbuf = (char *)ib;
607 *inbytesleft = ibtail - ib;
608 *outbuf = (char *)ob;
609 *outbytesleft = obtail - ob;
611 return (ret_val);
615 * String based encoding convertor from EUC-CN to UTF-8.
617 size_t
618 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
619 size_t *outlen, int flag, int *errno)
621 uchar_t *ib;
622 uchar_t *ob;
623 uchar_t *ibtail;
624 uchar_t *obtail;
625 uchar_t *oldib;
626 size_t ret_val;
627 int8_t sz;
628 boolean_t do_not_ignore_null;
630 ret_val = 0;
631 ib = (uchar_t *)inarray;
632 ob = (uchar_t *)outarray;
633 ibtail = ib + *inlen;
634 obtail = ob + *outlen;
635 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
637 while (ib < ibtail) {
638 if (*ib == '\0' && do_not_ignore_null)
639 break;
641 if (KICONV_IS_ASCII(*ib)) {
642 if (ob >= obtail) {
643 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
646 *ob++ = *ib++;
647 continue;
650 oldib = ib;
652 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
653 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
656 if (ibtail - ib < 2) {
657 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
660 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
661 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
664 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
665 if (sz < 0) {
666 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
669 ib += 2;
670 ob += sz;
671 continue;
673 REPLACE_INVALID:
674 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
675 ib = oldib;
676 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
679 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
680 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
681 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
682 ret_val++;
685 *inlen = ibtail - ib;
686 *outlen = obtail - ob;
688 return (ret_val);
692 * Convert single GB2312 character to UTF-8.
693 * Return: > 0 - Converted successfully
694 * = -1 - E2BIG
696 static int8_t
697 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
698 size_t *ret_val)
700 size_t index;
701 int8_t sz;
702 uchar_t *u8;
704 /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705 index = b1 * 94 + b2 - 0x3BBF;
707 if (index >= KICONV_GB2312_UTF8_MAX)
708 index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
710 u8 = kiconv_gb2312_utf8[index];
711 sz = u8_number_of_bytes[u8[0]];
713 if (obtail - ob < sz) {
714 *ret_val = (size_t)-1;
715 return (-1);
718 for (index = 0; index < sz; index++)
719 *ob++ = u8[index];
722 * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
723 * elements, so need to ckeck more.
725 if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
726 u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
727 u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
728 u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
729 (*ret_val)++;
731 return (sz);
735 * Convert single GB18030 or GBK character to UTF-8.
736 * Return: > 0 - Converted successfully
737 * = -1 - E2BIG
739 static int8_t
740 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
741 boolean_t isgbk4)
743 size_t index;
744 int8_t sz;
745 uchar_t u8array[4];
746 uchar_t *u8;
748 if (isgbk4) {
749 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
750 uint32_t u32;
753 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754 * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755 * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756 * (gbk_val & 0xFF - 0x30)+
757 * KICONV_SC_PLANE1_UCS4_START;
759 u32 = (gbk_val >> 24) * 12600 +
760 ((gbk_val & 0xFF0000) >> 16) * 1260 +
761 ((gbk_val & 0xFF00) >> 8) * 10 +
762 (gbk_val & 0xFF) - 0x1BA0FA;
763 u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
764 u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
765 u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
766 u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
767 u8 = u8array;
768 index = 1;
769 } else {
770 index = kiconv_binsearch(gbk_val,
771 kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
772 u8 = kiconv_gbk4_utf8[index].u8;
774 } else {
775 index = kiconv_binsearch(gbk_val,
776 kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
777 u8 = kiconv_gbk_utf8[index].u8;
780 sz = u8_number_of_bytes[u8[0]];
781 if (obtail - ob < sz) {
782 *ret_val = (size_t)-1;
783 return (-1);
786 if (index == 0)
787 (*ret_val)++; /* Non-identical conversion */
789 for (index = 0; index < sz; index++)
790 *ob++ = u8[index];
792 return (sz);
796 * Convert single UTF-8 character to GB18030.
797 * Return: > 0 - Converted successfully
798 * = -1 - E2BIG
800 /* ARGSUSED */
801 static int8_t
802 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
803 uchar_t *ob, uchar_t *obtail, size_t *ret)
805 size_t index;
806 int8_t gbklen;
807 uint32_t gbkcode;
809 if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
810 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
811 uint32_t u32;
813 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
814 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
815 KICONV_SC_PLANE1_UCS4_START;
816 gbkcode = ((u32 / 12600 + 0x90) << 24) |
817 (((u32 % 12600) / 1260 + 0x30) << 16) |
818 (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
819 gbklen = 4;
820 index = 1;
821 } else {
822 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
823 KICONV_UTF8_GB18030_MAX);
824 gbkcode = kiconv_utf8_gb18030[index].value;
825 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
828 if (obtail - ob < gbklen) {
829 *ret = (size_t)-1;
830 return (-1);
833 if (index == 0)
834 (*ret)++; /* Non-identical conversion */
836 if (gbklen == 2) {
837 *ob++ = (uchar_t)(gbkcode >> 8);
838 } else if (gbklen == 4) {
839 *ob++ = (uchar_t)(gbkcode >> 24);
840 *ob++ = (uchar_t)(gbkcode >> 16);
841 *ob++ = (uchar_t)(gbkcode >> 8);
843 *ob = (uchar_t)(gbkcode & 0xFF);
845 return (gbklen);
849 * Convert single UTF-8 character to GBK.
850 * Return: > 0 - Converted successfully
851 * = -1 - E2BIG
853 /* ARGSUSED */
854 static int8_t
855 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
856 uchar_t *ob, uchar_t *obtail, size_t *ret)
858 size_t index;
859 int8_t gbklen;
860 uint32_t gbkcode;
862 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
863 KICONV_UTF8_GB18030_MAX);
864 gbkcode = kiconv_utf8_gb18030[index].value;
865 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
867 /* GBK and GB18030 share the same table, so check the length. */
868 if (gbklen == 4) {
869 index = 0;
870 gbkcode = kiconv_utf8_gb18030[index].value;
871 gbklen = 1;
874 if (obtail - ob < gbklen) {
875 *ret = (size_t)-1;
876 return (-1);
879 if (index == 0)
880 (*ret)++; /* Non-identical conversion */
882 if (gbklen > 1)
883 *ob++ = (uchar_t)(gbkcode >> 8);
884 *ob = (uchar_t)(gbkcode & 0xFF);
886 return (gbklen);
890 * Convert single UTF-8 character to GB2312.
891 * Return: > 0 - Converted successfully
892 * = -1 - E2BIG
894 /* ARGSUSED */
895 static int8_t
896 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
897 uchar_t *ob, uchar_t *obtail, size_t *ret)
899 size_t index;
900 int8_t gblen;
901 uint32_t gbcode;
903 index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
904 KICONV_UTF8_GB2312_MAX);
905 gbcode = kiconv_utf8_gb2312[index].value;
906 gblen = (gbcode <= 0xFF) ? 1 : 2;
908 if (obtail - ob < gblen) {
909 *ret = (size_t)-1;
910 return (-1);
913 if (index == 0)
914 (*ret)++;
916 if (gblen > 1)
917 *ob++ = (uchar_t)(gbcode >> 8);
918 *ob = (uchar_t)(gbcode & 0xFF);
920 return (gblen);
923 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
925 "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
926 kiconv_close_to_cck, kiconvstr_to_gb18030
929 "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
930 close_fr_sc, kiconvstr_fr_gb18030
933 "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
934 kiconv_close_to_cck, kiconvstr_to_gbk
937 "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
938 close_fr_sc, kiconvstr_fr_gbk
941 "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
942 kiconv_close_to_cck, kiconvstr_to_euccn
945 "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
946 close_fr_sc, kiconvstr_fr_euccn
950 static kiconv_module_info_t kiconv_sc_info = {
951 "kiconv_sc", /* module name */
952 sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
953 kiconv_sc_ops_tbl,
955 NULL,
956 NULL,
960 static struct modlkiconv modlkiconv_sc = {
961 &mod_kiconvops,
962 "kiconv Simplified Chinese module 1.0",
963 &kiconv_sc_info
966 static struct modlinkage modlinkage = {
967 MODREV_1,
968 (void *)&modlkiconv_sc,
969 NULL
973 _init(void)
975 int err;
977 err = mod_install(&modlinkage);
978 if (err)
979 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
981 return (err);
985 _fini(void)
987 int err;
990 * If this module is being used, then, we cannot remove the module.
991 * The following checking will catch pretty much all usual cases.
993 * Any remaining will be catached by the kiconv_unregister_module()
994 * during mod_remove() at below.
996 if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
997 return (EBUSY);
999 err = mod_remove(&modlinkage);
1000 if (err)
1001 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1003 return (err);
1007 _info(struct modinfo *modinfop)
1009 return (mod_info(&modlinkage, modinfop));