4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/debug.h>
35 #include <sys/sunddi.h>
36 #include <sys/byteorder.h>
37 #include <sys/errno.h>
38 #include <sys/modctl.h>
39 #include <sys/u8_textprep.h>
40 #include <sys/kiconv.h>
41 #include <sys/kiconv_cck_common.h>
42 #include <sys/kiconv_ko.h>
43 #include <sys/kiconv_uhc_utf8.h>
44 #include <sys/kiconv_utf8_uhc.h>
45 #include <sys/kiconv_euckr_utf8.h>
46 #include <sys/kiconv_utf8_euckr.h>
/*
 * Forward declaration of the per-character UTF-8 to EUC-KR converter.
 * It is handed to kiconv_utf8_to_cck() and kiconvstr_utf8_to_cck() as
 * their final argument later in this file.
 */
48 static int8_t utf8_to_euckr(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
49 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
/*
 * Forward declaration of the per-character UTF-8 to Unified Hangul Code
 * converter. It is handed to kiconv_utf8_to_cck() and
 * kiconvstr_utf8_to_cck() as their final argument later in this file.
 */
50 static int8_t utf8_to_uhc(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
51 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
);
/*
 * Forward declaration of the shared EUC-KR/UHC to UTF-8 single character
 * converter. The callers in this file pass the packed two-byte code as
 * ko_val, the output cursor and limit as ob/obtail, a counter by address
 * as ret_val, and the lookup table together with its entry count.
 */
52 static int8_t ko_to_utf8(uint32_t ko_val
, uchar_t
*ob
, uchar_t
*obtail
,
53 size_t *ret_val
, kiconv_table_array_t
*table
, size_t nitems
);
/*
 * Magic ids for the two "from" conversions. Further down in this file
 * KICONV_KO_EUCKR and KICONV_KO_UHC are cast to (void *) and returned,
 * and a pointer value greater than KICONV_KO_MAX_MAGIC_ID is tested for.
 */
56 #define KICONV_KO_EUCKR (0x01)
57 #define KICONV_KO_UHC (0x02)
58 #define KICONV_KO_MAX_MAGIC_ID (0x02)
63 return ((void *)KICONV_KO_EUCKR
);
69 return ((void *)KICONV_KO_UHC
);
75 if ((uintptr_t)s
> KICONV_KO_MAX_MAGIC_ID
)
82 * Encoding convertor from EUC-KR to UTF-8.
/*
 * kiconv_fr_euckr: buffer based conversion of EUC-KR input to UTF-8.
 *
 *   kcd        - conversion descriptor; NULL and (void *)-1 are tested for.
 *   inbuf      - address of the input pointer; a NULL inbuf or NULL *inbuf
 *                is treated as a state reset request.
 *   inbufleft  - in/out: input byte count on entry, set to the number of
 *                bytes between the input cursor and the input end on exit.
 *   outbuf     - in/out: output position, stored back from the cursor.
 *   outbufleft - in/out: output room on entry, remaining room on exit.
 *   errno      - error slot; E2BIG, EILSEQ and EINVAL are raised through
 *                KICONV_SET_ERRNO_AND_BREAK (macro defined elsewhere).
 *
 * NOTE(review): the local declarations, the bodies of several branches
 * and the return statements are not visible in this view, so the return
 * value is not described here.
 */
85 kiconv_fr_euckr(void *kcd
, char **inbuf
, size_t *inbufleft
,
86 char **outbuf
, size_t *outbufleft
, int *errno
)
96 /* Check on the kiconv code conversion descriptor. */
97 if (kcd
== NULL
|| kcd
== (void *)-1) {
102 /* If this is a state reset request, process and return. */
103 if (inbuf
== NULL
|| *inbuf
== NULL
) {
/* Cursors (ib, ob) and one-past-the-end limits (ibtail, obtail). */
108 ib
= (uchar_t
*)*inbuf
;
109 ob
= (uchar_t
*)*outbuf
;
110 ibtail
= ib
+ *inbufleft
;
111 obtail
= ob
+ *outbufleft
;
113 while (ib
< ibtail
) {
/* Bytes accepted by KICONV_IS_ASCII take this separate branch. */
114 if (KICONV_IS_ASCII(*ib
)) {
116 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
124 * Issue EILSEQ error if the first byte is not a
125 * valid EUC-KR leading byte.
127 if (! KICONV_KO_IS_EUCKR_BYTE(*ib
)) {
128 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
132 * Issue EINVAL error if input buffer has an incomplete
133 * character at the end of the buffer.
135 if (ibtail
- ib
< 2) {
136 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
140 * Issue EILSEQ error if the remaining byte is not
141 * a valid EUC-KR byte.
143 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib
+ 1))) {
144 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
/* Pack the lead byte (bits 8-15) and trail byte (bits 0-7) together. */
147 euckr_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
/* Convert through the EUC-KR to UTF-8 table; sz receives the result. */
148 sz
= ko_to_utf8(euckr_val
, ob
, obtail
, &ret_val
,
149 kiconv_euckr_utf8
, KICONV_EUCKR_UTF8_MAX
);
152 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/* Report the unconsumed input and the output position/room back. */
160 *inbufleft
= ibtail
- ib
;
161 *outbuf
= (char *)ob
;
162 *outbufleft
= obtail
- ob
;
168 * String based encoding convertor from EUC-KR to UTF-8.
/*
 * kiconvstr_fr_euckr: string based conversion of EUC-KR input to UTF-8.
 *
 *   inarray  - input bytes.
 *   inlen    - in/out: input length on entry, unconsumed length on exit.
 *   outarray - output buffer.
 *   outlen   - in/out: output room on entry, remaining room on exit.
 *   flag     - option bits; KICONV_IGNORE_NULL is the one tested here.
 *   errno    - error slot used by the KICONV_SET_ERRNO_* macros (defined
 *              elsewhere).
 *
 * NOTE(review): the local declarations, several branch bodies and the
 * return statement are not visible in this view.
 */
171 kiconvstr_fr_euckr(char *inarray
, size_t *inlen
, char *outarray
,
172 size_t *outlen
, int flag
, int *errno
)
182 boolean_t do_not_ignore_null
;
/* Cursors (ib, ob) and one-past-the-end limits (ibtail, obtail). */
185 ib
= (uchar_t
*)inarray
;
186 ob
= (uchar_t
*)outarray
;
187 ibtail
= ib
+ *inlen
;
188 obtail
= ob
+ *outlen
;
/* True when KICONV_IGNORE_NULL is absent from flag. */
189 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
191 while (ib
< ibtail
) {
/* A NUL byte only matters when NULs are not being ignored. */
192 if (*ib
== '\0' && do_not_ignore_null
)
195 if (KICONV_IS_ASCII(*ib
)) {
197 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/*
 * Bad lead byte, truncated character and bad trail byte all go through
 * KICONV_SET_ERRNO_WITH_FLAG. Its first argument is 1, 1 and 2
 * respectively - presumably the number of input bytes involved; confirm
 * against the macro definition.
 */
206 if (! KICONV_KO_IS_EUCKR_BYTE(*ib
)) {
207 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
210 if (ibtail
- ib
< 2) {
211 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL
);
214 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib
+ 1))) {
215 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
/*
 * Shift the next input byte into euckr_val and advance ib.
 * NOTE(review): the statement that seeds euckr_val is not visible here.
 */
219 euckr_val
= (euckr_val
<< 8) | *ib
++;
/* Convert through the EUC-KR to UTF-8 table; sz receives the result. */
220 sz
= ko_to_utf8(euckr_val
, ob
, obtail
, &ret_val
,
221 kiconv_euckr_utf8
, KICONV_EUCKR_UTF8_MAX
);
225 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/* Room check before the three replacement character bytes are stored. */
232 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
234 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
237 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
238 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
239 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
/* Report the unconsumed input length and the unused output room. */
243 *inlen
= ibtail
- ib
;
244 *outlen
= obtail
- ob
;
250 * Encoding convertor from Unified Hangul Code to UTF-8.
/*
 * kiconv_fr_uhc: buffer based conversion of Unified Hangul Code input to
 * UTF-8.
 *
 *   kcd        - conversion descriptor; NULL and (void *)-1 are tested for.
 *   inbuf      - address of the input pointer; a NULL inbuf or NULL *inbuf
 *                is treated as a state reset request.
 *   inbufleft  - in/out: input byte count on entry, set to the number of
 *                bytes between the input cursor and the input end on exit.
 *   outbuf     - in/out: output position, stored back from the cursor.
 *   outbufleft - in/out: output room on entry, remaining room on exit.
 *   errno      - error slot; E2BIG, EILSEQ and EINVAL are raised through
 *                KICONV_SET_ERRNO_AND_BREAK (macro defined elsewhere).
 *
 * Unlike the EUC-KR variant, the lead and trail bytes are validated with
 * two different macros (KICONV_KO_IS_UHC_1st_BYTE / _2nd_BYTE).
 *
 * NOTE(review): the local declarations, the bodies of several branches
 * and the return statements are not visible in this view.
 */
253 kiconv_fr_uhc(void *kcd
, char **inbuf
, size_t *inbufleft
,
254 char **outbuf
, size_t *outbufleft
, int *errno
)
264 /* Check on the kiconv code conversion descriptor. */
265 if (kcd
== NULL
|| kcd
== (void *)-1) {
270 /* If this is a state reset request, process and return. */
271 if (inbuf
== NULL
|| *inbuf
== NULL
) {
/* Cursors (ib, ob) and one-past-the-end limits (ibtail, obtail). */
276 ib
= (uchar_t
*)*inbuf
;
277 ob
= (uchar_t
*)*outbuf
;
278 ibtail
= ib
+ *inbufleft
;
279 obtail
= ob
+ *outbufleft
;
281 while (ib
< ibtail
) {
/* Bytes accepted by KICONV_IS_ASCII take this separate branch. */
282 if (KICONV_IS_ASCII(*ib
)) {
284 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
292 * Issue EILSEQ error if the first byte is not a
293 * valid UHC leading byte.
295 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib
)) {
296 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
300 * Issue EINVAL error if input buffer has an incomplete
301 * character at the end of the buffer.
303 if (ibtail
- ib
< 2) {
304 KICONV_SET_ERRNO_AND_BREAK(EINVAL
);
308 * Issue EILSEQ error if the remaining byte is not
311 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib
+ 1))) {
312 KICONV_SET_ERRNO_AND_BREAK(EILSEQ
);
/* Pack the lead byte (bits 8-15) and trail byte (bits 0-7) together. */
315 uhc_val
= (uint32_t)(*ib
) << 8 | *(ib
+ 1);
/* Convert through the UHC to UTF-8 table; sz receives the result. */
316 sz
= ko_to_utf8(uhc_val
, ob
, obtail
, &ret_val
,
317 kiconv_uhc_utf8
, KICONV_UHC_UTF8_MAX
);
320 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/* Report the unconsumed input and the output position/room back. */
328 *inbufleft
= ibtail
- ib
;
329 *outbuf
= (char *)ob
;
330 *outbufleft
= obtail
- ob
;
336 * String based encoding convertor from Unified Hangul Code to UTF-8.
/*
 * kiconvstr_fr_uhc: string based conversion of Unified Hangul Code input
 * to UTF-8.
 *
 *   inarray  - input bytes.
 *   inlen    - in/out: input length on entry, unconsumed length on exit.
 *   outarray - output buffer.
 *   outlen   - in/out: output room on entry, remaining room on exit.
 *   flag     - option bits; KICONV_IGNORE_NULL is the one tested here.
 *   errno    - error slot used by the KICONV_SET_ERRNO_* macros (defined
 *              elsewhere).
 *
 * NOTE(review): the local declarations, several branch bodies and the
 * return statement are not visible in this view.
 */
339 kiconvstr_fr_uhc(char *inarray
, size_t *inlen
, char *outarray
,
340 size_t *outlen
, int flag
, int *errno
)
350 boolean_t do_not_ignore_null
;
/* Cursors (ib, ob) and one-past-the-end limits (ibtail, obtail). */
353 ib
= (uchar_t
*)inarray
;
354 ob
= (uchar_t
*)outarray
;
355 ibtail
= ib
+ *inlen
;
356 obtail
= ob
+ *outlen
;
/* True when KICONV_IGNORE_NULL is absent from flag. */
357 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
359 while (ib
< ibtail
) {
/* A NUL byte only matters when NULs are not being ignored. */
360 if (*ib
== '\0' && do_not_ignore_null
)
363 if (KICONV_IS_ASCII(*ib
)) {
365 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/*
 * Bad lead byte, truncated character and bad trail byte all go through
 * KICONV_SET_ERRNO_WITH_FLAG. Its first argument is 1, 1 and 2
 * respectively - presumably the number of input bytes involved; confirm
 * against the macro definition.
 */
374 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib
)) {
375 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ
);
378 if (ibtail
- ib
< 2) {
379 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL
);
382 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib
+ 1))) {
383 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ
);
/*
 * Shift the next input byte into uhc_val and advance ib.
 * NOTE(review): the statement that seeds uhc_val is not visible here.
 */
387 uhc_val
= (uhc_val
<< 8) | *ib
++;
/* Convert through the UHC to UTF-8 table; sz receives the result. */
388 sz
= ko_to_utf8(uhc_val
, ob
, obtail
, &ret_val
,
389 kiconv_uhc_utf8
, KICONV_UHC_UTF8_MAX
);
393 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
/* Room check before the three replacement character bytes are stored. */
400 if (obtail
- ob
< KICONV_UTF8_REPLACEMENT_CHAR_LEN
) {
402 KICONV_SET_ERRNO_AND_BREAK(E2BIG
);
405 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR1
;
406 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR2
;
407 *ob
++ = KICONV_UTF8_REPLACEMENT_CHAR3
;
/* Report the unconsumed input length and the unused output room. */
411 *inlen
= ibtail
- ib
;
412 *outlen
= obtail
- ob
;
418 * Encoding convertor from UTF-8 to EUC-KR.
/*
 * kiconv_to_euckr: buffer based UTF-8 to EUC-KR conversion.
 *
 * Thin wrapper: every argument is forwarded unchanged to
 * kiconv_utf8_to_cck() with utf8_to_euckr supplied as the per-character
 * converter, and that call's result is returned.
 */
421 kiconv_to_euckr(void *kcd
, char **inbuf
, size_t *inbytesleft
,
422 char **outbuf
, size_t *outbytesleft
, int *errno
)
424 return (kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
425 outbytesleft
, errno
, utf8_to_euckr
));
429 * Encoding convertor from UTF-8 to Unified Hangul Code.
/*
 * kiconv_to_uhc: buffer based UTF-8 to Unified Hangul Code conversion.
 *
 * Thin wrapper: every argument is forwarded unchanged to
 * kiconv_utf8_to_cck() with utf8_to_uhc supplied as the per-character
 * converter, and that call's result is returned.
 */
432 kiconv_to_uhc(void *kcd
, char **inbuf
, size_t *inbytesleft
,
433 char **outbuf
, size_t *outbytesleft
, int *errno
)
435 return (kiconv_utf8_to_cck(kcd
, inbuf
, inbytesleft
, outbuf
,
436 outbytesleft
, errno
, utf8_to_uhc
));
440 * String based encoding convertor from UTF-8 to EUC-KR.
/*
 * kiconvstr_to_euckr: string based UTF-8 to EUC-KR conversion.
 *
 * Thin wrapper: inarray and outarray are cast to uchar_t pointers and,
 * together with the remaining arguments, handed to
 * kiconvstr_utf8_to_cck() with utf8_to_euckr as the per-character
 * converter. That call's result is returned.
 */
443 kiconvstr_to_euckr(char *inarray
, size_t *inlen
, char *outarray
,
444 size_t *outlen
, int flag
, int *errno
)
446 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
447 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_euckr
);
451 * String based encoding convertor from UTF-8 to Unified Hangul Code.
/*
 * kiconvstr_to_uhc: string based UTF-8 to Unified Hangul Code conversion.
 *
 * Thin wrapper: inarray and outarray are cast to uchar_t pointers and,
 * together with the remaining arguments, handed to
 * kiconvstr_utf8_to_cck() with utf8_to_uhc as the per-character
 * converter. That call's result is returned.
 */
454 kiconvstr_to_uhc(char *inarray
, size_t *inlen
, char *outarray
,
455 size_t *outlen
, int flag
, int *errno
)
457 return kiconvstr_utf8_to_cck((uchar_t
*)inarray
, inlen
,
458 (uchar_t
*)outarray
, outlen
, flag
, errno
, utf8_to_uhc
);
462 * Convert a UTF-8 character to a character of ko encodings
/*
 * utf8_to_ko: shared worker that turns one UTF-8 character, passed packed
 * in `utf8`, into a ko code and stores it at ob.
 *
 *   utf8      - the packed UTF-8 bytes of one character.
 *   ob/obtail - output cursor and one-past-the-end limit.
 *   ret_val   - set to (size_t)-1 when fewer than kolen bytes of output
 *               room remain.
 *   table     - lookup table searched with kiconv_binsearch().
 *   nitems    - entry count handed to kiconv_binsearch().
 *
 * NOTE(review): the local declarations, parts of both branches, the
 * single-byte store and the return statements are not visible here.
 */
466 utf8_to_ko(uint32_t utf8
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
,
467 kiconv_table_t
*table
, size_t nitems
)
473 if (KICONV_KO_IS_UDC_IN_UTF8(utf8
)) {
474 /* User Definable Area handling. */
/*
 * Collect the payload bits of the packed UTF-8 bytes (masks 0xF0000,
 * 0x3F00 and 0x3F, shifted together) and rebase the result by
 * KICONV_KO_UDA_UCS4_START.
 */
475 kocode
= (((utf8
& 0xF0000) >> 4) | ((utf8
& 0x3F00) >> 2) |
476 (utf8
& 0x3F)) - KICONV_KO_UDA_UCS4_START
;
/* Offsets below KICONV_KO_UDA_RANGE use lead byte KICONV_KO_UDA_EUC_SEG1. */
477 if (kocode
< KICONV_KO_UDA_RANGE
) {
478 kocode
= (KICONV_KO_UDA_EUC_SEG1
<< 8) |
479 (kocode
+ KICONV_KO_UDA_OFFSET_START
);
481 /* 0x43 = 0xA1 - 0x5E */
/* The other branch uses lead byte KICONV_KO_UDA_EUC_SEG2. */
482 kocode
= (KICONV_KO_UDA_EUC_SEG2
<< 8) |
/* Outside the UDC branch: binary-search the table and take .value. */
488 index
= kiconv_binsearch(utf8
, table
, nitems
);
489 kocode
= table
[index
].value
;
/* Codes up to 0xFF need one output byte, anything larger needs two. */
492 kolen
= (kocode
<= 0xFF) ? 1 : 2;
494 if (obtail
- ob
< kolen
) {
495 *ret_val
= (size_t)-1;
/* Two-byte store: high byte first, then low byte. */
503 *ob
++ = (uchar_t
)(kocode
>> 8);
504 *ob
= (uchar_t
)(kocode
& 0xFF);
510 * Convert a UTF-8 character to Unified Hangul Code.
/*
 * utf8_to_uhc: per-character UTF-8 to Unified Hangul Code converter.
 *
 * Delegates to utf8_to_ko() with the kiconv_utf8_uhc table and
 * KICONV_UTF8_UHC_MAX as its size, and returns that call's result.
 * inbuf and ibtail are part of the signature but are not referenced by
 * the visible body.
 */
514 utf8_to_uhc(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
515 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
517 return (utf8_to_ko(utf8
, ob
, obtail
, ret_val
, kiconv_utf8_uhc
,
518 KICONV_UTF8_UHC_MAX
));
522 * Convert a UTF-8 character to EUC-KR.
/*
 * utf8_to_euckr: per-character UTF-8 to EUC-KR converter.
 *
 * Delegates to utf8_to_ko() with the kiconv_utf8_euckr table and
 * KICONV_UTF8_EUCKR_MAX as its size, and returns that call's result.
 * inbuf and ibtail are part of the signature but are not referenced by
 * the visible body.
 */
526 utf8_to_euckr(uint32_t utf8
, uchar_t
**inbuf
, uchar_t
*ibtail
,
527 uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
)
529 return (utf8_to_ko(utf8
, ob
, obtail
, ret_val
, kiconv_utf8_euckr
,
530 KICONV_UTF8_EUCKR_MAX
));
534 * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
/*
 * ko_to_utf8: convert one EUC-KR or UHC character to UTF-8.
 *
 *   ko_val    - the two-byte code packed as (lead << 8) | trail by the
 *               callers in this file.
 *   ob/obtail - output cursor and one-past-the-end limit.
 *   ret_val   - set to (size_t)-1 when fewer than sz bytes of output room
 *               remain; incremented for a non-identical conversion.
 *   table     - lookup table searched with kiconv_binsearch().
 *   nitems    - entry count handed to kiconv_binsearch().
 *
 * NOTE(review): the local declarations, the store to udc[0], the
 * condition guarding the (*ret_val)++ line, the copy loop body and the
 * return statements are not visible in this view.
 */
537 ko_to_utf8(uint32_t ko_val
, uchar_t
*ob
, uchar_t
*obtail
, size_t *ret_val
,
538 kiconv_table_array_t
*table
, size_t nitems
)
545 if (KICONV_KO_IS_UDC_IN_EUC(ko_val
)) {
546 /* UDA(User Definable Area) handling. */
/*
 * u32 is the trail byte plus an offset selected by the lead byte:
 * KICONV_KO_UDA_OFFSET_1 for lead byte 0xC9, KICONV_KO_UDA_OFFSET_2
 * otherwise.
 */
549 u32
= (ko_val
& 0xFF) + (((ko_val
& 0xFF00) == 0xC900) ?
550 KICONV_KO_UDA_OFFSET_1
: KICONV_KO_UDA_OFFSET_2
);
/* udc[1] carries bits 6-11 of u32, udc[2] bits 0-5, each ORed with 0x80. */
552 udc
[1] = (uchar_t
)(0x80 | (u32
& 0x00000FC0) >> 6);
553 udc
[2] = (uchar_t
)(0x80 | (u32
& 0x0000003F));
/* Outside the UDC branch: binary-search the table and take .u8. */
557 index
= kiconv_binsearch(ko_val
, table
, nitems
);
558 u8
= table
[index
].u8
;
/* Sequence length, looked up from the first UTF-8 byte. */
561 sz
= u8_number_of_bytes
[u8
[0]];
563 if (obtail
- ob
< sz
) {
564 *ret_val
= (size_t)-1;
569 (*ret_val
)++; /* Non-identical conversion */
/* Loop over the sz bytes of the sequence. */
571 for (index
= 0; index
< sz
; index
++)
/*
 * Conversion table for this module. Each of the four entries lists two
 * encoding names followed by four functions (the names suggest open,
 * convert, close and string-convert, in that order). The first name is the
 * encoding being converted TO: the "euc-kr", "utf-8" entry carries the
 * kiconv_to_euckr routines, the "utf-8", "euc-kr" entry the
 * kiconv_fr_euckr routines.
 */
577 static kiconv_ops_t kiconv_ko_ops_tbl
[] = {
/* UTF-8 to EUC-KR. */
579 "euc-kr", "utf-8", kiconv_open_to_cck
, kiconv_to_euckr
,
580 kiconv_close_to_cck
, kiconvstr_to_euckr
/* EUC-KR to UTF-8. */
583 "utf-8", "euc-kr", open_fr_euckr
, kiconv_fr_euckr
,
584 close_fr_ko
, kiconvstr_fr_euckr
/* UTF-8 to Unified Hangul Code. */
587 "unifiedhangul", "utf-8", kiconv_open_to_cck
, kiconv_to_uhc
,
588 kiconv_close_to_cck
, kiconvstr_to_uhc
/* Unified Hangul Code to UTF-8. */
591 "utf-8", "unifiedhangul", open_fr_uhc
, kiconv_fr_uhc
,
592 close_fr_ko
, kiconvstr_fr_uhc
/*
 * Module information record: the module name followed by the number of
 * entries in kiconv_ko_ops_tbl (array size divided by element size).
 * NOTE(review): the remaining initializers are not visible in this view.
 */
596 static kiconv_module_info_t kiconv_ko_info
= {
597 "kiconv_ko", /* module name */
598 sizeof (kiconv_ko_ops_tbl
) / sizeof (kiconv_ko_ops_tbl
[0]),
/*
 * kiconv linkage record for this module; its address is stored in
 * modlinkage below. The string is presumably the module description -
 * confirm against struct modlkiconv. Other initializers are not visible
 * in this view.
 */
606 static struct modlkiconv modlkiconv_ko
= {
608 "kiconv korean module 1.0",
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() in
 * this file; it points at modlkiconv_ko. Other initializers are not
 * visible in this view.
 */
612 static struct modlinkage modlinkage
= {
614 (void *)&modlkiconv_ko
,
623 err
= mod_install(&modlinkage
);
625 cmn_err(CE_WARN
, "kiconv_ko: failed to load kernel module");
636 * If this module is being used, then we cannot remove the module.
637 * The following check will catch pretty much all usual cases.
639 * Any remaining cases will be caught by kiconv_unregister_module()
640 * during mod_remove() below.
642 if (kiconv_module_ref_count(KICONV_MODULE_ID_KO
))
645 err
= mod_remove(&modlinkage
);
647 cmn_err(CE_WARN
, "kiconv_ko: failed to remove kernel module");
/*
 * _info: module information entry point. Passes this module's modlinkage
 * and the caller's modinfop to mod_info() and returns that call's result.
 */
653 _info(struct modinfo
*modinfop
)
655 return (mod_info(&modlinkage
, modinfop
));