Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / kiconv / kiconv_ko / kiconv_ko.c
blob7fae073313402121608703d4a2525c34d600ec61
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

27 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/debug.h>
34 #include <sys/kmem.h>
35 #include <sys/sunddi.h>
36 #include <sys/byteorder.h>
37 #include <sys/errno.h>
38 #include <sys/modctl.h>
39 #include <sys/u8_textprep.h>
40 #include <sys/kiconv.h>
41 #include <sys/kiconv_cck_common.h>
42 #include <sys/kiconv_ko.h>
43 #include <sys/kiconv_uhc_utf8.h>
44 #include <sys/kiconv_utf8_uhc.h>
45 #include <sys/kiconv_euckr_utf8.h>
46 #include <sys/kiconv_utf8_euckr.h>
48 static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
49 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
50 static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
51 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
52 static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
53 size_t *ret_val, kiconv_table_array_t *table, size_t nitems);
/*
 * Magic conversion descriptors handed out by the "from Korean encoding"
 * open routines.  No memory is allocated for these conversions; the
 * descriptor is simply a small non-zero integer stored in a pointer.
 * KICONV_KO_MAX_MAGIC_ID is the largest value that is a valid descriptor.
 */
#define	KICONV_KO_EUCKR		(0x01)
#define	KICONV_KO_UHC		(0x02)
#define	KICONV_KO_MAX_MAGIC_ID	(0x02)

/*
 * Open routine for EUC-KR to UTF-8: returns the EUC-KR magic descriptor.
 */
static void *
open_fr_euckr(void)
{
	return ((void *)(uintptr_t)KICONV_KO_EUCKR);
}

/*
 * Open routine for UHC to UTF-8: returns the UHC magic descriptor.
 */
static void *
open_fr_uhc(void)
{
	return ((void *)(uintptr_t)KICONV_KO_UHC);
}

/*
 * Close routine shared by the EUC-KR and UHC "from" conversions.
 *
 * Returns 0 when s is one of the magic descriptors above, EBADF otherwise.
 * NULL is rejected as well: neither open routine ever returns it, and the
 * converters in this file also treat a NULL descriptor as EBADF.
 */
static int
close_fr_ko(void *s)
{
	if (s == NULL || (uintptr_t)s > KICONV_KO_MAX_MAGIC_ID)
		return (EBADF);

	return (0);
}

82 * Encoding convertor from EUC-KR to UTF-8.
84 static size_t
85 kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
86 char **outbuf, size_t *outbufleft, int *errno)
88 uchar_t *ib;
89 uchar_t *ob;
90 uchar_t *ibtail;
91 uchar_t *obtail;
92 size_t ret_val;
93 int8_t sz;
94 uint32_t euckr_val;
96 /* Check on the kiconv code conversion descriptor. */
97 if (kcd == NULL || kcd == (void *)-1) {
98 *errno = EBADF;
99 return ((size_t)-1);
102 /* If this is a state reset request, process and return. */
103 if (inbuf == NULL || *inbuf == NULL) {
104 return (0);
107 ret_val = 0;
108 ib = (uchar_t *)*inbuf;
109 ob = (uchar_t *)*outbuf;
110 ibtail = ib + *inbufleft;
111 obtail = ob + *outbufleft;
113 while (ib < ibtail) {
114 if (KICONV_IS_ASCII(*ib)) {
115 if (ob >= obtail) {
116 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
119 *ob++ = *ib++;
120 continue;
124 * Issue EILSEQ error if the first byte is not a
125 * valid EUC-KR leading byte.
127 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
128 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
132 * Issue EINVAL error if input buffer has an incomplete
133 * character at the end of the buffer.
135 if (ibtail - ib < 2) {
136 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
140 * Issue EILSEQ error if the remaining byte is not
141 * a valid EUC-KR byte.
143 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
144 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
147 euckr_val = (uint32_t)(*ib) << 8 | *(ib + 1);
148 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
149 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
151 if (sz < 0) {
152 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
155 ib += 2;
156 ob += sz;
159 *inbuf = (char *)ib;
160 *inbufleft = ibtail - ib;
161 *outbuf = (char *)ob;
162 *outbufleft = obtail - ob;
164 return (ret_val);
168 * String based encoding convertor from EUC-KR to UTF-8.
170 static size_t
171 kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray,
172 size_t *outlen, int flag, int *errno)
174 uchar_t *ib;
175 uchar_t *ob;
176 uchar_t *ibtail;
177 uchar_t *obtail;
178 uchar_t *oldib;
179 size_t ret_val;
180 int8_t sz;
181 uint32_t euckr_val;
182 boolean_t do_not_ignore_null;
184 ret_val = 0;
185 ib = (uchar_t *)inarray;
186 ob = (uchar_t *)outarray;
187 ibtail = ib + *inlen;
188 obtail = ob + *outlen;
189 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
191 while (ib < ibtail) {
192 if (*ib == '\0' && do_not_ignore_null)
193 break;
195 if (KICONV_IS_ASCII(*ib)) {
196 if (ob >= obtail) {
197 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
200 *ob++ = *ib++;
201 continue;
204 oldib = ib;
206 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
207 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
210 if (ibtail - ib < 2) {
211 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
214 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
215 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
218 euckr_val = *ib++;
219 euckr_val = (euckr_val << 8) | *ib++;
220 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
221 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
223 if (sz < 0) {
224 ib = oldib;
225 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
228 ob += sz;
229 continue;
231 REPLACE_INVALID:
232 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
233 ib = oldib;
234 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
237 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
238 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
239 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
240 ret_val++;
243 *inlen = ibtail - ib;
244 *outlen = obtail - ob;
246 return (ret_val);
250 * Encoding convertor from Unified Hangul Code to UTF-8.
252 static size_t
253 kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft,
254 char **outbuf, size_t *outbufleft, int *errno)
256 uchar_t *ib;
257 uchar_t *ob;
258 uchar_t *ibtail;
259 uchar_t *obtail;
260 size_t ret_val;
261 int8_t sz;
262 uint32_t uhc_val;
264 /* Check on the kiconv code conversion descriptor. */
265 if (kcd == NULL || kcd == (void *)-1) {
266 *errno = EBADF;
267 return ((size_t)-1);
270 /* If this is a state reset request, process and return. */
271 if (inbuf == NULL || *inbuf == NULL) {
272 return (0);
275 ret_val = 0;
276 ib = (uchar_t *)*inbuf;
277 ob = (uchar_t *)*outbuf;
278 ibtail = ib + *inbufleft;
279 obtail = ob + *outbufleft;
281 while (ib < ibtail) {
282 if (KICONV_IS_ASCII(*ib)) {
283 if (ob >= obtail) {
284 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
287 *ob++ = *ib++;
288 continue;
292 * Issue EILSEQ error if the first byte is not a
293 * valid UHC leading byte.
295 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
296 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
300 * Issue EINVAL error if input buffer has an incomplete
301 * character at the end of the buffer.
303 if (ibtail - ib < 2) {
304 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
308 * Issue EILSEQ error if the remaining byte is not
309 * a valid UHC byte.
311 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
312 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
315 uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1);
316 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
317 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
319 if (sz < 0) {
320 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
323 ib += 2;
324 ob += sz;
327 *inbuf = (char *)ib;
328 *inbufleft = ibtail - ib;
329 *outbuf = (char *)ob;
330 *outbufleft = obtail - ob;
332 return (ret_val);
336 * String based encoding convertor from Unified Hangul Code to UTF-8.
338 static size_t
339 kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray,
340 size_t *outlen, int flag, int *errno)
342 uchar_t *ib;
343 uchar_t *ob;
344 uchar_t *ibtail;
345 uchar_t *obtail;
346 uchar_t *oldib;
347 size_t ret_val;
348 int8_t sz;
349 uint32_t uhc_val;
350 boolean_t do_not_ignore_null;
352 ret_val = 0;
353 ib = (uchar_t *)inarray;
354 ob = (uchar_t *)outarray;
355 ibtail = ib + *inlen;
356 obtail = ob + *outlen;
357 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
359 while (ib < ibtail) {
360 if (*ib == '\0' && do_not_ignore_null)
361 break;
363 if (KICONV_IS_ASCII(*ib)) {
364 if (ob >= obtail) {
365 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
368 *ob++ = *ib++;
369 continue;
372 oldib = ib;
374 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
375 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
378 if (ibtail - ib < 2) {
379 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
382 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
383 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
386 uhc_val = *ib++;
387 uhc_val = (uhc_val << 8) | *ib++;
388 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
389 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
391 if (sz < 0) {
392 ib = oldib;
393 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
396 ob += sz;
397 continue;
399 REPLACE_INVALID:
400 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
401 ib = oldib;
402 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
405 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
406 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
407 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
408 ret_val++;
411 *inlen = ibtail - ib;
412 *outlen = obtail - ob;
414 return (ret_val);
418 * Encoding convertor from UTF-8 to EUC-KR.
420 static size_t
421 kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft,
422 char **outbuf, size_t *outbytesleft, int *errno)
424 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
425 outbytesleft, errno, utf8_to_euckr));
429 * Encoding convertor from UTF-8 to Unified Hangul Code.
431 static size_t
432 kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft,
433 char **outbuf, size_t *outbytesleft, int *errno)
435 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
436 outbytesleft, errno, utf8_to_uhc));
440 * String based encoding convertor from UTF-8 to EUC-KR.
442 static size_t
443 kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray,
444 size_t *outlen, int flag, int *errno)
446 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
447 (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr);
451 * String based encoding convertor from UTF-8 to Unified Hangul Code.
453 static size_t
454 kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray,
455 size_t *outlen, int flag, int *errno)
457 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
458 (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc);
462 * Convert an UTF-8 character to a character of ko encodings
463 * (EUC-KR or UHC).
465 static int8_t
466 utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
467 kiconv_table_t *table, size_t nitems)
469 size_t index;
470 size_t kocode;
471 int8_t kolen;
473 if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) {
474 /* User Definable Area handing. */
475 kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) |
476 (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START;
477 if (kocode < KICONV_KO_UDA_RANGE) {
478 kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) |
479 (kocode + KICONV_KO_UDA_OFFSET_START);
480 } else {
481 /* 0x43 = 0xA1 - 0x5E */
482 kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) |
483 (kocode + 0x43);
486 index = 1;
487 } else {
488 index = kiconv_binsearch(utf8, table, nitems);
489 kocode = table[index].value;
492 kolen = (kocode <= 0xFF) ? 1 : 2;
494 if (obtail - ob < kolen) {
495 *ret_val = (size_t)-1;
496 return (-1);
499 if (index == 0)
500 (*ret_val)++;
502 if (kolen > 1)
503 *ob++ = (uchar_t)(kocode >> 8);
504 *ob = (uchar_t)(kocode & 0xFF);
506 return (kolen);
510 * Convert an UTF-8 character to Unified Hangual Code.
512 /* ARGSUSED */
513 static int8_t
514 utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
515 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
517 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc,
518 KICONV_UTF8_UHC_MAX));
522 * Convert an UTF-8 character to EUC-KR.
524 /* ARGSUSED */
525 static int8_t
526 utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
527 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
529 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr,
530 KICONV_UTF8_EUCKR_MAX));
534 * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
536 static int8_t
537 ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
538 kiconv_table_array_t *table, size_t nitems)
540 size_t index;
541 int8_t sz;
542 uchar_t udc[3];
543 uchar_t *u8;
545 if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) {
546 /* UDA(User Definable Area) handling. */
547 uint32_t u32;
549 u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ?
550 KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2);
551 udc[0] = 0xEF;
552 udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6);
553 udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F));
554 u8 = udc;
555 index = 1;
556 } else {
557 index = kiconv_binsearch(ko_val, table, nitems);
558 u8 = table[index].u8;
561 sz = u8_number_of_bytes[u8[0]];
563 if (obtail - ob < sz) {
564 *ret_val = (size_t)-1;
565 return (-1);
568 if (index == 0)
569 (*ret_val)++; /* Non-identical conversion */
571 for (index = 0; index < sz; index++)
572 *ob++ = u8[index];
574 return (sz);
577 static kiconv_ops_t kiconv_ko_ops_tbl[] = {
579 "euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr,
580 kiconv_close_to_cck, kiconvstr_to_euckr
583 "utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr,
584 close_fr_ko, kiconvstr_fr_euckr
587 "unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc,
588 kiconv_close_to_cck, kiconvstr_to_uhc
591 "utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc,
592 close_fr_ko, kiconvstr_fr_uhc
596 static kiconv_module_info_t kiconv_ko_info = {
597 "kiconv_ko", /* module name */
598 sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]),
599 kiconv_ko_ops_tbl,
601 NULL,
602 NULL,
606 static struct modlkiconv modlkiconv_ko = {
607 &mod_kiconvops,
608 "kiconv korean module 1.0",
609 &kiconv_ko_info
612 static struct modlinkage modlinkage = {
613 MODREV_1,
614 (void *)&modlkiconv_ko,
615 NULL
619 _init(void)
621 int err;
623 err = mod_install(&modlinkage);
624 if (err)
625 cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module");
627 return (err);
631 _fini(void)
633 int err;
636 * If this module is being used, then, we cannot remove the module.
637 * The following checking will catch pretty much all usual cases.
639 * Any remaining will be catached by the kiconv_unregister_module()
640 * during mod_remove() at below.
642 if (kiconv_module_ref_count(KICONV_MODULE_ID_KO))
643 return (EBUSY);
645 err = mod_remove(&modlinkage);
646 if (err)
647 cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module");
649 return (err);
653 _info(struct modinfo *modinfop)
655 return (mod_info(&modlinkage, modinfop));