Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / kiconv / kiconv_ja / kiconv_ja.c
blobeeaf204a82349462a8640edeb2400c2c63408fb6
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/debug.h>
32 #include <sys/kmem.h>
33 #include <sys/sunddi.h>
34 #include <sys/byteorder.h>
35 #include <sys/errno.h>
36 #include <sys/euc.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
40 #include <sys/kiconv_ja.h>
41 #include <sys/kiconv_ja_jis_to_unicode.h>
42 #include <sys/kiconv_ja_unicode_to_jis.h>
45 * The following vector shows remaining bytes in a UTF-8 character.
46 * Index will be the first byte of the character. This is defined in
47 * u8_textprep.c.
49 extern const int8_t u8_number_of_bytes[];
52 * The following is a vector of bit-masks to get used bits in
53 * the first byte of a UTF-8 character. Index is remaining bytes at above of
54 * the character. This is defined in uconv.c.
56 extern const uchar_t u8_masks_tbl[];
59 * The following two vectors are to provide valid minimum and
60 * maximum values for the 2'nd byte of a multibyte UTF-8 character for
61 * better illegal sequence checking. The index value must be the value of
62 * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
64 extern const uint8_t u8_valid_min_2nd_byte[];
65 extern const uint8_t u8_valid_max_2nd_byte[];
67 static kiconv_ja_euc16_t
68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
70 const kiconv_ja_euc16_t *p;
72 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
73 return (p[ucs2 & 0xff]);
75 return (KICONV_JA_NODEST);
78 static size_t
79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
81 uint_t l; /* to be copied to *p on successful return */
82 uchar_t ic; /* current byte */
83 uchar_t ic1; /* 1st byte */
84 uchar_t *ip = *pip; /* next byte to read */
85 size_t ileft = *pileft; /* number of bytes available */
86 size_t rv = 0; /* return value of this function */
87 int remaining_bytes;
88 int u8_size;
90 KICONV_JA_NGET(ic1); /* read 1st byte */
92 if (ic1 < 0x80) {
93 /* successfully converted */
94 *p = (uint_t)ic1;
95 goto ret;
98 u8_size = u8_number_of_bytes[ic1];
99 if (u8_size == U8_ILLEGAL_CHAR) {
100 KICONV_JA_RETERROR(EILSEQ)
101 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
102 KICONV_JA_RETERROR(ERANGE)
105 remaining_bytes = u8_size - 1;
106 if (remaining_bytes != 0) {
107 l = ic1 & u8_masks_tbl[remaining_bytes];
109 for (; remaining_bytes > 0; remaining_bytes--) {
110 KICONV_JA_NGET(ic);
111 if (ic1 != 0U) {
112 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
113 (ic > u8_valid_max_2nd_byte[ic1])) {
114 KICONV_JA_RETERROR(EILSEQ)
116 ic1 = 0U; /* 2nd byte check done */
117 } else {
118 if ((ic < 0x80) || (ic > 0xbf)) {
119 KICONV_JA_RETERROR(EILSEQ)
122 l = (l << 6) | (ic & 0x3f);
125 /* successfully converted */
126 *p = l;
127 } else {
128 KICONV_JA_RETERROR(EILSEQ)
131 ret:
132 if (rv == 0) {
134 * Update rv, *pip, and *pileft on successfule return.
136 rv = *pileft - ileft;
137 *pip = ip;
138 *pileft = ileft;
141 return (rv);
144 static size_t
145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
147 uint_t l; /* to be copied to *p on successful return */
148 uchar_t ic; /* current byte */
149 uchar_t ic1; /* 1st byte */
150 uchar_t *ip = *pip; /* next byte to read */
151 size_t ileft = *pileft; /* number of bytes available */
152 size_t rv = 0; /* return value of this function */
153 int remaining_bytes;
154 int u8_size;
156 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */
158 if (ic1 < 0x80) {
159 /* successfully converted */
160 l = (uint_t)ic1;
161 goto ret;
164 u8_size = u8_number_of_bytes[ic1];
165 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
166 l = KICONV_JA_DEF_SINGLE;
167 (*repnum)++;
168 goto ret;
171 remaining_bytes = u8_size - 1;
173 if (remaining_bytes != 0) {
174 l = ic1 & u8_masks_tbl[remaining_bytes];
176 for (; remaining_bytes > 0; remaining_bytes--) {
177 KICONV_JA_NGET_REP_TO_MB(ic);
178 if (ic1 != 0U) {
179 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
180 (ic > u8_valid_max_2nd_byte[ic1])) {
181 l = KICONV_JA_DEF_SINGLE;
182 (*repnum)++;
183 ileft -= (remaining_bytes - 1);
184 ip += (remaining_bytes - 1);
185 break;
187 ic1 = 0U; /* 2nd byte check done */
188 } else {
189 if ((ic < 0x80) || (ic > 0xbf)) {
190 l = KICONV_JA_DEF_SINGLE;
191 (*repnum)++;
192 ileft -= (remaining_bytes - 1);
193 ip += (remaining_bytes - 1);
194 break;
197 l = (l << 6) | (ic & 0x3f);
199 } else {
200 l = KICONV_JA_DEF_SINGLE;
201 (*repnum)++;
204 ret:
205 /* successfully converted */
206 *p = l;
207 rv = *pileft - ileft;
209 *pip = ip;
210 *pileft = ileft;
212 return (rv);
215 static size_t /* return #bytes read, or -1 */
216 read_unicode(
217 uint_t *p, /* point variable to store UTF-32 */
218 uchar_t **pip, /* point pointer to input buf */
219 size_t *pileft, /* point #bytes left in input buf */
220 int *errno, /* point variable to errno */
221 int flag, /* kiconvstr flag */
222 size_t *rv) /* point return valuse */
224 if (flag & KICONV_REPLACE_INVALID)
225 return (utf8_ucs_replace(p, pip, pileft, rv));
226 else
227 return (utf8_ucs(p, pip, pileft, errno));
230 static size_t
231 write_unicode(
232 uint_t u32, /* UTF-32 to write */
233 char **pop, /* point pointer to output buf */
234 size_t *poleft, /* point #bytes left in output buf */
235 int *errno) /* point variable to errno */
237 char *op = *pop;
238 size_t oleft = *poleft;
239 size_t rv = 0; /* return value */
241 if (u32 <= 0x7f) {
242 KICONV_JA_NPUT((uchar_t)(u32));
243 rv = 1;
244 } else if (u32 <= 0x7ff) {
245 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
246 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
247 rv = 2;
248 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
249 KICONV_JA_RETERROR(EILSEQ)
250 } else if (u32 <= 0xffff) {
251 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
252 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
253 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
254 rv = 3;
255 } else if (u32 <= 0x10ffff) {
256 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
257 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
258 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
259 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
260 rv = 4;
261 } else {
262 KICONV_JA_RETERROR(EILSEQ)
265 ret:
266 if (rv != (size_t)-1) {
267 /* update *pop and *poleft only on successful return */
268 *pop = op;
269 *poleft = oleft;
272 return (rv);
275 static void *
276 _kiconv_ja_open_unicode(uint8_t id)
278 kiconv_state_t kcd;
280 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
281 KM_SLEEP);
282 kcd->id = id;
283 kcd->bom_processed = 0;
284 return ((void *)kcd);
287 static void *
288 open_eucjp(void)
290 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
293 static void *
294 open_eucjpms(void)
296 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
299 static void *
300 open_sjis(void)
302 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
305 static void *
306 open_cp932(void)
308 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
312 close_ja(void *kcd)
314 if (! kcd || kcd == (void *)-1)
315 return (EBADF);
317 kmem_free(kcd, sizeof (kiconv_state_data_t));
319 return (0);
322 static size_t
323 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
324 char **outbuf, size_t *outbytesleft, int *errno)
326 uint_t u32; /* UTF-32 */
327 uint_t index; /* index for table lookup */
328 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
329 size_t rv = 0; /* return value of this function */
331 uchar_t *ip;
332 size_t ileft;
333 char *op;
334 size_t oleft;
335 size_t id = ((kiconv_state_t)kcd)->id;
337 if ((inbuf == NULL) || (*inbuf == NULL)) {
338 return (0);
341 ip = (uchar_t *)*inbuf;
342 ileft = *inbytesleft;
343 op = *outbuf;
344 oleft = *outbytesleft;
346 while (ileft != 0) {
347 KICONV_JA_NGET(ic1); /* get 1st byte */
349 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
350 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
351 KICONV_JA_PUTU(u32);
352 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
353 KICONV_JA_NGET(ic2);
354 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
355 ic1 &= KICONV_JA_CMASK;
356 ic2 &= KICONV_JA_CMASK;
357 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
358 if (u32 == KICONV_JA_NODEST) {
359 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
360 u32 = kiconv_ja_jisx0208_to_ucs2[index];
362 if (u32 == KICONV_JA_REPLACE)
363 rv++;
364 KICONV_JA_PUTU(u32);
365 } else { /* 2nd byte check failed */
366 KICONV_JA_RETERROR(EILSEQ)
368 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
369 KICONV_JA_NGET(ic2);
370 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
371 index = (ic2 - 0xa1);
372 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
373 KICONV_JA_PUTU(u32);
374 } else { /* 2nd byte check failed */
375 KICONV_JA_RETERROR(EILSEQ)
377 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
378 KICONV_JA_NGET(ic2);
379 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
380 KICONV_JA_NGET(ic3);
381 if (KICONV_JA_ISCS3(ic3)) {
382 /* 3rd byte check passed */
383 ic2 &= KICONV_JA_CMASK;
384 ic3 &= KICONV_JA_CMASK;
385 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
386 ic2, ic3);
387 if (u32 == KICONV_JA_NODEST) {
388 index = ((ic2 - 0x21) * 94 +
389 (ic3 - 0x21));
390 u32 = kiconv_ja_jisx0212_to_ucs2
391 [index];
393 if (u32 == KICONV_JA_REPLACE)
394 rv++;
395 KICONV_JA_PUTU(u32);
396 } else { /* 3rd byte check failed */
397 KICONV_JA_RETERROR(EILSEQ)
399 } else { /* 2nd byte check failed */
400 KICONV_JA_RETERROR(EILSEQ)
402 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
403 /* C1 control; 1 byte */
404 u32 = ic1;
405 KICONV_JA_PUTU(u32);
406 } else { /* 1st byte check failed */
407 KICONV_JA_RETERROR(EILSEQ)
411 * One character successfully converted so update
412 * values outside of this function's stack.
414 *inbuf = (char *)ip;
415 *inbytesleft = ileft;
416 *outbuf = op;
417 *outbytesleft = oleft;
420 ret:
421 return (rv);
424 static size_t
425 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
426 char **outbuf, size_t *outbytesleft, int *errno)
428 uchar_t ic;
429 size_t rv = 0;
430 uint_t ucs4;
431 ushort_t euc16;
433 uchar_t *ip;
434 size_t ileft;
435 char *op;
436 size_t oleft;
437 size_t read_len;
439 size_t id = ((kiconv_state_t)kcd)->id;
441 if ((inbuf == NULL) || (*inbuf == NULL)) {
442 return (0);
445 ip = (uchar_t *)*inbuf;
446 ileft = *inbytesleft;
447 op = *outbuf;
448 oleft = *outbytesleft;
450 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
452 while (ileft != 0) {
453 KICONV_JA_GETU(&ucs4, 0);
455 if (ucs4 > 0xffff) {
456 /* non-BMP */
457 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
458 rv++;
459 goto next;
462 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
463 if (euc16 == KICONV_JA_NODEST) {
464 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
466 if (euc16 == KICONV_JA_NODEST) {
467 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
468 rv++;
469 goto next;
472 switch (euc16 & 0x8080) {
473 case 0x0000: /* CS0 */
474 ic = (uchar_t)euc16;
475 KICONV_JA_NPUT(ic);
476 break;
477 case 0x8080: /* CS1 */
478 ic = (uchar_t)((euc16 >> 8) & 0xff);
479 KICONV_JA_NPUT(ic);
480 ic = (uchar_t)(euc16 & 0xff);
481 KICONV_JA_NPUT(ic);
482 break;
483 case 0x0080: /* CS2 */
484 KICONV_JA_NPUT(SS2);
485 ic = (uchar_t)euc16;
486 KICONV_JA_NPUT(ic);
487 break;
488 case 0x8000: /* CS3 */
489 KICONV_JA_NPUT(SS3);
490 ic = (uchar_t)((euc16 >> 8) & 0xff);
491 KICONV_JA_NPUT(ic);
492 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
493 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
494 break;
496 next:
498 * One character successfully converted so update
499 * values outside of this function's stack.
501 *inbuf = (char *)ip;
502 *inbytesleft = ileft;
503 *outbuf = op;
504 *outbytesleft = oleft;
507 ret:
508 return (rv);
511 static size_t
512 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
513 size_t *outbytesleft, int flag, int *errno, uint8_t id)
515 uint_t u32; /* UTF-32 */
516 uint_t index; /* index for table lookup */
517 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
518 size_t rv = 0; /* return value of this function */
520 uchar_t *ip;
521 size_t ileft;
522 char *op;
523 size_t oleft;
525 boolean_t do_not_ignore_null;
527 if ((inbuf == NULL) || (*inbuf == '\0')) {
528 return (0);
531 ip = (uchar_t *)inbuf;
532 ileft = *inbytesleft;
533 op = outbuf;
534 oleft = *outbytesleft;
536 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
538 while (ileft != 0) {
539 KICONV_JA_NGET(ic1); /* get 1st byte */
541 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
542 if (ic1 == '\0' && do_not_ignore_null) {
543 return (0);
545 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
546 KICONV_JA_PUTU(u32);
547 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
548 if (flag & KICONV_REPLACE_INVALID) {
549 KICONV_JA_NGET_REP_FR_MB(ic2);
550 } else {
551 KICONV_JA_NGET(ic2);
553 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
554 ic1 &= KICONV_JA_CMASK;
555 ic2 &= KICONV_JA_CMASK;
556 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
557 if (u32 == KICONV_JA_NODEST) {
558 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
559 u32 = kiconv_ja_jisx0208_to_ucs2[index];
561 if (u32 == KICONV_JA_REPLACE)
562 rv++;
563 KICONV_JA_PUTU(u32);
564 } else { /* 2nd byte check failed */
565 if (flag & KICONV_REPLACE_INVALID) {
566 KICONV_JA_PUTU(KICONV_JA_REPLACE);
567 rv++;
568 } else {
569 KICONV_JA_RETERROR(EILSEQ)
572 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
573 if (flag & KICONV_REPLACE_INVALID) {
574 KICONV_JA_NGET_REP_FR_MB(ic2);
575 } else {
576 KICONV_JA_NGET(ic2);
578 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
579 index = (ic2 - 0xa1);
580 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
581 KICONV_JA_PUTU(u32);
582 } else { /* 2nd byte check failed */
583 if (flag & KICONV_REPLACE_INVALID) {
584 KICONV_JA_PUTU(KICONV_JA_REPLACE);
585 rv++;
586 } else {
587 KICONV_JA_RETERROR(EILSEQ)
590 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
591 if (flag & KICONV_REPLACE_INVALID) {
592 KICONV_JA_NGET_REP_FR_MB(ic2);
593 } else {
594 KICONV_JA_NGET(ic2);
596 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
597 if (flag & KICONV_REPLACE_INVALID) {
598 KICONV_JA_NGET_REP_FR_MB(ic3);
599 } else {
600 KICONV_JA_NGET(ic3);
602 if (KICONV_JA_ISCS3(ic3)) {
603 /* 3rd byte check passed */
604 ic2 &= KICONV_JA_CMASK;
605 ic3 &= KICONV_JA_CMASK;
606 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
607 ic2, ic3);
608 if (u32 == KICONV_JA_NODEST) {
609 index = ((ic2 - 0x21) * 94 +
610 (ic3 - 0x21));
611 u32 = kiconv_ja_jisx0212_to_ucs2
612 [index];
614 if (u32 == KICONV_JA_REPLACE)
615 rv++;
616 KICONV_JA_PUTU(u32);
617 } else { /* 3rd byte check failed */
618 if (flag & KICONV_REPLACE_INVALID) {
619 KICONV_JA_PUTU(
620 KICONV_JA_REPLACE);
621 rv++;
622 } else {
623 KICONV_JA_RETERROR(EILSEQ)
626 } else { /* 2nd byte check failed */
627 if (flag & KICONV_REPLACE_INVALID) {
628 KICONV_JA_PUTU(KICONV_JA_REPLACE);
629 rv++;
630 } else {
631 KICONV_JA_RETERROR(EILSEQ)
634 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
635 /* C1 control; 1 byte */
636 u32 = ic1;
637 KICONV_JA_PUTU(u32);
638 } else { /* 1st byte check failed */
639 if (flag & KICONV_REPLACE_INVALID) {
640 KICONV_JA_PUTU(KICONV_JA_REPLACE);
641 rv++;
642 } else {
643 KICONV_JA_RETERROR(EILSEQ)
647 next:
649 * One character successfully converted so update
650 * values outside of this function's stack.
652 *inbytesleft = ileft;
653 *outbytesleft = oleft;
656 ret:
657 return (rv);
660 static size_t
661 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
662 size_t *outbytesleft, int flag, int *errno, uint8_t id)
664 uchar_t ic;
665 size_t rv = 0;
666 uint_t ucs4;
667 ushort_t euc16;
669 uchar_t *ip;
670 size_t ileft;
671 char *op;
672 size_t oleft;
673 size_t read_len;
675 boolean_t do_not_ignore_null;
677 if ((inbuf == NULL) || (*inbuf == '\0')) {
678 return (0);
681 ip = (uchar_t *)inbuf;
682 ileft = *inbytesleft;
683 op = outbuf;
684 oleft = *outbytesleft;
686 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
688 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
690 while (ileft != 0) {
691 KICONV_JA_GETU(&ucs4, flag);
693 if (ucs4 == 0x0 && do_not_ignore_null) {
694 return (0);
697 if (ucs4 > 0xffff) {
698 /* non-BMP */
699 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
700 rv++;
701 goto next;
704 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
705 if (euc16 == KICONV_JA_NODEST) {
706 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
708 if (euc16 == KICONV_JA_NODEST) {
709 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
710 rv++;
711 goto next;
714 switch (euc16 & 0x8080) {
715 case 0x0000: /* CS0 */
716 ic = (uchar_t)euc16;
717 KICONV_JA_NPUT(ic);
718 break;
719 case 0x8080: /* CS1 */
720 ic = (uchar_t)((euc16 >> 8) & 0xff);
721 KICONV_JA_NPUT(ic);
722 ic = (uchar_t)(euc16 & 0xff);
723 KICONV_JA_NPUT(ic);
724 break;
725 case 0x0080: /* CS2 */
726 KICONV_JA_NPUT(SS2);
727 ic = (uchar_t)euc16;
728 KICONV_JA_NPUT(ic);
729 break;
730 case 0x8000: /* CS3 */
731 KICONV_JA_NPUT(SS3);
732 ic = (uchar_t)((euc16 >> 8) & 0xff);
733 KICONV_JA_NPUT(ic);
734 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
735 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
736 break;
738 next:
740 * One character successfully converted so update
741 * values outside of this function's stack.
743 *inbytesleft = ileft;
744 *outbytesleft = oleft;
747 ret:
748 return (rv);
751 static size_t
752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
753 char **outbuf, size_t *outbytesleft, int *errno)
755 if (! kcd || kcd == (void *)-1) {
756 *errno = EBADF;
757 return ((size_t)-1);
760 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
761 outbuf, outbytesleft, errno));
764 static size_t
765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
766 char **outbuf, size_t *outbytesleft, int *errno)
768 if (! kcd || kcd == (void *)-1) {
769 *errno = EBADF;
770 return ((size_t)-1);
773 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
774 outbuf, outbytesleft, errno));
777 static size_t
778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
779 size_t *outbytesleft, int flag, int *errno)
781 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
782 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
785 static size_t
786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
787 size_t *outbytesleft, int flag, int *errno)
789 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
790 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
793 static size_t
794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
795 size_t *outbytesleft, int flag, int *errno)
797 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
798 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
801 static size_t
802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
803 size_t *outbytesleft, int flag, int *errno)
805 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
806 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
809 static size_t
810 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
811 char **outbuf, size_t *outbytesleft, int *errno)
813 uint_t uni; /* UTF-32 */
814 uint_t index; /* index for table lookup */
815 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
816 size_t rv = 0; /* return value of this function */
818 uchar_t *ip;
819 size_t ileft;
820 char *op;
821 size_t oleft;
822 size_t id = ((kiconv_state_t)kcd)->id;
824 if ((inbuf == NULL) || (*inbuf == NULL)) {
825 return (0);
828 ip = (uchar_t *)*inbuf;
829 ileft = *inbytesleft;
830 op = *outbuf;
831 oleft = *outbytesleft;
833 while (ileft != 0) {
834 KICONV_JA_NGET(ic1); /* get 1st byte */
836 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
837 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
838 KICONV_JA_PUTU(uni);
839 } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
840 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
841 KICONV_JA_PUTU(uni);
842 } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
843 KICONV_JA_NGET(ic2);
844 if (KICONV_JA_ISSJKANJI2(ic2)) {
845 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
846 if (ic2 >= 0x9f) {
847 ic1++;
849 ic2 = kiconv_ja_sjtojis2[ic2];
850 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
851 if (uni == KICONV_JA_NODEST) {
852 index = ((ic1 - 0x21) * 94)
853 + (ic2 - 0x21);
854 uni = kiconv_ja_jisx0208_to_ucs2[index];
856 if (uni == KICONV_JA_REPLACE)
857 rv++;
858 KICONV_JA_PUTU(uni);
859 } else { /* 2nd byte check failed */
860 KICONV_JA_RETERROR(EILSEQ)
861 /* NOTREACHED */
863 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
864 KICONV_JA_NGET(ic2);
865 if (KICONV_JA_ISSJKANJI2(ic2)) {
866 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
867 if (ic2 >= 0x9f) {
868 ic1++;
870 index = ((ic1 - 0x21) * 94)
871 + (kiconv_ja_sjtojis2[ic2] - 0x21);
872 uni = kiconv_ja_jisx0212_to_ucs2[index];
873 if (uni == KICONV_JA_REPLACE)
874 rv++;
875 KICONV_JA_PUTU(uni);
876 } else { /* 2nd byte check failed */
877 KICONV_JA_RETERROR(EILSEQ)
879 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
880 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
882 * We need a special treatment for each codes.
883 * By adding some offset number for them, we
884 * can process them as the same way of that of
885 * extended IBM chars.
887 KICONV_JA_NGET(ic2);
888 if (KICONV_JA_ISSJKANJI2(ic2)) {
889 ushort_t dest, upper, lower;
890 dest = (ic1 << 8) + ic2;
891 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
892 KICONV_JA_REMAP_NEC(dest);
893 if (dest == 0xffff) {
894 KICONV_JA_RETERROR(EILSEQ)
898 * XXX: 0xfa54 and 0xfa5b must be mapped
899 * to JIS0208 area. Therefore we
900 * have to do special treatment.
902 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
903 if (dest == 0xfa54) {
904 upper = 0x22;
905 lower = 0x4c;
906 } else {
907 upper = 0x22;
908 lower = 0x68;
910 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
911 upper, lower);
912 if (uni == KICONV_JA_NODEST) {
913 index = (uint_t)((upper - 0x21)
914 * 94 + (lower - 0x21));
915 uni = kiconv_ja_jisx0208_to_ucs2
916 [index];
918 if (uni == KICONV_JA_REPLACE)
919 rv++;
920 KICONV_JA_PUTU(uni);
921 } else {
922 dest = dest - 0xfa40 -
923 (((dest>>8) - 0xfa) * 0x40);
924 dest = kiconv_ja_sjtoibmext[dest];
925 if (dest == 0xffff) {
926 KICONV_JA_RETERROR(EILSEQ)
928 upper = (dest >> 8) & KICONV_JA_CMASK;
929 lower = dest & KICONV_JA_CMASK;
930 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
931 upper, lower);
932 if (uni == KICONV_JA_NODEST) {
933 index = (uint_t)((upper - 0x21)
934 * 94 + (lower - 0x21));
935 uni = kiconv_ja_jisx0212_to_ucs2
936 [index];
938 if (uni == KICONV_JA_REPLACE)
939 rv++;
940 KICONV_JA_PUTU(uni);
942 } else { /* 2nd byte check failed */
943 KICONV_JA_RETERROR(EILSEQ)
945 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
947 * Based on the draft convention of OSF-JVC CDEWG,
948 * characters in this area will be mapped to
949 * "CHIKAN-MOJI." (convertible character)
950 * We use U+FFFD in this case.
952 KICONV_JA_NGET(ic2);
953 if (KICONV_JA_ISSJKANJI2(ic2)) {
954 uni = 0xfffd;
955 KICONV_JA_PUTU(uni);
956 } else { /* 2nd byte check failed */
957 KICONV_JA_RETERROR(EILSEQ)
959 } else { /* 1st byte check failed */
960 KICONV_JA_RETERROR(EILSEQ)
964 * One character successfully converted so update
965 * values outside of this function's stack.
967 *inbuf = (char *)ip;
968 *inbytesleft = ileft;
969 *outbuf = op;
970 *outbytesleft = oleft;
973 ret:
974 return (rv);
978 * _kiconv_ja_lookuptbl()
979 * Return the index number if its index-ed number
980 * is the same as dest value.
982 static ushort_t
983 _kiconv_ja_lookuptbl(ushort_t dest)
985 ushort_t tmp;
986 int i;
987 int sz = (sizeof (kiconv_ja_sjtoibmext) /
988 sizeof (kiconv_ja_sjtoibmext[0]));
990 for (i = 0; i < sz; i++) {
991 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
992 if (tmp == dest)
993 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
995 return (0x3f);
998 static size_t
999 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1000 char **outbuf, size_t *outbytesleft, int *errno)
1002 uchar_t ic;
1003 size_t rv = 0;
1004 uint_t ucs4;
1005 ushort_t euc16;
1006 ushort_t dest;
1008 uchar_t *ip;
1009 size_t ileft;
1010 char *op;
1011 size_t oleft;
1012 size_t read_len;
1014 size_t id = ((kiconv_state_t)kcd)->id;
1016 if ((inbuf == NULL) || (*inbuf == NULL)) {
1017 return (0);
1020 ip = (uchar_t *)*inbuf;
1021 ileft = *inbytesleft;
1022 op = *outbuf;
1023 oleft = *outbytesleft;
1025 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
1027 while (ileft != 0) {
1028 KICONV_JA_GETU(&ucs4, 0);
1030 if (ucs4 > 0xffff) {
1031 /* non-BMP */
1032 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1033 rv++;
1034 goto next;
1037 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1038 if (euc16 == KICONV_JA_NODEST) {
1039 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1041 if (euc16 == KICONV_JA_NODEST) {
1042 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1043 rv++;
1044 goto next;
1047 switch (euc16 & 0x8080) {
1048 case 0x0000: /* CS0 */
1049 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1050 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1051 rv++;
1052 } else {
1053 ic = (uchar_t)euc16;
1054 KICONV_JA_NPUT(ic);
1056 break;
1057 case 0x8080: /* CS1 */
1058 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1059 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1061 * for even number row (Ku), add 0x80 to
1062 * look latter half of kiconv_ja_jistosj2[] array
1064 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1065 + (((ic % 2) == 0) ? 0x80 : 0x00));
1066 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1067 break;
1068 case 0x0080: /* CS2 */
1069 ic = (uchar_t)euc16;
1070 KICONV_JA_NPUT(ic);
1071 break;
1072 case 0x8000: /* CS3 */
1073 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1074 if (euc16 == 0xa271) {
1075 /* NUMERO SIGN */
1076 KICONV_JA_NPUT(0x87);
1077 KICONV_JA_NPUT(0x82);
1078 } else if (ic < 0x75) { /* check if IBM VDC */
1079 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1080 if (dest == 0xffff) {
1081 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1082 } else {
1083 /* avoid putting NUL ('\0') */
1084 if (dest > 0xff) {
1085 KICONV_JA_NPUT(
1086 (dest >> 8) & 0xff);
1087 KICONV_JA_NPUT(dest & 0xff);
1088 } else {
1089 KICONV_JA_NPUT(dest & 0xff);
1092 } else {
1093 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1095 * for even number row (Ku), add 0x80 to
1096 * look latter half of kiconv_ja_jistosj2[]
1098 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1099 + (((ic % 2) == 0) ? 0x80 : 0x00));
1100 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1102 break;
1105 next:
1107 * One character successfully converted so update
1108 * values outside of this function's stack.
1110 *inbuf = (char *)ip;
1111 *inbytesleft = ileft;
1112 *outbuf = op;
1113 *outbytesleft = oleft;
1116 ret:
1117 return (rv);
1120 static size_t
1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1122 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1124 uint_t uni; /* UTF-32 */
1125 uint_t index; /* index for table lookup */
1126 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
1127 size_t rv = 0; /* return value of this function */
1129 uchar_t *ip;
1130 size_t ileft;
1131 char *op;
1132 size_t oleft;
1134 boolean_t do_not_ignore_null;
1136 if ((inbuf == NULL) || (*inbuf == '\0')) {
1137 return (0);
1140 ip = (uchar_t *)inbuf;
1141 ileft = *inbytesleft;
1142 op = outbuf;
1143 oleft = *outbytesleft;
1145 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1147 while (ileft != 0) {
1148 KICONV_JA_NGET(ic1); /* get 1st byte */
1150 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
1151 if (ic1 == '\0' && do_not_ignore_null) {
1152 return (0);
1154 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1155 KICONV_JA_PUTU(uni);
1156 } else if (KICONV_JA_ISSJKANA(ic1)) {
1157 /* JIS X 0201 Kana; 1 byte */
1158 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1159 KICONV_JA_PUTU(uni);
1160 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1161 /* JIS X 0208 or UDC; 2 bytes */
1162 if (flag & KICONV_REPLACE_INVALID) {
1163 KICONV_JA_NGET_REP_FR_MB(ic2);
1164 } else {
1165 KICONV_JA_NGET(ic2);
1167 if (KICONV_JA_ISSJKANJI2(ic2)) {
1168 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1169 if (ic2 >= 0x9f) {
1170 ic1++;
1172 ic2 = kiconv_ja_sjtojis2[ic2];
1173 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1174 if (uni == KICONV_JA_NODEST) {
1175 index = ((ic1 - 0x21) * 94)
1176 + (ic2 - 0x21);
1177 uni = kiconv_ja_jisx0208_to_ucs2[index];
1179 if (uni == KICONV_JA_REPLACE)
1180 rv++;
1181 KICONV_JA_PUTU(uni);
1182 } else { /* 2nd byte check failed */
1183 if (flag & KICONV_REPLACE_INVALID) {
1184 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1185 rv++;
1186 } else {
1187 KICONV_JA_RETERROR(EILSEQ)
1189 /* NOTREACHED */
1191 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1192 if (flag & KICONV_REPLACE_INVALID) {
1193 KICONV_JA_NGET_REP_FR_MB(ic2);
1194 } else {
1195 KICONV_JA_NGET(ic2);
1197 if (KICONV_JA_ISSJKANJI2(ic2)) {
1198 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1199 if (ic2 >= 0x9f) {
1200 ic1++;
1202 index = ((ic1 - 0x21) * 94)
1203 + (kiconv_ja_sjtojis2[ic2] - 0x21);
1204 uni = kiconv_ja_jisx0212_to_ucs2[index];
1205 if (uni == KICONV_JA_REPLACE)
1206 rv++;
1207 KICONV_JA_PUTU(uni);
1208 } else { /* 2nd byte check failed */
1209 if (flag & KICONV_REPLACE_INVALID) {
1210 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1211 rv++;
1212 } else {
1213 KICONV_JA_RETERROR(EILSEQ)
1216 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1217 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1219 * We need a special treatment for each codes.
1220 * By adding some offset number for them, we
1221 * can process them as the same way of that of
1222 * extended IBM chars.
1224 if (flag & KICONV_REPLACE_INVALID) {
1225 KICONV_JA_NGET_REP_FR_MB(ic2);
1226 } else {
1227 KICONV_JA_NGET(ic2);
1229 if (KICONV_JA_ISSJKANJI2(ic2)) {
1230 ushort_t dest, upper, lower;
1231 dest = (ic1 << 8) + ic2;
1232 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1233 KICONV_JA_REMAP_NEC(dest);
1234 if (dest == 0xffff) {
1235 if (flag &
1236 KICONV_REPLACE_INVALID) {
1237 KICONV_JA_PUTU(
1238 KICONV_JA_REPLACE);
1239 rv++;
1240 } else {
1241 KICONV_JA_RETERROR(
1242 EILSEQ)
1247 * XXX: 0xfa54 and 0xfa5b must be mapped
1248 * to JIS0208 area. Therefore we
1249 * have to do special treatment.
1251 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1252 if (dest == 0xfa54) {
1253 upper = 0x22;
1254 lower = 0x4c;
1255 } else {
1256 upper = 0x22;
1257 lower = 0x68;
1259 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1260 upper, lower);
1261 if (uni == KICONV_JA_NODEST) {
1262 index = (uint_t)((upper - 0x21)
1263 * 94 + (lower - 0x21));
1264 uni = kiconv_ja_jisx0208_to_ucs2
1265 [index];
1267 if (uni == KICONV_JA_REPLACE)
1268 rv++;
1269 KICONV_JA_PUTU(uni);
1270 } else {
1271 dest = dest - 0xfa40 -
1272 (((dest>>8) - 0xfa) * 0x40);
1273 dest = kiconv_ja_sjtoibmext[dest];
1274 if (dest == 0xffff) {
1275 if (flag &
1276 KICONV_REPLACE_INVALID) {
1277 KICONV_JA_PUTU(
1278 KICONV_JA_REPLACE);
1279 rv++;
1280 } else {
1281 KICONV_JA_RETERROR(
1282 EILSEQ)
1285 upper = (dest >> 8) & KICONV_JA_CMASK;
1286 lower = dest & KICONV_JA_CMASK;
1287 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1288 upper, lower);
1289 if (uni == KICONV_JA_NODEST) {
1290 index = (uint_t)((upper - 0x21)
1291 * 94 + (lower - 0x21));
1292 uni = kiconv_ja_jisx0212_to_ucs2
1293 [index];
1295 if (uni == KICONV_JA_REPLACE)
1296 rv++;
1297 KICONV_JA_PUTU(uni);
1299 } else { /* 2nd byte check failed */
1300 if (flag & KICONV_REPLACE_INVALID) {
1301 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1302 rv++;
1303 } else {
1304 KICONV_JA_RETERROR(EILSEQ)
1307 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1309 * Based on the draft convention of OSF-JVC CDEWG,
1310 * characters in this area will be mapped to
1311 * "CHIKAN-MOJI." (convertible character)
1312 * We use U+FFFD in this case.
1314 if (flag & KICONV_REPLACE_INVALID) {
1315 KICONV_JA_NGET_REP_FR_MB(ic2);
1316 } else {
1317 KICONV_JA_NGET(ic2);
1319 if (KICONV_JA_ISSJKANJI2(ic2)) {
1320 uni = 0xfffd;
1321 KICONV_JA_PUTU(uni);
1322 } else { /* 2nd byte check failed */
1323 if (flag & KICONV_REPLACE_INVALID) {
1324 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1325 rv++;
1326 } else {
1327 KICONV_JA_RETERROR(EILSEQ)
1330 } else { /* 1st byte check failed */
1331 if (flag & KICONV_REPLACE_INVALID) {
1332 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1333 rv++;
1334 } else {
1335 KICONV_JA_RETERROR(EILSEQ)
1339 next:
1341 * One character successfully converted so update
1342 * values outside of this function's stack.
1344 *inbytesleft = ileft;
1345 *outbytesleft = oleft;
1348 ret:
1349 return (rv);
1352 static size_t
1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1354 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1356 uchar_t ic;
1357 size_t rv = 0;
1358 uint_t ucs4;
1359 ushort_t euc16;
1360 ushort_t dest;
1362 uchar_t *ip;
1363 size_t ileft;
1364 char *op;
1365 size_t oleft;
1366 size_t read_len;
1368 boolean_t do_not_ignore_null;
1370 if ((inbuf == NULL) || (*inbuf == '\0')) {
1371 return (0);
1374 ip = (uchar_t *)inbuf;
1375 ileft = *inbytesleft;
1376 op = outbuf;
1377 oleft = *outbytesleft;
1379 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1381 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1383 while (ileft != 0) {
1384 KICONV_JA_GETU(&ucs4, flag);
1386 if (ucs4 == 0x0 && do_not_ignore_null) {
1387 return (0);
1390 if (ucs4 > 0xffff) {
1391 /* non-BMP */
1392 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1393 rv++;
1394 goto next;
1397 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1398 if (euc16 == KICONV_JA_NODEST) {
1399 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1401 if (euc16 == KICONV_JA_NODEST) {
1402 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1403 rv++;
1404 goto next;
1407 switch (euc16 & 0x8080) {
1408 case 0x0000: /* CS0 */
1409 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1410 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1411 rv++;
1412 } else {
1413 ic = (uchar_t)euc16;
1414 KICONV_JA_NPUT(ic);
1416 break;
1417 case 0x8080: /* CS1 */
1418 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1419 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1421 * for even number row (Ku), add 0x80 to
1422 * look latter half of kiconv_ja_jistosj2[] array
1424 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1425 + (((ic % 2) == 0) ? 0x80 : 0x00));
1426 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1427 break;
1428 case 0x0080: /* CS2 */
1429 ic = (uchar_t)euc16;
1430 KICONV_JA_NPUT(ic);
1431 break;
1432 case 0x8000: /* CS3 */
1433 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1434 if (euc16 == 0xa271) {
1435 /* NUMERO SIGN */
1436 KICONV_JA_NPUT(0x87);
1437 KICONV_JA_NPUT(0x82);
1438 } else if (ic < 0x75) { /* check if IBM VDC */
1439 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1440 if (dest == 0xffff) {
1441 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1442 } else {
1443 /* avoid putting NUL ('\0') */
1444 if (dest > 0xff) {
1445 KICONV_JA_NPUT(
1446 (dest >> 8) & 0xff);
1447 KICONV_JA_NPUT(dest & 0xff);
1448 } else {
1449 KICONV_JA_NPUT(dest & 0xff);
1452 } else {
1453 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1455 * for even number row (Ku), add 0x80 to
1456 * look latter half of kiconv_ja_jistosj2[]
1458 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1459 + (((ic % 2) == 0) ? 0x80 : 0x00));
1460 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1462 break;
1465 next:
1467 * One character successfully converted so update
1468 * values outside of this function's stack.
1470 *inbytesleft = ileft;
1471 *outbytesleft = oleft;
1474 ret:
1475 return (rv);
1478 static size_t
1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1480 char **outbuf, size_t *outbytesleft, int *errno)
1482 if (! kcd || kcd == (void *)-1) {
1483 *errno = EBADF;
1484 return ((size_t)-1);
1487 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1488 outbuf, outbytesleft, errno));
1491 static size_t
1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1493 char **outbuf, size_t *outbytesleft, int *errno)
1495 if (! kcd || kcd == (void *)-1) {
1496 *errno = EBADF;
1497 return ((size_t)-1);
1500 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1501 outbuf, outbytesleft, errno));
1504 static size_t
1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1506 size_t *outbytesleft, int flag, int *errno)
1508 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1509 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512 static size_t
1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1514 size_t *outbytesleft, int flag, int *errno)
1516 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1517 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520 static size_t
1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1522 size_t *outbytesleft, int flag, int *errno)
1524 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1525 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528 static size_t
1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1530 size_t *outbytesleft, int flag, int *errno)
1532 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1533 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1538 "eucjp", "utf-8", open_eucjp,
1539 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1542 "utf-8", "eucjp", open_eucjp,
1543 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1546 "eucjpms", "utf-8", open_eucjpms,
1547 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1550 "utf-8", "eucjpms", open_eucjpms,
1551 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1554 "sjis", "utf-8", open_sjis,
1555 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1558 "utf-8", "sjis", open_sjis,
1559 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1562 "cp932", "utf-8", open_cp932,
1563 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1566 "utf-8", "cp932", open_cp932,
1567 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
/*
 * Alias names and the canonical names they stand for.  The two arrays are
 * parallel: kiconv_ja_aliases[i] corresponds to kiconv_ja_canonicals[i],
 * so they must be kept the same length and in the same order.
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",	/* "932" */
	"sjis",		/* "shiftjis" */
	"sjis"		/* "pck" */
};

/* Element counts of the ops table and of the alias list. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1579 static kiconv_module_info_t kiconv_ja_info = {
1580 "kiconv_ja", /* module name */
1581 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */
1582 kiconv_ja_ops_tbl, /* kiconv_ja ops table */
1583 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1584 kiconv_ja_aliases, /* kiconv_ja aliases */
1585 kiconv_ja_canonicals, /* kiconv_ja canonicals */
1589 static struct modlkiconv modlkiconv_ja = {
1590 &mod_kiconvops,
1591 "kiconv module for Japanese",
1592 &kiconv_ja_info
1595 static struct modlinkage modlinkage = {
1596 MODREV_1,
1597 (void *)&modlkiconv_ja,
1598 NULL
1602 _init(void)
1604 int err;
1606 err = mod_install(&modlinkage);
1607 if (err)
1608 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1610 return (err);
1614 _info(struct modinfo *modinfop)
1616 return (mod_info(&modlinkage, modinfop));
1620 _fini(void)
1622 int err;
1625 * If this module is being used, then, we cannot remove the module.
1626 * The following checking will catch pretty much all usual cases.
1628 * Any remaining will be catached by the kiconv_unregister_module()
1629 * during mod_remove() at below.
1631 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1632 return (EBUSY);
1634 err = mod_remove(&modlinkage);
1635 if (err)
1636 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1638 return (err);