4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * Kernel iconv code conversion functions (PSARC/2007/173).
31 * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
32 * Interface stability: Committed.
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/sysmacros.h>
38 #include <sys/systm.h>
39 #include <sys/debug.h>
41 #include <sys/sunddi.h>
42 #include <sys/ksynch.h>
43 #include <sys/modctl.h>
44 #include <sys/byteorder.h>
45 #include <sys/errno.h>
46 #include <sys/kiconv.h>
47 #include <sys/kiconv_latin1.h>
51 * The following macros indicate ids to the correct code conversion mapping
52 * data tables to use. The actual tables are coming from <sys/kiconv_latin1.h>.
54 #define KICONV_TBLID_1252 (0x00)
55 #define KICONV_TBLID_8859_1 (0x01)
56 #define KICONV_TBLID_8859_15 (0x02)
57 #define KICONV_TBLID_850 (0x03)
59 #define KICONV_MAX_MAPPING_TBLID (0x03)
62 * The following tables are coming from u8_textprep.c. We use them to
63 * check on validity of UTF-8 characters and their bytes.
65 extern const int8_t u8_number_of_bytes
[];
66 extern const uint8_t u8_valid_min_2nd_byte
[];
67 extern const uint8_t u8_valid_max_2nd_byte
[];
71 * The following four functions, open_to_1252(), open_to_88591(),
72 * open_to_885915(), and open_to_850(), are kiconv_open functions from
73 * UTF-8 to corresponding single byte codesets.
80 s
= (kiconv_state_t
)kmem_alloc(sizeof (kiconv_state_data_t
), KM_SLEEP
);
81 s
->id
= KICONV_TBLID_1252
;
92 s
= (kiconv_state_t
)kmem_alloc(sizeof (kiconv_state_data_t
), KM_SLEEP
);
93 s
->id
= KICONV_TBLID_8859_1
;
104 s
= (kiconv_state_t
)kmem_alloc(sizeof (kiconv_state_data_t
), KM_SLEEP
);
105 s
->id
= KICONV_TBLID_8859_15
;
106 s
->bom_processed
= 0;
116 s
= (kiconv_state_t
)kmem_alloc(sizeof (kiconv_state_data_t
), KM_SLEEP
);
117 s
->id
= KICONV_TBLID_850
;
118 s
->bom_processed
= 0;
124 * The following four functions, open_fr_1252(), open_fr_88591(),
125 * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
126 * corresponding single byte codesets to UTF-8.
131 return ((void *)KICONV_TBLID_1252
);
137 return ((void *)KICONV_TBLID_8859_1
);
143 return ((void *)KICONV_TBLID_8859_15
);
149 return ((void *)KICONV_TBLID_850
);
153 * The following close_to_sb() function is kiconv_close function for
154 * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
155 * is kiconv_close function for the conversions from single byte codesets to
161 if (! s
|| s
== (void *)-1)
164 kmem_free(s
, sizeof (kiconv_state_data_t
));
172 if ((ulong_t
)s
> KICONV_MAX_MAPPING_TBLID
)
179 * The following is the common kiconv function for conversions from UTF-8
180 * to single byte codesets.
183 kiconv_to_sb(void *kcd
, char **inbuf
, size_t *inbytesleft
, char **outbuf
,
184 size_t *outbytesleft
, int *errno
)
201 /* Check on the kiconv code conversion descriptor. */
202 if (! kcd
|| kcd
== (void *)-1) {
208 * Get the table id we are going to use for the code conversion
209 * and let's double check on it.
211 id
= ((kiconv_state_t
)kcd
)->id
;
212 if (id
> KICONV_MAX_MAPPING_TBLID
) {
217 /* If this is a state reset request, process and return. */
218 if (! inbuf
|| ! (*inbuf
)) {
219 ((kiconv_state_t
)kcd
)->bom_processed
= 0;
224 ib
= (uchar_t
*)*inbuf
;
225 ob
= (uchar_t
*)*outbuf
;
226 ibtail
= ib
+ *inbytesleft
;
227 obtail
= ob
+ *outbytesleft
;
230 * The initial high value for the binary search we will be using
231 * shortly is a literal constant as of today, but to be future proof,
232 * let's calculate it as follows.
234 init_h
= sizeof (to_sb_tbl
[id
]) / sizeof (kiconv_to_sb_tbl_comp_t
) - 1;
237 * If we haven't checked on the UTF-8 signature BOM character in
238 * the beginning of the conversion data stream, we check it and if
239 * we find one, we skip it since we have no use for it.
241 if (((kiconv_state_t
)kcd
)->bom_processed
== 0 && (ibtail
- ib
) >= 3 &&
242 *ib
== 0xef && *(ib
+ 1) == 0xbb && *(ib
+ 2) == 0xbf)
244 ((kiconv_state_t
)kcd
)->bom_processed
= 1;
246 while (ib
< ibtail
) {
247 sz
= u8_number_of_bytes
[*ib
];
250 ret_val
= (size_t)-1;
255 * If there is no room to write at the output buffer,
260 ret_val
= (size_t)-1;
265 * If it is a 7-bit ASCII character, we don't need to
266 * process further and we just copy the character over.
268 * If not, we collect the character bytes up to four bytes,
269 * validate the bytes, and binary search for the corresponding
270 * single byte codeset character byte. If we find it from
271 * the mapping table, we put that into the output buffer;
272 * otherwise, we put a replacement character instead as
273 * a non-identical conversion.
281 * Issue EINVAL error if input buffer has an incomplete
282 * character at the end of the buffer.
284 if ((ibtail
- ib
) < sz
) {
286 ret_val
= (size_t)-1;
291 * We collect UTF-8 character bytes and also check if
292 * this is a valid UTF-8 character without any bogus bytes
293 * based on the latest UTF-8 binary representation.
298 for (i
= 1; i
< sz
; i
++) {
300 if (*ib
< u8_valid_min_2nd_byte
[u8
] ||
301 *ib
> u8_valid_max_2nd_byte
[u8
]) {
303 ret_val
= (size_t)-1;
305 goto TO_SB_ILLEGAL_CHAR_ERR
;
308 } else if (*ib
< 0x80 || *ib
> 0xbf) {
310 ret_val
= (size_t)-1;
312 goto TO_SB_ILLEGAL_CHAR_ERR
;
314 u8
= (u8
<< 8) | ((uint32_t)*ib
);
322 if (to_sb_tbl
[id
][i
].u8
== u8
)
324 else if (to_sb_tbl
[id
][i
].u8
< u8
)
330 if (to_sb_tbl
[id
][i
].u8
== u8
) {
331 *ob
++ = to_sb_tbl
[id
][i
].sb
;
334 * If we don't find a character in the target
335 * codeset, we insert an ASCII replacement character
336 * at the output buffer and indicate such
337 * "non-identical" conversion by increasing the
338 * return value which is the non-identical conversion
339 * counter if bigger than 0.
341 *ob
++ = KICONV_ASCII_REPLACEMENT_CHAR
;
346 TO_SB_ILLEGAL_CHAR_ERR
:
348 *inbytesleft
= ibtail
- ib
;
349 *outbuf
= (char *)ob
;
350 *outbytesleft
= obtail
- ob
;
356 * The following is the common kiconv function from single byte codesets to
360 kiconv_fr_sb(void *kcd
, char **inbuf
, size_t *inbytesleft
, char **outbuf
,
361 size_t *outbytesleft
, int *errno
)
372 /* Check on the kiconv code conversion descriptor validity. */
373 if ((ulong_t
)kcd
> KICONV_MAX_MAPPING_TBLID
) {
379 * If this is a state reset request, there is nothing to do and so
382 if (! inbuf
|| ! (*inbuf
))
386 ib
= (uchar_t
*)*inbuf
;
387 ob
= (uchar_t
*)*outbuf
;
388 ibtail
= ib
+ *inbytesleft
;
389 obtail
= ob
+ *outbytesleft
;
391 while (ib
< ibtail
) {
393 * If this is a 7-bit ASCII character, we just copy over and
394 * that's all we need to do for this character.
399 ret_val
= (size_t)-1;
408 * Otherwise, we get the corresponding UTF-8 character bytes
409 * from the mapping table and copy them over.
411 * We don't need to worry about if the UTF-8 character bytes
412 * at the mapping tables are valid or not since they are good.
415 sz
= u8_number_of_bytes
[to_u8_tbl
[(ulong_t
)kcd
][k
].u8
[0]];
418 * If sz <= 0, that means we don't have any assigned character
419 * at the code point, k + 0x80, of the single byte codeset
420 * which is the fromcode. In other words, the input buffer
421 * has an illegal character.
425 ret_val
= (size_t)-1;
429 if ((obtail
- ob
) < sz
) {
431 ret_val
= (size_t)-1;
435 for (i
= 0; i
< sz
; i
++)
436 *ob
++ = to_u8_tbl
[(ulong_t
)kcd
][k
].u8
[i
];
442 *inbytesleft
= ibtail
- ib
;
443 *outbuf
= (char *)ob
;
444 *outbytesleft
= obtail
- ob
;
450 * The following is the common kiconvstr function from UTF-8 to single byte
454 kiconvstr_to_sb(size_t id
, uchar_t
*ib
, size_t *inlen
, uchar_t
*ob
,
455 size_t *outlen
, int flag
, int *errno
)
468 boolean_t do_not_ignore_null
;
470 /* Let's make sure that the table id is within the valid boundary. */
471 if (id
> KICONV_MAX_MAPPING_TBLID
) {
477 ibtail
= ib
+ *inlen
;
478 obtail
= ob
+ *outlen
;
479 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
480 init_h
= sizeof (to_sb_tbl
[id
]) / sizeof (kiconv_to_sb_tbl_comp_t
) - 1;
482 /* Skip any UTF-8 signature BOM character in the beginning. */
483 if ((ibtail
- ib
) >= 3 && *ib
== 0xef && *(ib
+ 1) == 0xbb &&
488 * Basically this is pretty much the same as kiconv_to_sb() except
489 * that we are now accepting two flag values and doing the processing
492 while (ib
< ibtail
) {
493 sz
= u8_number_of_bytes
[*ib
];
495 if (flag
& KICONV_REPLACE_INVALID
) {
498 ret_val
= (size_t)-1;
503 goto STR_TO_SB_REPLACE_INVALID
;
507 ret_val
= (size_t)-1;
511 if (*ib
== '\0' && do_not_ignore_null
)
516 ret_val
= (size_t)-1;
525 if ((ibtail
- ib
) < sz
) {
526 if (flag
& KICONV_REPLACE_INVALID
) {
528 goto STR_TO_SB_REPLACE_INVALID
;
532 ret_val
= (size_t)-1;
539 for (i
= 1; i
< sz
; i
++) {
541 if (*ib
< u8_valid_min_2nd_byte
[u8
] ||
542 *ib
> u8_valid_max_2nd_byte
[u8
]) {
543 if (flag
& KICONV_REPLACE_INVALID
) {
545 goto STR_TO_SB_REPLACE_INVALID
;
549 ret_val
= (size_t)-1;
551 goto STR_TO_SB_ILLEGAL_CHAR_ERR
;
554 } else if (*ib
< 0x80 || *ib
> 0xbf) {
555 if (flag
& KICONV_REPLACE_INVALID
) {
557 goto STR_TO_SB_REPLACE_INVALID
;
561 ret_val
= (size_t)-1;
563 goto STR_TO_SB_ILLEGAL_CHAR_ERR
;
565 u8
= (u8
<< 8) | ((uint32_t)*ib
);
573 if (to_sb_tbl
[id
][i
].u8
== u8
)
575 else if (to_sb_tbl
[id
][i
].u8
< u8
)
581 if (to_sb_tbl
[id
][i
].u8
== u8
) {
582 *ob
++ = to_sb_tbl
[id
][i
].sb
;
584 STR_TO_SB_REPLACE_INVALID
:
585 *ob
++ = KICONV_ASCII_REPLACEMENT_CHAR
;
590 STR_TO_SB_ILLEGAL_CHAR_ERR
:
591 *inlen
= ibtail
- ib
;
592 *outlen
= obtail
- ob
;
598 * The following four functions are entry points recorded at the conv_list[]
602 kiconvstr_to_1252(char *inarray
, size_t *inlen
, char *outarray
,
603 size_t *outlen
, int flag
, int *errno
)
605 return (kiconvstr_to_sb(KICONV_TBLID_1252
, (uchar_t
*)inarray
,
606 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
610 kiconvstr_to_1(char *inarray
, size_t *inlen
, char *outarray
,
611 size_t *outlen
, int flag
, int *errno
)
613 return (kiconvstr_to_sb(KICONV_TBLID_8859_1
, (uchar_t
*)inarray
,
614 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
618 kiconvstr_to_15(char *inarray
, size_t *inlen
, char *outarray
,
619 size_t *outlen
, int flag
, int *errno
)
621 return (kiconvstr_to_sb(KICONV_TBLID_8859_15
, (uchar_t
*)inarray
,
622 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
626 kiconvstr_to_850(char *inarray
, size_t *inlen
, char *outarray
,
627 size_t *outlen
, int flag
, int *errno
)
629 return (kiconvstr_to_sb(KICONV_TBLID_850
, (uchar_t
*)inarray
,
630 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
634 * The following is the common kiconvstr function for conversions from
635 * single byte codesets to UTF-8.
638 kiconvstr_fr_sb(size_t id
, uchar_t
*ib
, size_t *inlen
, uchar_t
*ob
,
639 size_t *outlen
, int flag
, int *errno
)
647 boolean_t do_not_ignore_null
;
650 ibtail
= ib
+ *inlen
;
651 obtail
= ob
+ *outlen
;
652 do_not_ignore_null
= ((flag
& KICONV_IGNORE_NULL
) == 0);
654 while (ib
< ibtail
) {
655 if (*ib
== '\0' && do_not_ignore_null
)
661 ret_val
= (size_t)-1;
669 sz
= u8_number_of_bytes
[to_u8_tbl
[id
][k
].u8
[0]];
672 if (flag
& KICONV_REPLACE_INVALID
) {
673 if ((obtail
- ob
) < 3) {
675 ret_val
= (size_t)-1;
679 /* Save KICONV_UTF8_REPLACEMENT_CHAR. */
690 ret_val
= (size_t)-1;
694 if ((obtail
- ob
) < sz
) {
696 ret_val
= (size_t)-1;
700 for (i
= 0; i
< sz
; i
++)
701 *ob
++ = to_u8_tbl
[id
][k
].u8
[i
];
706 *inlen
= ibtail
- ib
;
707 *outlen
= obtail
- ob
;
713 * The following four functions are also entry points recorded at
714 * the conv_list[] at below.
717 kiconvstr_fr_1252(char *inarray
, size_t *inlen
, char *outarray
,
718 size_t *outlen
, int flag
, int *errno
)
720 return (kiconvstr_fr_sb(KICONV_TBLID_1252
, (uchar_t
*)inarray
,
721 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
725 kiconvstr_fr_1(char *inarray
, size_t *inlen
, char *outarray
,
726 size_t *outlen
, int flag
, int *errno
)
728 return (kiconvstr_fr_sb(KICONV_TBLID_8859_1
, (uchar_t
*)inarray
,
729 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
733 kiconvstr_fr_15(char *inarray
, size_t *inlen
, char *outarray
,
734 size_t *outlen
, int flag
, int *errno
)
736 return (kiconvstr_fr_sb(KICONV_TBLID_8859_15
, (uchar_t
*)inarray
,
737 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
741 kiconvstr_fr_850(char *inarray
, size_t *inlen
, char *outarray
,
742 size_t *outlen
, int flag
, int *errno
)
744 return (kiconvstr_fr_sb(KICONV_TBLID_850
, (uchar_t
*)inarray
,
745 inlen
, (uchar_t
*)outarray
, outlen
, flag
, errno
));
749 * The following static vector contains the normalized code names
750 * and their corresponding code ids. They are somewhat arbitrarily ordered
751 * based on marketing data available. A code id could repeat for aliases.
753 * The vector was generated by using a small utility program called
754 * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
756 * The code ids must be portable, i.e., if needed, you can always generate
757 * the code_list[] again with different code ids. You'll also need to
758 * update the conv_list[] at below.
760 #define KICONV_MAX_CODEID_ENTRY 68
761 #define KICONV_MAX_CODEID 42
763 static kiconv_code_list_t code_list
[KICONV_MAX_CODEID_ENTRY
] = {
784 { "unifiedhangul", 13 },
792 { "cp950hkscs", 17 },
835 * The list of code conversions supported are grouped together per
836 * module which will be loaded as needed.
838 #define KICONV_MAX_CONVERSIONS 84
840 static kiconv_conv_list_t conv_list
[KICONV_MAX_CONVERSIONS
] = {
841 /* Embedded code conversions: */
843 1, 0, KICONV_EMBEDDED
,
844 open_to_1252
, kiconv_to_sb
, close_to_sb
, kiconvstr_to_1252
847 0, 1, KICONV_EMBEDDED
,
848 open_fr_1252
, kiconv_fr_sb
, close_fr_sb
, kiconvstr_fr_1252
851 2, 0, KICONV_EMBEDDED
,
852 open_to_88591
, kiconv_to_sb
, close_to_sb
, kiconvstr_to_1
855 0, 2, KICONV_EMBEDDED
,
856 open_fr_88591
, kiconv_fr_sb
, close_fr_sb
, kiconvstr_fr_1
859 3, 0, KICONV_EMBEDDED
,
860 open_to_885915
, kiconv_to_sb
, close_to_sb
, kiconvstr_to_15
863 0, 3, KICONV_EMBEDDED
,
864 open_fr_885915
, kiconv_fr_sb
, close_fr_sb
, kiconvstr_fr_15
867 4, 0, KICONV_EMBEDDED
,
868 open_to_850
, kiconv_to_sb
, close_to_sb
, kiconvstr_to_850
871 0, 4, KICONV_EMBEDDED
,
872 open_fr_850
, kiconv_fr_sb
, close_fr_sb
, kiconvstr_fr_850
875 /* kiconv_ja module conversions: */
876 { 0, 5, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
877 { 5, 0, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
878 { 0, 6, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
879 { 6, 0, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
880 { 0, 7, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
881 { 7, 0, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
882 { 0, 8, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
883 { 8, 0, KICONV_MODULE_ID_JA
, NULL
, NULL
, NULL
, NULL
},
885 /* kiconv_sc module conversions: */
886 { 0, 9, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
887 { 9, 0, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
888 { 0, 10, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
889 { 10, 0, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
890 { 0, 11, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
891 { 11, 0, KICONV_MODULE_ID_SC
, NULL
, NULL
, NULL
, NULL
},
893 /* kiconv_ko module conversions: */
894 { 0, 12, KICONV_MODULE_ID_KO
, NULL
, NULL
, NULL
, NULL
},
895 { 12, 0, KICONV_MODULE_ID_KO
, NULL
, NULL
, NULL
, NULL
},
896 { 0, 13, KICONV_MODULE_ID_KO
, NULL
, NULL
, NULL
, NULL
},
897 { 13, 0, KICONV_MODULE_ID_KO
, NULL
, NULL
, NULL
, NULL
},
899 /* kiconv_tc module conversions: */
900 { 0, 14, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
901 { 14, 0, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
902 { 0, 15, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
903 { 15, 0, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
904 { 0, 16, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
905 { 16, 0, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
906 { 0, 17, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
907 { 17, 0, KICONV_MODULE_ID_TC
, NULL
, NULL
, NULL
, NULL
},
909 /* kiconv_emea module conversions: */
910 { 0, 18, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
911 { 18, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
912 { 0, 19, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
913 { 19, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
914 { 0, 20, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
915 { 20, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
916 { 0, 21, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
917 { 21, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
918 { 0, 22, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
919 { 22, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
920 { 0, 23, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
921 { 23, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
922 { 0, 24, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
923 { 24, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
924 { 0, 25, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
925 { 25, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
926 { 0, 26, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
927 { 26, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
928 { 0, 27, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
929 { 27, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
930 { 0, 28, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
931 { 28, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
932 { 0, 29, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
933 { 29, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
934 { 0, 30, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
935 { 30, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
936 { 0, 31, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
937 { 31, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
938 { 0, 32, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
939 { 32, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
940 { 0, 33, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
941 { 33, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
942 { 0, 34, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
943 { 34, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
944 { 0, 35, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
945 { 35, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
946 { 0, 36, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
947 { 36, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
948 { 0, 37, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
949 { 37, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
950 { 0, 38, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
951 { 38, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
952 { 0, 39, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
953 { 39, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
954 { 0, 40, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
955 { 40, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
956 { 0, 41, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
957 { 41, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
958 { 0, 42, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
959 { 42, 0, KICONV_MODULE_ID_EMEA
, NULL
, NULL
, NULL
, NULL
},
962 /* The list of implemented and supported modules. */
963 static kiconv_mod_list_t module_list
[KICONV_MAX_MODULE_ID
+ 1] = {
964 "kiconv_embedded", 0,
973 * We use conv_list_lock to restrict data access of both conv_list[] and
974 * module_list[] as they are tightly coupled critical sections that need to be
975 * dealt together as a unit.
977 static kmutex_t conv_list_lock
;
982 mutex_init(&conv_list_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
986 * The following is used to check on whether a kiconv module is being
987 * used or not at the _fini() of the module.
990 kiconv_module_ref_count(size_t mid
)
994 if (mid
<= 0 || mid
> KICONV_MAX_MODULE_ID
)
997 mutex_enter(&conv_list_lock
);
999 count
= module_list
[mid
].refcount
;
1001 mutex_exit(&conv_list_lock
);
1007 * This function "normalizes" a given code name, n, by not including skippable
1008 * characters and folding uppercase letters to corresponding lowercase letters.
1009 * We only fold 7-bit ASCII uppercase characters since the names should be in
1010 * Portable Character Set of 7-bit ASCII.
1012 * By doing this, we will be able to maximize the code name matches.
1015 normalize_codename(const char *n
)
1017 char s
[KICONV_MAX_CODENAME_LEN
+ 1];
1021 return ((size_t)-1);
1023 for (i
= 0; *n
; n
++) {
1024 if (KICONV_SKIPPABLE_CHAR(*n
))
1027 /* If unreasonably lengthy, we don't support such names. */
1028 if (i
>= KICONV_MAX_CODENAME_LEN
)
1029 return ((size_t)-1);
1031 s
[i
++] = (*n
>= 'A' && *n
<= 'Z') ? *n
- 'A' + 'a' : *n
;
1035 /* With the normalized name, find the corresponding codeset id. */
1036 for (i
= 0; i
< KICONV_MAX_CODEID_ENTRY
; i
++)
1037 if (strcmp(s
, code_list
[i
].name
) == 0)
1038 return (code_list
[i
].id
);
1041 * In the future, we will also have a few more lines of code below
1042 * that will deal with other user-created modules' fromcodes and
1043 * tocodes including aliases in a different vector. For now, we don't
1044 * support that but only the known names to this project at this time.
1047 return ((size_t)-1);
1051 * This function called from mod_install() registers supplied code
1052 * conversions. At this point, it does not honor aliases and hence does not
1053 * use nowait data field from the kiconv module info data structure.
1056 kiconv_register_module(kiconv_module_info_t
*info
)
1065 /* Validate the given kiconv module info. */
1066 if (info
== NULL
|| info
->module_name
== NULL
||
1067 info
->kiconv_num_convs
== 0 || info
->kiconv_ops_tbl
== NULL
)
1071 * Check if this is one of the known modules. At this point,
1072 * we do not allow user-defined kiconv modules and that'd be for
1075 for (mid
= 1; mid
<= KICONV_MAX_MODULE_ID
; mid
++)
1076 if (strcmp(module_list
[mid
].name
, info
->module_name
) == 0)
1078 if (mid
> KICONV_MAX_MODULE_ID
)
1081 /* Let's register the conversions supplied. */
1082 mutex_enter(&conv_list_lock
);
1085 * This is a very unlikely situation, but in any case we don't want to
1086 * register a module that is already in.
1088 if (module_list
[mid
].refcount
> 0) {
1089 mutex_exit(&conv_list_lock
);
1093 for (i
= 0; i
< info
->kiconv_num_convs
; i
++) {
1094 op
= &(info
->kiconv_ops_tbl
[i
]);
1096 fid
= normalize_codename(op
->fromcode
);
1097 tid
= normalize_codename(op
->tocode
);
1100 * If we find anything wrong in this particular conversion,
1101 * we skip this one and continue to the next one. This includes
1102 * a case where there is a conversion already being assigned
1103 * into the conv_list[] somehow, i.e., new one never kicks out
1106 if (op
->kiconv_open
== NULL
|| op
->kiconv
== NULL
||
1107 op
->kiconv_close
== NULL
|| op
->kiconvstr
== NULL
)
1110 for (j
= 0; j
< KICONV_MAX_CONVERSIONS
; j
++) {
1111 if (conv_list
[j
].mid
== mid
&&
1112 conv_list
[j
].fid
== fid
&&
1113 conv_list
[j
].tid
== tid
) {
1114 if (conv_list
[j
].open
== NULL
) {
1115 conv_list
[j
].open
= op
->kiconv_open
;
1116 conv_list
[j
].kiconv
= op
->kiconv
;
1117 conv_list
[j
].close
= op
->kiconv_close
;
1118 conv_list
[j
].kiconvstr
= op
->kiconvstr
;
1125 mutex_exit(&conv_list_lock
);
1131 * The following function called during mod_remove() will try to unregister,
1132 * i.e., clear up conversion function pointers, from the conv_list[] if it
1133 * can. If any code conversions are being used, the function will
1134 * just return EBUSY indicating that the module cannot be unloaded.
1137 kiconv_unregister_module(kiconv_module_info_t
*info
)
1142 if (info
== NULL
|| info
->module_name
== NULL
||
1143 info
->kiconv_num_convs
== 0 || info
->kiconv_ops_tbl
== NULL
)
1146 for (mid
= 1; mid
<= KICONV_MAX_MODULE_ID
; mid
++)
1147 if (strcmp(module_list
[mid
].name
, info
->module_name
) == 0)
1149 if (mid
> KICONV_MAX_MODULE_ID
)
1152 mutex_enter(&conv_list_lock
);
1155 * If any of the conversions are used, then, this module cannot be
1158 if (module_list
[mid
].refcount
> 0) {
1159 mutex_exit(&conv_list_lock
);
1164 * Otherwise, we unregister all conversions from this module
1165 * and be ready for the unloading. At this point, we only care about
1166 * the conversions we know about with the module.
1168 for (i
= 0; i
< KICONV_MAX_CONVERSIONS
; i
++) {
1169 if (conv_list
[i
].mid
== mid
) {
1170 conv_list
[i
].open
= NULL
;
1171 conv_list
[i
].kiconv
= NULL
;
1172 conv_list
[i
].close
= NULL
;
1173 conv_list
[i
].kiconvstr
= NULL
;
1177 mutex_exit(&conv_list_lock
);
1183 * The following function checks if the requested code conversion is available
1184 * and, if necessary, loads the corresponding kiconv module that contains
1185 * the conversion (and others).
1188 check_and_load_conversions(const char *tocode
, const char *fromcode
)
1196 /* Normalize the given names and find the corresponding code ids. */
1197 tid
= normalize_codename(tocode
);
1198 if (tid
== (size_t)-1)
1199 return ((kiconv_t
)-1);
1201 fid
= normalize_codename(fromcode
);
1202 if (fid
== (size_t)-1)
1203 return ((kiconv_t
)-1);
1206 * Search the conversion.
1208 * If the conversion isn't supported, just return -1.
1209 * If the conversion is supported but there is no corresponding
1210 * module loaded, try to load it and if successful, return
1211 * a kiconv conversion descriptor memory block.
1213 * We maintain a reference counter of uint_t for each module.
1215 mutex_enter(&conv_list_lock
);
1217 for (i
= 0; i
< KICONV_MAX_CONVERSIONS
; i
++)
1218 if (conv_list
[i
].tid
== tid
&& conv_list
[i
].fid
== fid
)
1220 if (i
>= KICONV_MAX_CONVERSIONS
) {
1221 mutex_exit(&conv_list_lock
);
1222 return ((kiconv_t
)-1);
1225 mid
= conv_list
[i
].mid
;
1227 if (conv_list
[i
].open
== NULL
) {
1228 mutex_exit(&conv_list_lock
);
1230 if (modload("kiconv", module_list
[mid
].name
) < 0)
1231 return ((kiconv_t
)-1);
1234 * Let's double check if something happened right after
1235 * the modload and/or if the module really has the conversion.
1237 mutex_enter(&conv_list_lock
);
1239 if (conv_list
[i
].open
== NULL
) {
1240 mutex_exit(&conv_list_lock
);
1241 return ((kiconv_t
)-1);
1246 * If we got the conversion, we will use the conversion function
1247 * in the module and so let's increase the module's refcounter
1248 * so that the module won't be kicked out. (To be more exact and
1249 * specific, the "refcount" is thus the reference counter of
1250 * the module functions being used.)
1252 if (module_list
[mid
].refcount
< UINT_MAX
)
1253 module_list
[mid
].refcount
++;
1255 mutex_exit(&conv_list_lock
);
1257 kcd
= (kiconv_t
)kmem_alloc(sizeof (kiconv_data_t
), KM_SLEEP
);
1258 kcd
->handle
= (void *)-1;
1265 * The following are the four "Committed" interfaces.
1268 kiconv_open(const char *tocode
, const char *fromcode
)
1273 kcd
= check_and_load_conversions(tocode
, fromcode
);
1274 if (kcd
== (kiconv_t
)-1)
1275 return ((kiconv_t
)-1);
1277 kcd
->handle
= (conv_list
[kcd
->id
].open
)();
1278 if (kcd
->handle
== (void *)-1) {
1280 * If the conversion couldn't be opened for some reason,
1281 * then, we unallocate the kcd and, more importantly, before
1282 * that, we also decrease the module reference counter.
1284 mid
= conv_list
[kcd
->id
].mid
;
1286 mutex_enter(&conv_list_lock
);
1288 if (module_list
[mid
].refcount
> 0)
1289 module_list
[mid
].refcount
--;
1291 mutex_exit(&conv_list_lock
);
1293 kmem_free((void *)kcd
, sizeof (kiconv_data_t
));
1295 return ((kiconv_t
)-1);
1302 kiconv(kiconv_t kcd
, char **inbuf
, size_t *inbytesleft
,
1303 char **outbuf
, size_t *outbytesleft
, int *errno
)
1305 /* Do some minimum checking on the kiconv conversion descriptor. */
1306 if (! kcd
|| kcd
== (kiconv_t
)-1 || conv_list
[kcd
->id
].kiconv
== NULL
) {
1308 return ((size_t)-1);
1311 return ((conv_list
[kcd
->id
].kiconv
)(kcd
->handle
, inbuf
, inbytesleft
,
1312 outbuf
, outbytesleft
, errno
));
1316 kiconv_close(kiconv_t kcd
)
1321 if (! kcd
|| kcd
== (kiconv_t
)-1 || conv_list
[kcd
->id
].close
== NULL
)
1324 mid
= conv_list
[kcd
->id
].mid
;
1326 ret
= (conv_list
[kcd
->id
].close
)(kcd
->handle
);
1328 kmem_free((void *)kcd
, sizeof (kiconv_data_t
));
1330 mutex_enter(&conv_list_lock
);
1333 * While we maintain a reference counter for each module, once loaded,
1334 * we don't modunload from kiconv functions even if the counter
1335 * reaches back to zero.
1337 if (module_list
[mid
].refcount
> 0)
1338 module_list
[mid
].refcount
--;
1340 mutex_exit(&conv_list_lock
);
1346 kiconvstr(const char *tocode
, const char *fromcode
, char *inarray
,
1347 size_t *inlen
, char *outarray
, size_t *outlen
, int flag
, int *errno
)
1353 kcd
= check_and_load_conversions(tocode
, fromcode
);
1354 if (kcd
== (kiconv_t
)-1 || conv_list
[kcd
->id
].kiconvstr
== NULL
) {
1356 return ((size_t)-1);
1359 mid
= conv_list
[kcd
->id
].mid
;
1361 ret
= (conv_list
[kcd
->id
].kiconvstr
)(inarray
, inlen
, outarray
, outlen
,
1364 kmem_free((void *)kcd
, sizeof (kiconv_data_t
));
1366 mutex_enter(&conv_list_lock
);
1368 if (module_list
[mid
].refcount
> 0)
1369 module_list
[mid
].refcount
--;
1371 mutex_exit(&conv_list_lock
);