Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / uts / common / kiconv / kiconv_sc / kiconv_cck_common.c
blob3de830f2d9994eb04aa261981db1b18f718c513d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/debug.h>
33 #include <sys/kmem.h>
34 #include <sys/sunddi.h>
35 #include <sys/byteorder.h>
36 #include <sys/errno.h>
37 #include <sys/u8_textprep.h>
38 #include <sys/kiconv.h>
39 #include <sys/kiconv_cck_common.h>
41 /*LINTLIBRARY*/
44 * Common kiconv_open method for UTF-8 -> CCK conversion.
46 void *
47 kiconv_open_to_cck()
49 kiconv_state_t st;
51 st = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
53 st->bom_processed = 0;
55 return ((void *)st);
59 * Common kiconv_close method for UTF-8 -> CCK conversion.
61 int
62 kiconv_close_to_cck(void *kcd)
64 if (! kcd || kcd == (void *)-1)
65 return (EBADF);
67 kmem_free(kcd, sizeof (kiconv_state_data_t));
69 return (0);
73 * Common routine to convert UTF-8 sequence to CCK legal character sequence.
/*
 * Buffer-oriented UTF-8 -> CCK conversion driven by a per-encoding callback.
 *
 * Parameters:
 *   kcd            conversion descriptor; a null pointer or (void *)-1 is
 *                  rejected with EBADF.
 *   inbuf          in/out pointer to the input; a null inbuf or null *inbuf
 *                  is a state reset request (bom_processed is cleared).
 *   inbytesleft    in/out number of input bytes not yet consumed.
 *   outbuf         in/out pointer to the output position.
 *   outbytesleft   in/out number of output bytes still available.
 *   errno          receives the error code when the call fails.
 *   ptr_utf8tocck  callback that converts one collected UTF-8 character
 *                  (packed into a uint32_t) into the output buffer.
 *
 * Returns 0 for a reset request and (size_t)-1 for the failures set
 * directly in this function. Otherwise returns ret_val, which starts at 0
 * and is handed to the callback by address.
 * NOTE(review): ret_val is presumably the count of non-identical
 * conversions maintained by the callback -- confirm against the callback.
 *
 * Except for the EBADF and reset returns, *inbuf, *inbytesleft, *outbuf and
 * *outbytesleft are always written back before returning, including on error.
 */
75 size_t
76 kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
77 char **outbuf, size_t *outbytesleft, int *errno,
78 kiconv_utf8tocck_t ptr_utf8tocck)
80 uchar_t *ib;
81 uchar_t *ob;
82 uchar_t *ibtail;
83 uchar_t *obtail;
84 uchar_t *oldib;
85 size_t ret_val;
86 size_t i; /* temp variable in for loop */
87 uint32_t u8;
88 int8_t sz;
90 /* Check on the kiconv code conversion descriptor. */
91 if (! kcd || kcd == (void *)-1) {
92 *errno = EBADF;
93 return ((size_t)-1);
96 /* If this is a state reset request, process and return. */
97 if (! inbuf || !(*inbuf)) {
98 ((kiconv_state_t)kcd)->bom_processed = 0;
99 return (0);
102 ret_val = 0;
103 ib = (uchar_t *)*inbuf;
104 ob = (uchar_t *)*outbuf;
/* ibtail/obtail point one past the end of the input/output buffers. */
105 ibtail = ib + *inbytesleft;
106 obtail = ob + *outbytesleft;
/*
 * NOTE(review): KICONV_CHECK_UTF8_BOM is defined outside this file. By its
 * name it skips a leading UTF-8 BOM, presumably tracked through
 * bom_processed -- confirm in the header.
 */
108 KICONV_CHECK_UTF8_BOM(ib, ibtail);
110 while (ib < ibtail) {
/* Expected byte length of the character, looked up from its leading byte. */
111 sz = u8_number_of_bytes[*ib];
114 * If it is a 7-bit ASCII character, we don't need to
115 * process further and we just copy the character over.
117 * If not, we connect the chracter bytes up to four bytes,
118 * validate the bytes, and binary search for the corresponding
119 * table. If we find it from the mapping table, we put that
120 * into the output buffer; otherwise, we put a replacement
121 * character instead as a non-identical conversion.
123 if (sz == 1) {
124 if (ob >= obtail) {
/*
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK is defined outside this file;
 * presumably it stores the code in *errno, sets ret_val to (size_t)-1 and
 * breaks out of the while loop -- confirm in the header.
 */
125 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
128 *ob++ = *ib++;
129 continue;
133 * Issue EILSEQ error if the first byte is a
134 * invalid UTF-8 character leading byte.
136 if (sz <= 0) {
137 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
141 * Issue EINVAL error if input buffer has an incomplete
142 * character at the end of the buffer.
144 if (ibtail - ib < sz) {
145 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
149 * We collect UTF-8 character bytes and also check if this
150 * is a valid UTF-8 character without any bogus bytes based
151 * on the latest UTF-8 binary representation.
/*
 * Remember where this character starts so that ib can be rewound on error;
 * the caller then sees the input pointer at the offending character.
 */
153 oldib = ib;
154 u8 = *ib++;
/*
 * The second byte is checked by a macro that also takes the leading byte.
 * A failure jumps into the body of the for loop below.
 */
156 if (KICONV_IS_INVALID_UTF8_SECOND_BYTE(*ib, u8))
157 goto ILLEGAL_CHAR_PROCESS;
/* Raw bytes are packed into u8 with the leading byte most significant. */
158 u8 = (u8 << 8) | *ib++;
/* Any third and fourth bytes must be in the range 0x80..0xbf. */
160 for (i = 2; i < sz; i++) {
161 if (*ib < 0x80 || *ib > 0xbf) {
162 ILLEGAL_CHAR_PROCESS:
163 *errno = EILSEQ;
164 ret_val = (size_t)-1;
165 ib = oldib;
/* Leave both loops and go straight to the position write-back. */
166 goto ILLEGAL_CHAR_ERR;
169 u8 = (u8 << 8) | *ib++;
172 /* Now we have a valid UTF-8 character. */
/*
 * sz is reused for the callback's result: a negative value is reported as
 * E2BIG after rewinding ib; otherwise ob is advanced by it. The callback
 * gets &ib and &ret_val, so it may update both.
 */
173 sz = ptr_utf8tocck(u8, &ib, ibtail, ob, obtail, &ret_val);
174 if (sz < 0) {
175 ib = oldib;
176 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
179 ob += sz;
/* Common exit: report how far the input and output positions advanced. */
182 ILLEGAL_CHAR_ERR:
183 *inbuf = (char *)ib;
184 *inbytesleft = ibtail - ib;
185 *outbuf = (char *)ob;
186 *outbytesleft = obtail - ob;
188 return (ret_val);
/*
 * String-oriented UTF-8 -> CCK conversion driven by a per-encoding callback.
 * Unlike kiconv_utf8_to_cck() it takes no conversion descriptor, and the
 * buffer pointers are passed by value, so only the lengths are written back.
 *
 * Parameters:
 *   ib, inlen      input buffer and, in/out, its length in bytes; on return
 *                  *inlen holds the number of bytes not consumed.
 *   ob, outlen     output buffer and, in/out, its size in bytes; on return
 *                  *outlen holds the number of bytes still free.
 *   flag           KICONV_IGNORE_NULL: when clear, conversion stops at the
 *                  first NUL byte. KICONV_REPLACE_INVALID: when set, illegal
 *                  or incomplete input is replaced with
 *                  KICONV_ASCII_REPLACEMENT_CHAR instead of failing.
 *   errno          receives the error code when the call fails.
 *   ptr_utf8tocck  callback that converts one collected UTF-8 character
 *                  (packed into a uint32_t) into the output buffer.
 *
 * Returns ret_val, which starts at 0, is incremented once per replacement
 * character written here, is handed to the callback by address, and is set
 * to (size_t)-1 on the EILSEQ failure coded directly in this function.
 * NOTE(review): the KICONV_SET_ERRNO_* macros presumably set it to
 * (size_t)-1 as well -- confirm in the header.
 */
191 size_t
192 kiconvstr_utf8_to_cck(uchar_t *ib, size_t *inlen, uchar_t *ob, size_t *outlen,
193 int flag, int *errno, kiconv_utf8tocck_t ptr_utf8tocck)
195 uchar_t *ibtail;
196 uchar_t *obtail;
197 uchar_t *oldib;
198 size_t ret_val;
199 size_t i; /* temp variable in for loop */
200 uint32_t u8;
201 int8_t sz;
202 boolean_t do_not_ignore_null;
204 ret_val = 0;
/* ibtail/obtail point one past the end of the input/output buffers. */
205 ibtail = ib + *inlen;
206 obtail = ob + *outlen;
207 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
/*
 * NOTE(review): macro defined outside this file. By its name it skips a
 * leading UTF-8 BOM without consulting any descriptor state -- confirm.
 */
209 KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail);
211 while (ib < ibtail) {
/* Stop at a NUL byte unless the caller set KICONV_IGNORE_NULL. */
212 if (*ib == '\0' && do_not_ignore_null)
213 break;
/* Expected byte length of the character, looked up from its leading byte. */
215 sz = u8_number_of_bytes[*ib];
/* Single-byte characters are copied through unchanged. */
217 if (sz == 1) {
218 if (ob >= obtail) {
/*
 * NOTE(review): macro defined outside this file; presumably stores the
 * code in *errno, sets ret_val to (size_t)-1 and breaks out of the loop.
 */
219 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
222 *ob++ = *ib++;
223 continue;
/* Start of the current character, used to rewind or skip ib on error. */
226 oldib = ib;
/* Invalid leading byte. */
228 if (sz <= 0) {
/*
 * NOTE(review): macro defined outside this file. Presumably, when
 * KICONV_REPLACE_INVALID is set it advances ib by the first argument and
 * jumps to REPLACE_INVALID, and otherwise fails with the given code --
 * confirm in the header.
 */
229 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
/*
 * Incomplete character at the end of the input: in replacement mode all
 * remaining input is consumed and one replacement character is emitted;
 * otherwise the call fails with EINVAL.
 */
232 if (ibtail - ib < sz) {
233 if (flag & KICONV_REPLACE_INVALID) {
234 ib = ibtail;
235 goto REPLACE_INVALID;
238 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
241 u8 = *ib++;
/* A bad second byte jumps into the body of the for loop below. */
243 if (KICONV_IS_INVALID_UTF8_SECOND_BYTE(*ib, u8))
244 goto ILLEGAL_CHAR_PROCESS;
/* Raw bytes are packed into u8 with the leading byte most significant. */
245 u8 = (u8 << 8) | *ib++;
/* Any third and fourth bytes must be in the range 0x80..0xbf. */
247 for (i = 2; i < sz; i++) {
248 if (*ib < 0x80 || *ib > 0xbf) {
249 ILLEGAL_CHAR_PROCESS:
/*
 * Replacement mode skips all sz bytes claimed by the leading byte,
 * regardless of which byte was found to be invalid.
 */
250 if (flag & KICONV_REPLACE_INVALID) {
251 ib = oldib + sz;
252 goto REPLACE_INVALID;
255 *errno = EILSEQ;
256 ret_val = (size_t)-1;
257 ib = oldib;
/* Leave both loops and go straight to the length write-back. */
258 goto ILLEGAL_CHAR_ERR;
261 u8 = (u8 << 8) | *ib++;
264 /* Now we get a valid character encoded in UTF-8. */
/*
 * sz is reused for the callback's result: a negative value is reported as
 * E2BIG after rewinding ib; otherwise ob is advanced by it. The callback
 * gets &ib and &ret_val, so it may update both.
 */
265 sz = ptr_utf8tocck(u8, &ib, ibtail, ob, obtail, &ret_val);
266 if (sz < 0) {
267 ib = oldib;
268 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
271 ob += sz;
272 continue;
/*
 * Reached only by goto. Writes one replacement character and counts it in
 * ret_val; if the output is full, ib is rewound to the start of the
 * character and the call fails with E2BIG.
 */
274 REPLACE_INVALID:
275 if (ob >= obtail) {
276 ib = oldib;
277 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
280 *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
281 ret_val++;
/* Common exit: report the unconsumed input and unused output byte counts. */
284 ILLEGAL_CHAR_ERR:
285 *inlen = ibtail - ib;
286 *outlen = obtail - ob;
288 return (ret_val);
292 * Search key in tbl[0] <= tbl[1] <= ... <= tbl[n-1]. Return 0 if not found.
293 * tbl[0] is a special element for non-identical conversion.
295 size_t
296 kiconv_binsearch(uint32_t key, void *tbl, size_t nitems)
298 size_t low, high, mid;
299 kiconv_table_t *table;
301 low = 1;
302 high = nitems - 1;
303 table = (kiconv_table_t *)tbl;
305 while (low <= high) {
306 mid = (low + high) / 2;
308 if (key < table[mid].key)
309 high = mid - 1;
310 else if (key > table[mid].key)
311 low = mid + 1;
312 else
313 return (mid);
316 return (0);