Remove building with NOCRYPTO option
[minix.git] / crypto / external / bsd / heimdal / dist / lib / wind / normalize.c
blobd41d309597867fd62bb026a250c4d7d1d66346b0
1 /* $NetBSD: normalize.c,v 1.1.1.2 2014/04/24 12:45:56 pettai Exp $ */
3 /*
4 * Copyright (c) 2004 Kungliga Tekniska Högskolan
5 * (Royal Institute of Technology, Stockholm, Sweden).
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the Institute nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include "windlocl.h"
41 #include <assert.h>
42 #include <stdlib.h>
43 #include <errno.h>
44 #include <stdio.h>
46 #include <krb5/roken.h>
48 #include "normalize_table.h"
50 static int
51 translation_cmp(const void *key, const void *data)
53 const struct translation *t1 = (const struct translation *)key;
54 const struct translation *t2 = (const struct translation *)data;
56 return t1->key - t2->key;
59 enum { s_base = 0xAC00};
60 enum { s_count = 11172};
61 enum { l_base = 0x1100};
62 enum { l_count = 19};
63 enum { v_base = 0x1161};
64 enum { v_count = 21};
65 enum { t_base = 0x11A7};
66 enum { t_count = 28};
67 enum { n_count = v_count * t_count};
69 static int
70 hangul_decomp(const uint32_t *in, size_t in_len,
71 uint32_t *out, size_t *out_len)
73 uint32_t u = *in;
74 unsigned s_index;
75 unsigned l, v, t;
76 unsigned o;
78 if (u < s_base || u >= s_base + s_count)
79 return 0;
80 s_index = u - s_base;
81 l = l_base + s_index / n_count;
82 v = v_base + (s_index % n_count) / t_count;
83 t = t_base + s_index % t_count;
84 o = 2;
85 if (t != t_base)
86 ++o;
87 if (*out_len < o)
88 return WIND_ERR_OVERRUN;
89 out[0] = l;
90 out[1] = v;
91 if (t != t_base)
92 out[2] = t;
93 *out_len = o;
94 return 1;
97 static uint32_t
98 hangul_composition(const uint32_t *in, size_t in_len)
100 if (in_len < 2)
101 return 0;
102 if (in[0] >= l_base && in[0] < l_base + l_count) {
103 unsigned l_index = in[0] - l_base;
104 unsigned v_index;
106 if (in[1] < v_base || in[1] >= v_base + v_count)
107 return 0;
108 v_index = in[1] - v_base;
109 return (l_index * v_count + v_index) * t_count + s_base;
110 } else if (in[0] >= s_base && in[0] < s_base + s_count) {
111 unsigned s_index = in[0] - s_base;
112 unsigned t_index;
114 if (s_index % t_count != 0)
115 return 0;
116 if (in[1] < t_base || in[1] >= t_base + t_count)
117 return 0;
118 t_index = in[1] - t_base;
119 return in[0] + t_index;
121 return 0;
124 static int
125 compat_decomp(const uint32_t *in, size_t in_len,
126 uint32_t *out, size_t *out_len)
128 unsigned i;
129 unsigned o = 0;
131 for (i = 0; i < in_len; ++i) {
132 struct translation ts = {in[i]};
133 size_t sub_len = *out_len - o;
134 int ret;
136 ret = hangul_decomp(in + i, in_len - i,
137 out + o, &sub_len);
138 if (ret) {
139 if (ret == WIND_ERR_OVERRUN)
140 return ret;
141 o += sub_len;
142 } else {
143 void *s = bsearch(&ts,
144 _wind_normalize_table,
145 _wind_normalize_table_size,
146 sizeof(_wind_normalize_table[0]),
147 translation_cmp);
148 if (s != NULL) {
149 const struct translation *t = (const struct translation *)s;
151 ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
152 t->val_len,
153 out + o, &sub_len);
154 if (ret)
155 return ret;
156 o += sub_len;
157 } else {
158 if (o >= *out_len)
159 return WIND_ERR_OVERRUN;
160 out[o++] = in[i];
165 *out_len = o;
166 return 0;
169 static void
170 swap_char(uint32_t * a, uint32_t * b)
172 uint32_t t;
173 t = *a;
174 *a = *b;
175 *b = t;
178 /* Unicode 5.2.0 D109 Canonical Ordering for a sequence of code points
179 * that all have Canonical_Combining_Class > 0 */
180 static void
181 canonical_reorder_sequence(uint32_t * a, size_t len)
183 size_t i, j;
185 if (len <= 1)
186 return;
188 for (i = 1; i < len; i++) {
189 for (j = i;
190 j > 0 &&
191 _wind_combining_class(a[j]) < _wind_combining_class(a[j-1]);
192 j--)
193 swap_char(&a[j], &a[j-1]);
197 static void
198 canonical_reorder(uint32_t *tmp, size_t tmp_len)
200 size_t i;
202 for (i = 0; i < tmp_len; ++i) {
203 int cc = _wind_combining_class(tmp[i]);
204 if (cc) {
205 size_t j;
206 for (j = i + 1;
207 j < tmp_len && _wind_combining_class(tmp[j]);
208 ++j)
210 canonical_reorder_sequence(&tmp[i], j - i);
211 i = j;
216 static uint32_t
217 find_composition(const uint32_t *in, unsigned in_len)
219 unsigned short canon_index = 0;
220 uint32_t cur;
221 unsigned n = 0;
223 cur = hangul_composition(in, in_len);
224 if (cur)
225 return cur;
227 do {
228 const struct canon_node *c = &_wind_canon_table[canon_index];
229 unsigned i;
231 if (n % 5 == 0) {
232 cur = *in++;
233 if (in_len-- == 0)
234 return c->val;
237 i = cur >> 16;
238 if (i < c->next_start || i >= c->next_end)
239 canon_index = 0;
240 else
241 canon_index =
242 _wind_canon_next_table[c->next_offset + i - c->next_start];
243 if (canon_index != 0) {
244 cur = (cur << 4) & 0xFFFFF;
245 ++n;
247 } while (canon_index != 0);
248 return 0;
251 static int
252 combine(const uint32_t *in, size_t in_len,
253 uint32_t *out, size_t *out_len)
255 unsigned i;
256 int ostarter;
257 unsigned o = 0;
258 int old_cc;
260 for (i = 0; i < in_len;) {
261 while (i < in_len && _wind_combining_class(in[i]) != 0) {
262 out[o++] = in[i++];
264 if (i < in_len) {
265 if (o >= *out_len)
266 return WIND_ERR_OVERRUN;
267 ostarter = o;
268 out[o++] = in[i++];
269 old_cc = -1;
271 while (i < in_len) {
272 uint32_t comb;
273 uint32_t v[2];
274 int cc;
276 v[0] = out[ostarter];
277 v[1] = in[i];
279 cc = _wind_combining_class(in[i]);
280 if (old_cc != cc && (comb = find_composition(v, 2))) {
281 out[ostarter] = comb;
282 } else if (cc == 0) {
283 break;
284 } else {
285 if (o >= *out_len)
286 return WIND_ERR_OVERRUN;
287 out[o++] = in[i];
288 old_cc = cc;
290 ++i;
294 *out_len = o;
295 return 0;
299 _wind_stringprep_normalize(const uint32_t *in, size_t in_len,
300 uint32_t *out, size_t *out_len)
302 size_t tmp_len;
303 uint32_t *tmp;
304 int ret;
306 if (in_len == 0) {
307 *out_len = 0;
308 return 0;
311 tmp_len = in_len * 4;
312 if (tmp_len < MAX_LENGTH_CANON)
313 tmp_len = MAX_LENGTH_CANON;
314 tmp = malloc(tmp_len * sizeof(uint32_t));
315 if (tmp == NULL)
316 return ENOMEM;
318 ret = compat_decomp(in, in_len, tmp, &tmp_len);
319 if (ret) {
320 free(tmp);
321 return ret;
323 canonical_reorder(tmp, tmp_len);
324 ret = combine(tmp, tmp_len, out, out_len);
325 free(tmp);
326 return ret;