Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / unormalize.c
blobcecd84399d863657f26153d8af0ecb24f7dbb9b8
1 /* $NetBSD$ */
3 #ifndef lint
4 static char *rcsid = "Id: unormalize.c,v 1.1.1.1 2003/06/04 00:26:43 marka Exp";
5 #endif
7 /*
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9 * All rights reserved.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
35 * JPNIC.
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
50 #include <config.h>
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
56 #include <idn/result.h>
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/ucs4.h>
60 #include <idn/unicode.h>
61 #include <idn/unormalize.h>
62 #include <idn/debug.h>
64 #if !defined(HAVE_MEMMOVE) && defined(HAVE_BCOPY)
65 #define memmove(a,b,c) bcopy((char *)(b),(char *)(a),(int)(c))
66 #endif
68 #define WORKBUF_SIZE 128
69 #define WORKBUF_SIZE_MAX 10000
71 typedef struct {
72 idn__unicode_version_t version; /* Unicode version */
73 int cur; /* pointing now processing character */
74 int last; /* pointing just after the last character */
75 int size; /* size of UCS and CLASS array */
76 unsigned long *ucs4; /* UCS-4 characters */
77 int *class; /* and their canonical classes */
78 unsigned long ucs4_buf[WORKBUF_SIZE]; /* local buffer */
79 int class_buf[WORKBUF_SIZE]; /* ditto */
80 } workbuf_t;
82 static idn_result_t normalize(idn__unicode_version_t version,
83 int do_composition, int compat,
84 const unsigned long *from,
85 unsigned long *to, size_t tolen);
86 static idn_result_t decompose(workbuf_t *wb, unsigned long c, int compat);
87 static void get_class(workbuf_t *wb);
88 static void reorder(workbuf_t *wb);
89 static void compose(workbuf_t *wb);
90 static idn_result_t flush_before_cur(workbuf_t *wb,
91 unsigned long **top, size_t *tolenp);
92 static void workbuf_init(workbuf_t *wb);
93 static void workbuf_free(workbuf_t *wb);
94 static idn_result_t workbuf_extend(workbuf_t *wb);
95 static idn_result_t workbuf_append(workbuf_t *wb, unsigned long c);
96 static void workbuf_shift(workbuf_t *wb, int shift);
97 static void workbuf_removevoid(workbuf_t *wb);
99 idn_result_t
100 idn__unormalize_formkc(idn__unicode_version_t version,
101 const unsigned long *from, unsigned long *to,
102 size_t tolen) {
103 assert(version != NULL && from != NULL && to != NULL && tolen >= 0);
104 TRACE(("idn__unormalize_formkc(from=\"%s\", tolen=%d)\n",
105 idn__debug_ucs4xstring(from, 50), tolen));
106 return (normalize(version, 1, 1, from, to, tolen));
109 static idn_result_t
110 normalize(idn__unicode_version_t version, int do_composition, int compat,
111 const unsigned long *from, unsigned long *to, size_t tolen) {
112 workbuf_t wb;
113 idn_result_t r = idn_success;
116 * Initialize working buffer.
118 workbuf_init(&wb);
119 wb.version = version;
121 while (*from != '\0') {
122 unsigned long c;
124 assert(wb.cur == wb.last);
127 * Get one character from 'from'.
129 c = *from++;
132 * Decompose it.
134 if ((r = decompose(&wb, c, compat)) != idn_success)
135 goto ret;
138 * Get canonical class.
140 get_class(&wb);
143 * Reorder & compose.
145 for (; wb.cur < wb.last; wb.cur++) {
146 if (wb.cur == 0) {
147 continue;
148 } else if (wb.class[wb.cur] > 0) {
150 * This is not a starter. Try reordering.
151 * Note that characters up to it are
152 * already in canonical order.
154 reorder(&wb);
155 continue;
159 * This is a starter character, and there are
160 * some characters before it. Those characters
161 * have been reordered properly, and
162 * ready for composition.
164 if (do_composition && wb.class[0] == 0)
165 compose(&wb);
168 * If CUR points to a starter character,
169 * then process of characters before CUR are
170 * already finished, because any further
171 * reordering/composition for them are blocked
172 * by the starter CUR points.
174 if (wb.cur > 0 && wb.class[wb.cur] == 0) {
175 /* Flush everything before CUR. */
176 r = flush_before_cur(&wb, &to, &tolen);
177 if (r != idn_success)
178 goto ret;
183 if (r == idn_success) {
184 if (do_composition && wb.cur > 0 && wb.class[0] == 0) {
186 * There is some characters left in WB.
187 * They are ordered, but not composed yet.
188 * Now CUR points just after the last character in WB,
189 * and since compose() tries to compose characters
190 * between top and CUR inclusive, we must make CUR
191 * one character back during compose().
193 wb.cur--;
194 compose(&wb);
195 wb.cur++;
198 * Call this even when WB.CUR == 0, to make TO
199 * NUL-terminated.
201 r = flush_before_cur(&wb, &to, &tolen);
202 if (r != idn_success)
203 goto ret;
206 if (tolen <= 0) {
207 r = idn_buffer_overflow;
208 goto ret;
210 *to = '\0';
212 ret:
213 workbuf_free(&wb);
214 return (r);
217 static idn_result_t
218 decompose(workbuf_t *wb, unsigned long c, int compat) {
219 idn_result_t r;
220 int dec_len;
222 again:
223 r = idn__unicode_decompose(wb->version, compat, wb->ucs4 + wb->last,
224 wb->size - wb->last, c, &dec_len);
225 switch (r) {
226 case idn_success:
227 wb->last += dec_len;
228 return (idn_success);
229 case idn_notfound:
230 return (workbuf_append(wb, c));
231 case idn_buffer_overflow:
232 if ((r = workbuf_extend(wb)) != idn_success)
233 return (r);
234 if (wb->size > WORKBUF_SIZE_MAX) {
235 WARNING(("idn__unormalize_form*: "
236 "working buffer too large\n"));
237 return (idn_nomemory);
239 goto again;
240 default:
241 return (r);
243 /* NOTREACHED */
246 static void
247 get_class(workbuf_t *wb) {
248 int i;
250 for (i = wb->cur; i < wb->last; i++)
251 wb->class[i] = idn__unicode_canonicalclass(wb->version,
252 wb->ucs4[i]);
255 static void
256 reorder(workbuf_t *wb) {
257 unsigned long c;
258 int i;
259 int class;
261 assert(wb != NULL);
263 i = wb->cur;
264 c = wb->ucs4[i];
265 class = wb->class[i];
267 while (i > 0 && wb->class[i - 1] > class) {
268 wb->ucs4[i] = wb->ucs4[i - 1];
269 wb->class[i] =wb->class[i - 1];
270 i--;
271 wb->ucs4[i] = c;
272 wb->class[i] = class;
276 static void
277 compose(workbuf_t *wb) {
278 int cur;
279 unsigned long *ucs4;
280 int *class;
281 int last_class;
282 int nvoids;
283 int i;
284 idn__unicode_version_t ver;
286 assert(wb != NULL && wb->class[0] == 0);
288 cur = wb->cur;
289 ucs4 = wb->ucs4;
290 class = wb->class;
291 ver = wb->version;
294 * If there are no decomposition sequence that begins with
295 * the top character, composition is impossible.
297 if (!idn__unicode_iscompositecandidate(ver, ucs4[0]))
298 return;
300 last_class = 0;
301 nvoids = 0;
302 for (i = 1; i <= cur; i++) {
303 unsigned long c;
304 int cl = class[i];
306 if ((last_class < cl || cl == 0) &&
307 idn__unicode_compose(ver, ucs4[0], ucs4[i],
308 &c) == idn_success) {
310 * Replace the top character with the composed one.
312 ucs4[0] = c;
313 class[0] = idn__unicode_canonicalclass(ver, c);
315 class[i] = -1; /* void this character */
316 nvoids++;
317 } else {
318 last_class = cl;
322 /* Purge void characters, if any. */
323 if (nvoids > 0)
324 workbuf_removevoid(wb);
327 static idn_result_t
328 flush_before_cur(workbuf_t *wb, unsigned long **top, size_t *tolenp) {
329 if (*tolenp < wb->cur)
330 return (idn_buffer_overflow);
332 memcpy(*top, wb->ucs4, sizeof(**top) * wb->cur);
333 *top += wb->cur;
334 *tolenp -= wb->cur;
335 workbuf_shift(wb, wb->cur);
337 return (idn_success);
340 static void
341 workbuf_init(workbuf_t *wb) {
342 wb->cur = 0;
343 wb->last = 0;
344 wb->size = WORKBUF_SIZE;
345 wb->ucs4 = wb->ucs4_buf;
346 wb->class = wb->class_buf;
349 static void
350 workbuf_free(workbuf_t *wb) {
351 if (wb->ucs4 != wb->ucs4_buf) {
352 free(wb->ucs4);
353 free(wb->class);
357 static idn_result_t
358 workbuf_extend(workbuf_t *wb) {
359 int newsize = wb->size * 3;
361 if (wb->ucs4 == wb->ucs4_buf) {
362 wb->ucs4 = malloc(sizeof(wb->ucs4[0]) * newsize);
363 wb->class = malloc(sizeof(wb->class[0]) * newsize);
364 } else {
365 wb->ucs4 = realloc(wb->ucs4, sizeof(wb->ucs4[0]) * newsize);
366 wb->class = realloc(wb->class, sizeof(wb->class[0]) * newsize);
368 if (wb->ucs4 == NULL || wb->class == NULL)
369 return (idn_nomemory);
370 else
371 return (idn_success);
374 static idn_result_t
375 workbuf_append(workbuf_t *wb, unsigned long c) {
376 idn_result_t r;
378 if (wb->last >= wb->size && (r = workbuf_extend(wb)) != idn_success)
379 return (r);
380 wb->ucs4[wb->last++] = c;
381 return (idn_success);
384 static void
385 workbuf_shift(workbuf_t *wb, int shift) {
386 int nmove;
388 assert(wb != NULL && wb->cur >= shift);
390 nmove = wb->last - shift;
391 (void)memmove(&wb->ucs4[0], &wb->ucs4[shift],
392 nmove * sizeof(wb->ucs4[0]));
393 (void)memmove(&wb->class[0], &wb->class[shift],
394 nmove * sizeof(wb->class[0]));
395 wb->cur -= shift;
396 wb->last -= shift;
399 static void
400 workbuf_removevoid(workbuf_t *wb) {
401 int i, j;
402 int last = wb->last;
404 for (i = j = 0; i < last; i++) {
405 if (wb->class[i] >= 0) {
406 if (j < i) {
407 wb->ucs4[j] = wb->ucs4[i];
408 wb->class[j] = wb->class[i];
410 j++;
413 wb->cur -= last - j;
414 wb->last = j;