etc/services - sync with NetBSD-8
[minix.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / ucsmap.c
blob651bbba763bb7a9e4c5e04994f1a1803923d92db
1 /* $NetBSD: ucsmap.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */
3 #ifndef lint
4 static char *rcsid = "Id: ucsmap.c,v 1.1 2003/06/04 00:26:14 marka Exp ";
5 #endif
7 /*
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
9 *
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
34 * JPNIC.
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
49 #include <config.h>
51 #include <stdlib.h>
52 #include <string.h>
54 #include <idn/result.h>
55 #include <idn/assert.h>
56 #include <idn/log.h>
57 #include <idn/logmacro.h>
58 #include <idn/ucsmap.h>
60 #define INIT_SIZE 50
61 #define DEFAULT_BUF_SIZE 500
62 #define UCSMAP_HASH_SIZE 103
63 #define MAX_MAPLEN 0xffff
66 * This module implements UCS 1-to-N mapping.
67 * To speed up mapping table lookup, a combination of hash and
68 * binary search is used.
72 * Mapping entry.
73 * Entries are sorted by its hash index and code point.
75 typedef struct {
76 short hidx; /* hash index */
77 unsigned short len; /* length of mapped sequence */
78 unsigned long ucs; /* code point to be mapped */
79 unsigned long *map; /* mapped sequence of code points */
80 } ucsmap_entry_t;
83 * Hash table entry.
84 * Since the entries pointed by ucsmap_hash_t.entry are sorted,
85 * binary search can be used.
87 typedef struct {
88 ucsmap_entry_t *entry; /* sorted by code point */
89 int n; /* length of 'entry' */
90 } ucsmap_hash_t;
93 * UCS character buffer for storing target character sequence.
95 typedef struct ucsmap_buf {
96 struct ucsmap_buf *next;
97 unsigned long buf[1]; /* actually a variable length array */
98 } ucsmap_buf_t;
101 * Mapping object.
103 typedef struct idn_ucsmap {
104 ucsmap_hash_t hash[UCSMAP_HASH_SIZE];
105 ucsmap_entry_t *entries; /* array of entries */
106 size_t entry_size; /* allocated size */
107 size_t nentries; /* # of entries in use */
108 ucsmap_buf_t *mapdata; /* list of character buffers */
109 size_t mapdata_size; /* allocated size of current buffer */
110 size_t mapdata_used; /* # of chars in use */
111 int fixed; /* already fixed? */
112 int refcnt; /* reference count */
113 } ucsmap_t;
115 static int ucsmap_hash(unsigned long v);
116 static unsigned long *save_mapped_sequence(idn_ucsmap_t ctx,
117 unsigned long *map,
118 size_t maplen);
119 static void free_mapbuf(ucsmap_buf_t *buf);
120 static int comp_entry(const void *v1, const void *v2);
122 idn_result_t
123 idn_ucsmap_create(idn_ucsmap_t *ctxp) {
124 idn_ucsmap_t ctx;
126 assert(ctxp != NULL);
128 TRACE(("idn_ucsmap_create()\n"));
130 if ((ctx = malloc(sizeof(*ctx))) == NULL) {
131 WARNING(("idn_ucsmap_create: malloc failed\n"));
132 return (idn_nomemory);
135 ctx->entry_size = 0;
136 ctx->nentries = 0;
137 ctx->entries = NULL;
138 ctx->mapdata = NULL;
139 ctx->mapdata_size = 0;
140 ctx->mapdata_used = 0;
141 ctx->fixed = 0;
142 ctx->refcnt = 1;
143 *ctxp = ctx;
144 return (idn_success);
147 void
148 idn_ucsmap_destroy(idn_ucsmap_t ctx) {
149 assert(ctx != NULL && ctx->refcnt > 0);
151 TRACE(("idn_ucsmap_destroy()\n"));
153 if (--ctx->refcnt == 0) {
154 if (ctx->entries != NULL)
155 free(ctx->entries);
156 if (ctx->mapdata != NULL)
157 free_mapbuf(ctx->mapdata);
158 free(ctx);
162 void
163 idn_ucsmap_incrref(idn_ucsmap_t ctx) {
164 assert(ctx != NULL && ctx->refcnt > 0);
166 ctx->refcnt++;
169 idn_result_t
170 idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs,
171 unsigned long *map, size_t maplen)
173 ucsmap_entry_t *e;
174 ucsmap_entry_t *newbuf;
176 assert(ctx != NULL && ctx->refcnt > 0);
178 TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen));
180 /* Make sure it is not fixed yet. */
181 if (ctx->fixed) {
182 WARNING(("idn_ucsmap_add: attempt to add to fixed map\n"));
183 return (idn_failure);
186 if (maplen > MAX_MAPLEN) {
187 WARNING(("idn_ucsmap_add: maplen too large (> %d)\n",
188 MAX_MAPLEN));
189 return (idn_failure);
192 /* Append an entry. */
193 if (ctx->nentries >= ctx->entry_size) {
194 if (ctx->entry_size == 0)
195 ctx->entry_size = INIT_SIZE;
196 else
197 ctx->entry_size *= 2;
198 newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size);
199 if (newbuf == NULL)
200 return (idn_nomemory);
201 ctx->entries = newbuf;
203 e = &ctx->entries[ctx->nentries];
204 e->hidx = ucsmap_hash(ucs);
205 e->len = maplen;
206 e->ucs = ucs;
207 if (maplen > 0) {
208 /* Save mapped sequence in the buffer. */
209 e->map = save_mapped_sequence(ctx, map, maplen);
210 if (e->map == NULL)
211 return (idn_nomemory);
212 } else {
214 * Zero 'maplen' is perfectly valid meaning one-to-zero
215 * mapping.
217 e->map = NULL;
219 ctx->nentries++;
221 return (idn_success);
224 void
225 idn_ucsmap_fix(idn_ucsmap_t ctx) {
226 ucsmap_entry_t *e;
227 int last_hidx;
228 int i;
230 assert(ctx != NULL && ctx->refcnt > 0);
232 TRACE(("idn_ucsmap_fix()\n"));
234 if (ctx->fixed)
235 return;
237 ctx->fixed = 1;
239 /* Initialize hash. */
240 for (i = 0; i < UCSMAP_HASH_SIZE; i++) {
241 ctx->hash[i].entry = NULL;
242 ctx->hash[i].n = 0;
245 if (ctx->nentries == 0)
246 return;
248 /* Sort entries by the hash value and code point. */
249 qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry);
252 * Now the entries are sorted by their hash value, and
253 * sorted by its code point among the ones with the same hash value.
256 /* Build hash table. */
257 last_hidx = -1;
258 for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) {
259 if (e->hidx != last_hidx) {
260 ctx->hash[e->hidx].entry = e;
261 last_hidx = e->hidx;
263 ctx->hash[last_hidx].n++;
267 idn_result_t
268 idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to,
269 size_t tolen, size_t *maplenp) {
270 int hash;
271 ucsmap_entry_t *e;
272 int n;
273 int hi, lo, mid;
275 assert(ctx != NULL && ctx->refcnt > 0 && to != NULL &&
276 maplenp != NULL);
278 TRACE(("idn_ucsmap_map(v=U+%lX)\n", v));
280 if (!ctx->fixed) {
281 WARNING(("idn_ucsmap_map: not fixed yet\n"));
282 return (idn_failure);
285 /* First, look up hash table. */
286 hash = ucsmap_hash(v);
287 if ((n = ctx->hash[hash].n) == 0)
288 goto nomap;
290 /* Then do binary search. */
291 e = ctx->hash[hash].entry;
292 lo = 0;
293 hi = n - 1;
294 while (lo <= hi) {
295 mid = (lo + hi) / 2;
296 if (v < e[mid].ucs)
297 hi = mid - 1;
298 else if (v > e[mid].ucs)
299 lo = mid + 1;
300 else {
301 /* Found. */
302 if (tolen < e[mid].len)
303 return (idn_buffer_overflow);
304 memcpy(to, e[mid].map, sizeof(*to) * e[mid].len);
305 *maplenp = e[mid].len;
306 return (idn_success);
311 * Not found. Put the original character to 'to'
312 * just for convenience.
314 nomap:
315 if (tolen < 1)
316 return (idn_buffer_overflow);
317 *to = v;
318 *maplenp = 1;
319 return (idn_nomapping);
322 static int
323 ucsmap_hash(unsigned long v) {
324 return (v % UCSMAP_HASH_SIZE);
327 static unsigned long *
328 save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) {
329 ucsmap_buf_t *buf;
330 unsigned long *p;
331 size_t allocsize;
334 * If the current buffer (the first one in the ctx->mapdata list)
335 * has enough space, use it. Otherwise, allocate a new buffer and
336 * insert it at the beginning of the list.
338 if (ctx->mapdata_used + maplen > ctx->mapdata_size) {
339 if (maplen > DEFAULT_BUF_SIZE)
340 allocsize = maplen * 2;
341 else
342 allocsize = DEFAULT_BUF_SIZE;
343 buf = malloc(sizeof(ucsmap_hash_t) +
344 sizeof(unsigned long) * allocsize);
345 if (buf == NULL)
346 return (NULL);
347 buf->next = ctx->mapdata;
348 ctx->mapdata = buf;
349 ctx->mapdata_size = allocsize;
350 ctx->mapdata_used = 0;
352 p = ctx->mapdata->buf + ctx->mapdata_used;
353 memcpy(p, map, sizeof(unsigned long) * maplen);
354 ctx->mapdata_used += maplen;
355 return (p);
358 static void
359 free_mapbuf(ucsmap_buf_t *buf) {
360 while (buf != NULL) {
361 ucsmap_buf_t *next = buf->next;
362 free(buf);
363 buf = next;
367 static int
368 comp_entry(const void *v1, const void *v2) {
369 const ucsmap_entry_t *e1 = v1;
370 const ucsmap_entry_t *e2 = v2;
372 if (e1->hidx < e2->hidx)
373 return (-1);
374 else if (e1->hidx > e2->hidx)
375 return (1);
376 else if (e1->ucs < e2->ucs)
377 return (-1);
378 else if (e1->ucs > e2->ucs)
379 return (1);
380 else
381 return (0);