No empty .Rs/.Re
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / ucsset.c
blobb1ed63bf04909f5f6bc9a8a0bc27fe03dab9eda8
1 /* $NetBSD$ */
3 #ifndef lint
4 static char *rcsid = "Id: ucsset.c,v 1.1.1.1 2003/06/04 00:26:15 marka Exp";
5 #endif
7 /*
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
9 *
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
34 * JPNIC.
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
49 #include <config.h>
51 #include <stdlib.h>
52 #include <stdio.h>
53 #include <string.h>
55 #include <idn/result.h>
56 #include <idn/assert.h>
57 #include <idn/logmacro.h>
58 #include <idn/ucsset.h>
60 #define UCS_MAX 0x80000000UL
62 #define INIT_SIZE 50
65 * Code point range.
67 * The set of code points is represented by an array of code point ranges.
68 * In the building phase, specified ranges by 'idn_ucsset_add' or
69 * 'idn_ucsset_addrange' are simply appended to the array.
70 * And 'idn_ucsset_fix' sorts the array by the code point value, and also
71 * merges any intersecting ranges. Since the array is sorted, a binary
72 * search can be used for looking up.
74 typedef struct {
75 unsigned long from;
76 unsigned long to;
77 } range_t;
80 * Code point segment.
82 * To speed up searching further, the entire region of UCS-4 code points
83 * (U+0000 - U+7FFFFFFF) are divided into segments. For each segment,
84 * the first and last element of the range array corresponding to the
85 * segment are computed by 'idn_ucsset_fix'. This narrows down the
86 * (initial) search range.
88 typedef struct {
89 int range_start; /* index of ucsset.ranges */
90 int range_end; /* ditto */
91 } segment_t;
94 * Code point to segment index conversion.
96 * Below is the function that maps a code point to the corresponding segment.
97 * The mapping is non-uniform, so that BMP, the following 16 planes that
98 * comprise Unicode code points together with BMP, and other planes
99 * have different granularity.
101 #define SEG_THLD1 0x10000 /* BMP */
102 #define SEG_THLD2 0x110000 /* Unicode (BMP+16planes) */
103 #define SEG_SFT1 10 /* BMP: 1K code points/segment */
104 #define SEG_SFT2 14 /* following 16 planes: 16K cp/seg */
105 #define SEG_SFT3 24 /* rest: 16M cp/seg */
106 #define SEG_OFF1 (SEG_THLD1 >> SEG_SFT1)
107 #define SEG_OFF2 (((SEG_THLD2 - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1)
108 #define SEG_INDEX(v) \
109 (((v) < SEG_THLD1) ? ((v) >> SEG_SFT1) : \
110 ((v) < SEG_THLD2) ? ((((v) - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1) : \
111 ((((v) - SEG_THLD2) >> SEG_SFT3) + SEG_OFF2))
112 #define SEG_LEN (SEG_INDEX(UCS_MAX - 1) + 1)
115 * Representation of set of UCS code points.
117 typedef struct idn_ucsset {
118 segment_t segments[SEG_LEN];
119 int fixed;
120 int size; /* allocated size of 'ranges' */
121 int nranges; /* num of ranges */
122 range_t *ranges;
123 int refcnt; /* reference count */
124 } ucsset;
126 static idn_result_t addrange(idn_ucsset_t ctx, unsigned long from,
127 unsigned long to, char *func_name);
128 static int comp_range(const void *v1, const void *v2);
130 idn_result_t
131 idn_ucsset_create(idn_ucsset_t *ctx) {
132 idn_ucsset_t bm;
134 assert(ctx != NULL);
136 TRACE(("idn_ucsset_create()\n"));
138 if ((bm = malloc(sizeof(ucsset))) == NULL) {
139 WARNING(("idn_ucsset_create: malloc failed\n"));
140 return idn_nomemory;
142 bm->size = bm->nranges = 0;
143 bm->ranges = NULL;
144 bm->fixed = 0;
145 bm->refcnt = 1;
146 *ctx = bm;
147 return (idn_success);
150 void
151 idn_ucsset_destroy(idn_ucsset_t ctx) {
152 assert(ctx != NULL && ctx->refcnt > 0);
154 TRACE(("idn_ucsset_destroy()\n"));
156 if (--ctx->refcnt == 0) {
157 if (ctx->ranges != NULL)
158 free(ctx->ranges);
159 free(ctx);
163 void
164 idn_ucsset_incrref(idn_ucsset_t ctx) {
165 assert(ctx != NULL && ctx->refcnt > 0);
167 TRACE(("idn_ucsset_incrref()\n"));
169 ctx->refcnt++;
172 idn_result_t
173 idn_ucsset_add(idn_ucsset_t ctx, unsigned long v) {
174 assert(ctx != NULL && ctx->refcnt > 0);
176 TRACE(("idn_ucsset_add(v=U+%lX)\n", v));
178 return (addrange(ctx, v, v, "idn_ucsset_add"));
181 idn_result_t
182 idn_ucsset_addrange(idn_ucsset_t ctx, unsigned long from,
183 unsigned long to)
185 assert(ctx != NULL && ctx->refcnt > 0);
187 TRACE(("idn_ucsset_addrange(from=U+%lX, to=U+%lX)\n",
188 from, to));
190 return (addrange(ctx, from, to, "idn_ucsset_addrange"));
193 void
194 idn_ucsset_fix(idn_ucsset_t ctx) {
195 int nranges;
196 range_t *ranges;
197 segment_t *segments;
198 int i, j;
200 assert(ctx != NULL && ctx->refcnt > 0);
202 TRACE(("idn_ucsset_fix()\n"));
204 nranges = ctx->nranges;
205 ranges = ctx->ranges;
206 segments = ctx->segments;
208 if (ctx->fixed)
209 return;
211 ctx->fixed = 1;
213 /* Initialize segment array */
214 for (i = 0; i < SEG_LEN; i++) {
215 segments[i].range_start = -1;
216 segments[i].range_end = -1;
219 /* If the set is empty, there's nothing to be done. */
220 if (nranges == 0)
221 return;
223 /* Sort ranges. */
224 qsort(ranges, nranges, sizeof(range_t), comp_range);
226 /* Merge overlapped/continuous ranges. */
227 for (i = 0, j = 1; j < nranges; j++) {
228 if (ranges[i].to + 1 >= ranges[j].from) {
229 /* can be merged */
230 if (ranges[i].to < ranges[j].to) {
231 ranges[i].to = ranges[j].to;
233 } else {
234 i++;
235 if (i < j)
236 ranges[i] = ranges[j];
239 /* 'i' points the last range in the array. */
240 ctx->nranges = nranges = ++i;
242 /* Create segment array. */
243 for (i = 0; i < nranges; i++) {
244 int fidx = SEG_INDEX(ranges[i].from);
245 int tidx = SEG_INDEX(ranges[i].to);
247 for (j = fidx; j <= tidx; j++) {
248 if (segments[j].range_start < 0)
249 segments[j].range_start = i;
250 segments[j].range_end = i;
254 #if 0
256 * Does the standard guarantee realloc() always succeeds
257 * when shrinking?
259 /* Shrink malloc'ed space if possible. */
260 ctx->ranges = realloc(ctx->ranges, ctx->nranges * sizeof(range_t));
261 #endif
264 idn_result_t
265 idn_ucsset_lookup(idn_ucsset_t ctx, unsigned long v, int *found) {
266 int idx;
267 segment_t *segments;
269 assert(ctx != NULL && ctx->refcnt > 0 && found != NULL);
271 TRACE(("idn_ucsset_lookup(v=U+%lX)\n", v));
273 /* Make sure it is fixed. */
274 if (!ctx->fixed) {
275 WARNING(("idn_ucsset_lookup: not fixed yet\n"));
276 return (idn_failure);
279 /* Check the given code point. */
280 if (v >= UCS_MAX)
281 return (idn_invalid_codepoint);
283 /* Get the segment 'v' belongs to. */
284 segments = ctx->segments;
285 idx = SEG_INDEX(v);
287 /* Do binary search. */
288 *found = 0;
289 if (segments[idx].range_start >= 0) {
290 int lo = segments[idx].range_start;
291 int hi = segments[idx].range_end;
292 range_t *ranges = ctx->ranges;
294 while (lo <= hi) {
295 int mid = (lo + hi) / 2;
296 if (v < ranges[mid].from) {
297 hi = mid - 1;
298 } else if (v > ranges[mid].to) {
299 lo = mid + 1;
300 } else {
301 *found = 1;
302 break;
306 return (idn_success);
309 static idn_result_t
310 addrange(idn_ucsset_t ctx, unsigned long from, unsigned long to,
311 char *func_name)
313 range_t *newbuf;
315 /* Check the given code points. */
316 if (from > UCS_MAX) {
317 WARNING(("%s: code point out of range (U+%lX)\n",
318 func_name, from));
319 return (idn_invalid_codepoint);
320 } else if (to > UCS_MAX) {
321 WARNING(("%s: code point out of range (U+%lX)\n",
322 func_name, to));
323 return (idn_invalid_codepoint);
324 } else if (from > to) {
325 WARNING(("%s: invalid range spec (U+%lX-U+%lX)\n",
326 func_name, from, to));
327 return (idn_invalid_codepoint);
330 /* Make sure it is not fixed yet. */
331 if (ctx->fixed) {
332 WARNING(("%s: attempt to add to already fixed object\n",
333 func_name));
334 return (idn_failure);
337 /* Append the specified range to the 'ranges' array. */
338 if (ctx->nranges >= ctx->size) {
339 /* Make it bigger. */
340 if (ctx->size == 0)
341 ctx->size = INIT_SIZE;
342 else
343 ctx->size *= 2;
344 newbuf = realloc(ctx->ranges, ctx->size * sizeof(range_t));
345 if (newbuf == NULL)
346 return (idn_nomemory);
347 ctx->ranges = newbuf;
349 ctx->ranges[ctx->nranges].from = from;
350 ctx->ranges[ctx->nranges].to = to;
351 ctx->nranges++;
353 return (idn_success);
356 static int
357 comp_range(const void *v1, const void *v2) {
359 * Range comparation function suitable for qsort().
361 const range_t *r1 = v1;
362 const range_t *r2 = v2;
364 if (r1->from < r2->from)
365 return (-1);
366 else if (r1->from > r2->from)
367 return (1);
368 else
369 return (0);