4 static char *rcsid
= "Id: ucsset.c,v 1.1.1.1 2003/06/04 00:26:15 marka Exp";
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
55 #include <idn/result.h>
56 #include <idn/assert.h>
57 #include <idn/logmacro.h>
58 #include <idn/ucsset.h>
60 #define UCS_MAX 0x80000000UL
67 * The set of code points is represented by an array of code point ranges.
68 * In the building phase, specified ranges by 'idn_ucsset_add' or
69 * 'idn_ucsset_addrange' are simply appended to the array.
70 * And 'idn_ucsset_fix' sorts the array by the code point value, and also
71 * merges any intersecting ranges. Since the array is sorted, a binary
72 * search can be used for looking up.
82 * To speed up searching further, the entire region of UCS-4 code points
83 * (U+0000 - U+7FFFFFFF) are divided into segments. For each segment,
84 * the first and last element of the range array corresponding to the
85 * segment are computed by 'idn_ucsset_fix'. This narrows down the
86 * (initial) search range.
89 int range_start
; /* index of ucsset.ranges */
90 int range_end
; /* ditto */
94 * Code point to segment index conversion.
96 * Below is the function that maps a code point to the corresponding segment.
97 * The mapping is non-uniform, so that BMP, the following 16 planes that
98 * comprise Unicode code points together with BMP, and other planes
99 * have different granularity.
101 #define SEG_THLD1 0x10000 /* BMP */
102 #define SEG_THLD2 0x110000 /* Unicode (BMP+16planes) */
103 #define SEG_SFT1 10 /* BMP: 1K code points/segment */
104 #define SEG_SFT2 14 /* following 16 planes: 16K cp/seg */
105 #define SEG_SFT3 24 /* rest: 16M cp/seg */
106 #define SEG_OFF1 (SEG_THLD1 >> SEG_SFT1)
107 #define SEG_OFF2 (((SEG_THLD2 - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1)
108 #define SEG_INDEX(v) \
109 (((v) < SEG_THLD1) ? ((v) >> SEG_SFT1) : \
110 ((v) < SEG_THLD2) ? ((((v) - SEG_THLD1) >> SEG_SFT2) + SEG_OFF1) : \
111 ((((v) - SEG_THLD2) >> SEG_SFT3) + SEG_OFF2))
112 #define SEG_LEN (SEG_INDEX(UCS_MAX - 1) + 1)
115 * Representation of set of UCS code points.
117 typedef struct idn_ucsset
{
118 segment_t segments
[SEG_LEN
];
120 int size
; /* allocated size of 'ranges' */
121 int nranges
; /* num of ranges */
123 int refcnt
; /* reference count */
126 static idn_result_t
addrange(idn_ucsset_t ctx
, unsigned long from
,
127 unsigned long to
, char *func_name
);
128 static int comp_range(const void *v1
, const void *v2
);
131 idn_ucsset_create(idn_ucsset_t
*ctx
) {
136 TRACE(("idn_ucsset_create()\n"));
138 if ((bm
= malloc(sizeof(ucsset
))) == NULL
) {
139 WARNING(("idn_ucsset_create: malloc failed\n"));
142 bm
->size
= bm
->nranges
= 0;
147 return (idn_success
);
151 idn_ucsset_destroy(idn_ucsset_t ctx
) {
152 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
154 TRACE(("idn_ucsset_destroy()\n"));
156 if (--ctx
->refcnt
== 0) {
157 if (ctx
->ranges
!= NULL
)
164 idn_ucsset_incrref(idn_ucsset_t ctx
) {
165 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
167 TRACE(("idn_ucsset_incrref()\n"));
173 idn_ucsset_add(idn_ucsset_t ctx
, unsigned long v
) {
174 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
176 TRACE(("idn_ucsset_add(v=U+%lX)\n", v
));
178 return (addrange(ctx
, v
, v
, "idn_ucsset_add"));
182 idn_ucsset_addrange(idn_ucsset_t ctx
, unsigned long from
,
185 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
187 TRACE(("idn_ucsset_addrange(from=U+%lX, to=U+%lX)\n",
190 return (addrange(ctx
, from
, to
, "idn_ucsset_addrange"));
194 idn_ucsset_fix(idn_ucsset_t ctx
) {
200 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
202 TRACE(("idn_ucsset_fix()\n"));
204 nranges
= ctx
->nranges
;
205 ranges
= ctx
->ranges
;
206 segments
= ctx
->segments
;
213 /* Initialize segment array */
214 for (i
= 0; i
< SEG_LEN
; i
++) {
215 segments
[i
].range_start
= -1;
216 segments
[i
].range_end
= -1;
219 /* If the set is empty, there's nothing to be done. */
224 qsort(ranges
, nranges
, sizeof(range_t
), comp_range
);
226 /* Merge overlapped/continuous ranges. */
227 for (i
= 0, j
= 1; j
< nranges
; j
++) {
228 if (ranges
[i
].to
+ 1 >= ranges
[j
].from
) {
230 if (ranges
[i
].to
< ranges
[j
].to
) {
231 ranges
[i
].to
= ranges
[j
].to
;
236 ranges
[i
] = ranges
[j
];
239 /* 'i' points the last range in the array. */
240 ctx
->nranges
= nranges
= ++i
;
242 /* Create segment array. */
243 for (i
= 0; i
< nranges
; i
++) {
244 int fidx
= SEG_INDEX(ranges
[i
].from
);
245 int tidx
= SEG_INDEX(ranges
[i
].to
);
247 for (j
= fidx
; j
<= tidx
; j
++) {
248 if (segments
[j
].range_start
< 0)
249 segments
[j
].range_start
= i
;
250 segments
[j
].range_end
= i
;
256 * Does the standard guarantee realloc() always succeeds
259 /* Shrink malloc'ed space if possible. */
260 ctx
->ranges
= realloc(ctx
->ranges
, ctx
->nranges
* sizeof(range_t
));
265 idn_ucsset_lookup(idn_ucsset_t ctx
, unsigned long v
, int *found
) {
269 assert(ctx
!= NULL
&& ctx
->refcnt
> 0 && found
!= NULL
);
271 TRACE(("idn_ucsset_lookup(v=U+%lX)\n", v
));
273 /* Make sure it is fixed. */
275 WARNING(("idn_ucsset_lookup: not fixed yet\n"));
276 return (idn_failure
);
279 /* Check the given code point. */
281 return (idn_invalid_codepoint
);
283 /* Get the segment 'v' belongs to. */
284 segments
= ctx
->segments
;
287 /* Do binary search. */
289 if (segments
[idx
].range_start
>= 0) {
290 int lo
= segments
[idx
].range_start
;
291 int hi
= segments
[idx
].range_end
;
292 range_t
*ranges
= ctx
->ranges
;
295 int mid
= (lo
+ hi
) / 2;
296 if (v
< ranges
[mid
].from
) {
298 } else if (v
> ranges
[mid
].to
) {
306 return (idn_success
);
310 addrange(idn_ucsset_t ctx
, unsigned long from
, unsigned long to
,
315 /* Check the given code points. */
316 if (from
> UCS_MAX
) {
317 WARNING(("%s: code point out of range (U+%lX)\n",
319 return (idn_invalid_codepoint
);
320 } else if (to
> UCS_MAX
) {
321 WARNING(("%s: code point out of range (U+%lX)\n",
323 return (idn_invalid_codepoint
);
324 } else if (from
> to
) {
325 WARNING(("%s: invalid range spec (U+%lX-U+%lX)\n",
326 func_name
, from
, to
));
327 return (idn_invalid_codepoint
);
330 /* Make sure it is not fixed yet. */
332 WARNING(("%s: attempt to add to already fixed object\n",
334 return (idn_failure
);
337 /* Append the specified range to the 'ranges' array. */
338 if (ctx
->nranges
>= ctx
->size
) {
339 /* Make it bigger. */
341 ctx
->size
= INIT_SIZE
;
344 newbuf
= realloc(ctx
->ranges
, ctx
->size
* sizeof(range_t
));
346 return (idn_nomemory
);
347 ctx
->ranges
= newbuf
;
349 ctx
->ranges
[ctx
->nranges
].from
= from
;
350 ctx
->ranges
[ctx
->nranges
].to
= to
;
353 return (idn_success
);
357 comp_range(const void *v1
, const void *v2
) {
359 * Range comparation function suitable for qsort().
361 const range_t
*r1
= v1
;
362 const range_t
*r2
= v2
;
364 if (r1
->from
< r2
->from
)
366 else if (r1
->from
> r2
->from
)