4 static char *rcsid
= "Id: ucsmap.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp";
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
54 #include <idn/result.h>
55 #include <idn/assert.h>
57 #include <idn/logmacro.h>
58 #include <idn/ucsmap.h>
61 #define DEFAULT_BUF_SIZE 500
62 #define UCSMAP_HASH_SIZE 103
63 #define MAX_MAPLEN 0xffff
66 * This module implements UCS 1-to-N mapping.
67 * To speed up mapping table lookup, a combination of hash and
68 * binary search is used.
73 * Entries are sorted by its hash index and code point.
76 short hidx
; /* hash index */
77 unsigned short len
; /* length of mapped sequence */
78 unsigned long ucs
; /* code point to be mapped */
79 unsigned long *map
; /* mapped sequence of code points */
84 * Since the entries pointed by ucsmap_hash_t.entry are sorted,
85 * binary search can be used.
88 ucsmap_entry_t
*entry
; /* sorted by code point */
89 int n
; /* length of 'entry' */
93 * UCS character buffer for storing target character sequence.
95 typedef struct ucsmap_buf
{
96 struct ucsmap_buf
*next
;
97 unsigned long buf
[1]; /* actually a variable length array */
103 typedef struct idn_ucsmap
{
104 ucsmap_hash_t hash
[UCSMAP_HASH_SIZE
];
105 ucsmap_entry_t
*entries
; /* array of entries */
106 size_t entry_size
; /* allocated size */
107 size_t nentries
; /* # of entries in use */
108 ucsmap_buf_t
*mapdata
; /* list of character buffers */
109 size_t mapdata_size
; /* allocated size of current buffer */
110 size_t mapdata_used
; /* # of chars in use */
111 int fixed
; /* already fixed? */
112 int refcnt
; /* reference count */
115 static int ucsmap_hash(unsigned long v
);
116 static unsigned long *save_mapped_sequence(idn_ucsmap_t ctx
,
119 static void free_mapbuf(ucsmap_buf_t
*buf
);
120 static int comp_entry(const void *v1
, const void *v2
);
123 idn_ucsmap_create(idn_ucsmap_t
*ctxp
) {
126 assert(ctxp
!= NULL
);
128 TRACE(("idn_ucsmap_create()\n"));
130 if ((ctx
= malloc(sizeof(*ctx
))) == NULL
) {
131 WARNING(("idn_ucsmap_create: malloc failed\n"));
132 return (idn_nomemory
);
139 ctx
->mapdata_size
= 0;
140 ctx
->mapdata_used
= 0;
144 return (idn_success
);
148 idn_ucsmap_destroy(idn_ucsmap_t ctx
) {
149 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
151 TRACE(("idn_ucsmap_destroy()\n"));
153 if (--ctx
->refcnt
== 0) {
154 if (ctx
->entries
!= NULL
)
156 if (ctx
->mapdata
!= NULL
)
157 free_mapbuf(ctx
->mapdata
);
163 idn_ucsmap_incrref(idn_ucsmap_t ctx
) {
164 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
170 idn_ucsmap_add(idn_ucsmap_t ctx
, unsigned long ucs
,
171 unsigned long *map
, size_t maplen
)
174 ucsmap_entry_t
*newbuf
;
176 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
178 TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs
, maplen
));
180 /* Make sure it is not fixed yet. */
182 WARNING(("idn_ucsmap_add: attempt to add to fixed map\n"));
183 return (idn_failure
);
186 if (maplen
> MAX_MAPLEN
) {
187 WARNING(("idn_ucsmap_add: maplen too large (> %d)\n",
189 return (idn_failure
);
192 /* Append an entry. */
193 if (ctx
->nentries
>= ctx
->entry_size
) {
194 if (ctx
->entry_size
== 0)
195 ctx
->entry_size
= INIT_SIZE
;
197 ctx
->entry_size
*= 2;
198 newbuf
= realloc(ctx
->entries
, sizeof(*e
) * ctx
->entry_size
);
200 return (idn_nomemory
);
201 ctx
->entries
= newbuf
;
203 e
= &ctx
->entries
[ctx
->nentries
];
204 e
->hidx
= ucsmap_hash(ucs
);
208 /* Save mapped sequence in the buffer. */
209 e
->map
= save_mapped_sequence(ctx
, map
, maplen
);
211 return (idn_nomemory
);
214 * Zero 'maplen' is perfectly valid meaning one-to-zero
221 return (idn_success
);
225 idn_ucsmap_fix(idn_ucsmap_t ctx
) {
230 assert(ctx
!= NULL
&& ctx
->refcnt
> 0);
232 TRACE(("idn_ucsmap_fix()\n"));
239 /* Initialize hash. */
240 for (i
= 0; i
< UCSMAP_HASH_SIZE
; i
++) {
241 ctx
->hash
[i
].entry
= NULL
;
245 if (ctx
->nentries
== 0)
248 /* Sort entries by the hash value and code point. */
249 qsort(ctx
->entries
, ctx
->nentries
, sizeof(ucsmap_entry_t
), comp_entry
);
252 * Now the entries are sorted by their hash value, and
253 * sorted by its code point among the ones with the same hash value.
256 /* Build hash table. */
258 for (i
= 0, e
= ctx
->entries
; i
< ctx
->nentries
; i
++, e
++) {
259 if (e
->hidx
!= last_hidx
) {
260 ctx
->hash
[e
->hidx
].entry
= e
;
263 ctx
->hash
[last_hidx
].n
++;
268 idn_ucsmap_map(idn_ucsmap_t ctx
, unsigned long v
, unsigned long *to
,
269 size_t tolen
, size_t *maplenp
) {
275 assert(ctx
!= NULL
&& ctx
->refcnt
> 0 && to
!= NULL
&&
278 TRACE(("idn_ucsmap_map(v=U+%lX)\n", v
));
281 WARNING(("idn_ucsmap_map: not fixed yet\n"));
282 return (idn_failure
);
285 /* First, look up hash table. */
286 hash
= ucsmap_hash(v
);
287 if ((n
= ctx
->hash
[hash
].n
) == 0)
290 /* Then do binary search. */
291 e
= ctx
->hash
[hash
].entry
;
298 else if (v
> e
[mid
].ucs
)
302 if (tolen
< e
[mid
].len
)
303 return (idn_buffer_overflow
);
304 memcpy(to
, e
[mid
].map
, sizeof(*to
) * e
[mid
].len
);
305 *maplenp
= e
[mid
].len
;
306 return (idn_success
);
311 * Not found. Put the original character to 'to'
312 * just for convenience.
316 return (idn_buffer_overflow
);
319 return (idn_nomapping
);
323 ucsmap_hash(unsigned long v
) {
324 return (v
% UCSMAP_HASH_SIZE
);
327 static unsigned long *
328 save_mapped_sequence(idn_ucsmap_t ctx
, unsigned long *map
, size_t maplen
) {
334 * If the current buffer (the first one in the ctx->mapdata list)
335 * has enough space, use it. Otherwise, allocate a new buffer and
336 * insert it at the beginning of the list.
338 if (ctx
->mapdata_used
+ maplen
> ctx
->mapdata_size
) {
339 if (maplen
> DEFAULT_BUF_SIZE
)
340 allocsize
= maplen
* 2;
342 allocsize
= DEFAULT_BUF_SIZE
;
343 buf
= malloc(sizeof(ucsmap_hash_t
) +
344 sizeof(unsigned long) * allocsize
);
347 buf
->next
= ctx
->mapdata
;
349 ctx
->mapdata_size
= allocsize
;
350 ctx
->mapdata_used
= 0;
352 p
= ctx
->mapdata
->buf
+ ctx
->mapdata_used
;
353 memcpy(p
, map
, sizeof(unsigned long) * maplen
);
354 ctx
->mapdata_used
+= maplen
;
359 free_mapbuf(ucsmap_buf_t
*buf
) {
360 while (buf
!= NULL
) {
361 ucsmap_buf_t
*next
= buf
->next
;
368 comp_entry(const void *v1
, const void *v2
) {
369 const ucsmap_entry_t
*e1
= v1
;
370 const ucsmap_entry_t
*e2
= v2
;
372 if (e1
->hidx
< e2
->hidx
)
374 else if (e1
->hidx
> e2
->hidx
)
376 else if (e1
->ucs
< e2
->ucs
)
378 else if (e1
->ucs
> e2
->ucs
)