4 static char *rcsid
= "Id: normalizer.c,v 1.1.1.1 2003/06/04 00:26:05 marka Exp";
8 * Copyright (c) 2000,2002 Japan Network Information Center.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/result.h>
60 #include <idn/normalizer.h>
61 #include <idn/strhash.h>
62 #include <idn/unormalize.h>
63 #include <idn/unicode.h>
65 #include <idn/debug.h>
/*
 * Number of scheme slots embedded in every context: it sizes the
 * 'local_buf' member of struct idn_normalizer and is the initial
 * 'scheme_size' assigned by idn_normalizer_create().
 */
68 #define MAX_LOCAL_SCHEME 3
/* Evaluates to true when the file-level 'scheme_hash' is not NULL. */
70 #define INITIALIZED (scheme_hash != NULL)
74 idn_normalizer_proc_t proc
;
/*
 * Normalizer context: an ordered list of normalization schemes.
 *
 * 'schemes' points at the array of scheme pointers currently in use.
 * idn_normalizer_create() aims it at the embedded 'local_buf';
 * expand_schemes() replaces it with a heap array when more room is
 * needed.
 *
 * NOTE(review): the code in this file also uses the members 'nschemes',
 * 'scheme_size' and 'reference_count'; their declarations are not
 * visible in this extract.
 */
77 struct idn_normalizer
{
80 normalize_scheme_t
**schemes
;
81 normalize_scheme_t
*local_buf
[MAX_LOCAL_SCHEME
];
/*
 * Table of registered schemes keyed by scheme name: filled by
 * idn_normalizer_register() and queried by idn_normalizer_add().
 */
85 static idn__strhash_t scheme_hash
;
/*
 * Unicode version handles handed to idn__unormalize_formkc().  Both start
 * out NULL and are passed to INIT_VERSION by normalizer_formkc() ('vcur')
 * and normalizer_formkc_v320() ('v320', version string "3.2.0").
 */
87 static idn__unicode_version_t vcur
= NULL
;
88 static idn__unicode_version_t v320
= NULL
;
/*
 * INIT_VERSION(version, var): calls idn__unicode_create(version, &var)
 * and tests the result against idn_success.
 *
 * NOTE(review): the opening line of the macro body and the statements
 * executed on failure are not visible in this extract; confirm them
 * against the full source before relying on this description.
 */
89 #define INIT_VERSION(version, var) \
91 idn_result_t r = idn__unicode_create(version, &var); \
92 if (r != idn_success) \
96 static idn_result_t
expand_schemes(idn_normalizer_t ctx
);
97 static idn_result_t
register_standard_normalizers(void);
98 static idn_result_t
normalizer_formkc(const unsigned long *from
,
99 unsigned long *to
, size_t tolen
);
100 static idn_result_t
normalizer_formkc_v320(const unsigned long *from
,
/*
 * Built-in normalization schemes, registered one by one by
 * register_standard_normalizers().  Each entry pairs a scheme name with
 * the procedure that implements it; "unicode-form-kc/3.2.0" and
 * "RFC3491" share normalizer_formkc_v320.
 *
 * NOTE(review): the registration loop stops at an entry whose 'name' is
 * NULL; neither the 'name' member nor that terminating entry is visible
 * in this extract.
 */
104 static struct standard_normalizer
{
106 idn_normalizer_proc_t proc
;
107 } standard_normalizer
[] = {
108 { "unicode-form-kc", normalizer_formkc
},
109 { "unicode-form-kc/3.2.0", normalizer_formkc_v320
},
110 { "RFC3491", normalizer_formkc_v320
},
/*
 * Module initialization.
 *
 * When 'scheme_hash' is already non-NULL the result is idn_success and
 * nothing else is done.  Otherwise a string hash is created with
 * idn__strhash_create() and the standard normalizers are registered via
 * register_standard_normalizers().  The final result is traced with
 * idn_result_tostring().
 *
 * NOTE(review): the local declarations, the statement that publishes the
 * new hash in 'scheme_hash', and the jumps/return are not visible in
 * this extract.
 */
115 idn_normalizer_initialize(void) {
119 TRACE(("idn_normalizer_initialize()\n"));
121 if (scheme_hash
!= NULL
) {
122 r
= idn_success
; /* already initialized */
126 if ((r
= idn__strhash_create(&hash
)) != idn_success
)
130 /* Register standard normalizers */
131 r
= register_standard_normalizers();
133 TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r
)));
/*
 * Create a normalizer context.
 *
 * 'ctxp' must not be NULL (asserted).  The context is allocated with
 * malloc(); its 'scheme_size' is set to MAX_LOCAL_SCHEME, 'schemes' is
 * pointed at the embedded 'local_buf', and 'reference_count' starts at 1.
 * The result code is traced before the function ends.
 *
 * NOTE(review): the handling of a failed malloc(), the initialization of
 * 'nschemes', and the store through 'ctxp' are not visible in this
 * extract.
 */
138 idn_normalizer_create(idn_normalizer_t
*ctxp
) {
139 idn_normalizer_t ctx
;
142 assert(ctxp
!= NULL
);
143 TRACE(("idn_normalizer_create()\n"));
145 if ((ctx
= malloc(sizeof(struct idn_normalizer
))) == NULL
) {
151 ctx
->scheme_size
= MAX_LOCAL_SCHEME
;
152 ctx
->schemes
= ctx
->local_buf
;
153 ctx
->reference_count
= 1;
158 TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r
)));
/*
 * Drop one reference to 'ctx'.
 *
 * The reference count is decremented first.  When it has fallen to zero
 * or below, the "object is destroyed" trace is emitted and 'schemes' is
 * compared with the embedded 'local_buf'.  Otherwise only the count
 * transition (old->new) is traced.
 *
 * NOTE(review): the statements that actually release memory are not
 * visible in this extract; presumably a heap-allocated 'schemes' array
 * and the context itself are freed -- confirm against the full source.
 */
163 idn_normalizer_destroy(idn_normalizer_t ctx
) {
166 TRACE(("idn_normalizer_destroy()\n"));
168 ctx
->reference_count
--;
169 if (ctx
->reference_count
<= 0) {
170 TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
171 if (ctx
->schemes
!= ctx
->local_buf
)
175 TRACE(("idn_normalizer_destroy(): "
176 "update reference count (%d->%d)\n",
177 ctx
->reference_count
+ 1, ctx
->reference_count
));
/*
 * Take an additional reference to 'ctx'.
 *
 * The count transition (current -> current + 1) is traced before the
 * counter is actually incremented, so the trace arguments are computed
 * from the not-yet-updated value.
 */
182 idn_normalizer_incrref(idn_normalizer_t ctx
) {
185 TRACE(("idn_normalizer_incrref()\n"));
186 TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
187 ctx
->reference_count
, ctx
->reference_count
+ 1));
189 ctx
->reference_count
++;
/*
 * Append the scheme registered under 'scheme_name' to 'ctx'.
 *
 * Both arguments must be non-NULL (asserted).  The name is looked up in
 * 'scheme_hash'; an unknown name is logged with ERROR and yields
 * idn_invalid_name.  If the scheme array is full
 * (nschemes == scheme_size) it is first grown with expand_schemes(), and
 * a failure there is propagated.  The scheme is then stored at
 * schemes[nschemes] and 'nschemes' is incremented.  The result code is
 * traced at the end.
 *
 * NOTE(review): the declaration of 'v', the assignment of 'scheme' from
 * the looked-up value, and the jumps/return are not visible in this
 * extract.
 */
193 idn_normalizer_add(idn_normalizer_t ctx
, const char *scheme_name
) {
196 normalize_scheme_t
*scheme
;
198 assert(ctx
!= NULL
&& scheme_name
!= NULL
);
200 TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name
));
204 if (idn__strhash_get(scheme_hash
, scheme_name
, &v
) != idn_success
) {
205 ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
207 r
= idn_invalid_name
;
213 assert(ctx
->nschemes
<= ctx
->scheme_size
);
215 if (ctx
->nschemes
== ctx
->scheme_size
&&
216 (r
= expand_schemes(ctx
)) != idn_success
) {
220 ctx
->schemes
[ctx
->nschemes
++] = scheme
;
223 TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r
)));
/*
 * Add several schemes to 'ctx' by calling idn_normalizer_add() for each
 * of 'nschemes' names, checking every result against idn_success.
 * 'ctx' and 'scheme_names' must be non-NULL (asserted).  The final
 * result code is traced.
 *
 * NOTE(review): the visible loop body always passes '*scheme_names'; the
 * statement that advances 'scheme_names' between iterations, the action
 * taken on a failed add, and the end of the parameter list are not
 * visible in this extract -- confirm them against the full source.
 */
228 idn_normalizer_addall(idn_normalizer_t ctx
, const char **scheme_names
,
233 assert(ctx
!= NULL
&& scheme_names
!= NULL
);
235 TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes
));
237 for (i
= 0; i
< nschemes
; i
++) {
238 r
= idn_normalizer_add(ctx
, (const char *)*scheme_names
);
239 if (r
!= idn_success
)
246 TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r
)));
/*
 * Run every scheme held by 'ctx', in order, over the UCS-4 string 'from'
 * and deliver the result in 'to', which has room for 'tolen' elements.
 *
 * The module must be initialized and all pointer arguments non-NULL
 * (asserted).
 *
 * With no schemes in the context the input is simply copied to 'to';
 * idn_buffer_overflow results when 'tolen' is smaller than the input
 * length plus one.
 *
 * Otherwise the schemes are applied one after another.  Two scratch
 * buffers ('buffers[0]' and 'buffers[1]', sizes tracked in 'buflen') are
 * grown with realloc() whenever the selected one is smaller than
 * 'dstlen'.  Each scheme's 'proc' is called as proc(src, dst, dstlen);
 * idn_buffer_overflow is treated specially while 'dst' is not the
 * caller's 'to'.  On success the output is traced, otherwise the result
 * string is traced.
 *
 * NOTE(review): many statements of this function are not visible in this
 * extract (local declarations, the bodies of the destination-selection
 * branches, the retry/doubling step, the src/dst hand-over between
 * iterations, and the cleanup of the scratch buffers).  Presumably the
 * last scheme writes straight into 'to' and the scratch buffers alternate
 * for the earlier ones -- confirm against the full source.
 */
251 idn_normalizer_normalize(idn_normalizer_t ctx
, const unsigned long *from
,
252 unsigned long *to
, size_t tolen
) {
254 unsigned long *src
, *dst
;
255 unsigned long *buffers
[2] = {NULL
, NULL
};
256 size_t buflen
[2] = {0, 0};
261 assert(scheme_hash
!= NULL
);
262 assert(ctx
!= NULL
&& from
!= NULL
&& to
!= NULL
);
264 TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
265 idn__debug_ucs4xstring(from
, 50), (int)tolen
));
267 if (ctx
->nschemes
<= 0) {
268 if (tolen
< idn_ucs4_strlen(from
) + 1) {
269 r
= idn_buffer_overflow
;
272 idn_ucs4_strcpy(to
, from
);
281 dstlen
= idn_ucs4_strlen(from
) + 1;
284 while (i
< ctx
->nschemes
) {
285 TRACE(("idn_normalizer_normalize(): normalize %s\n",
286 ctx
->schemes
[i
]->name
));
289 * Choose destination area to restore the result of a mapping.
291 if (i
+ 1 == ctx
->nschemes
) {
295 if (src
== buffers
[0])
300 if (buflen
[idx
] < dstlen
) {
303 newbuf
= realloc(buffers
[idx
],
304 sizeof(long) * dstlen
);
305 if (newbuf
== NULL
) {
309 buffers
[idx
] = (unsigned long *)newbuf
;
310 buflen
[idx
] = dstlen
;
314 dstlen
= buflen
[idx
];
318 * Perform i-th normalization scheme.
319 * If buffer size is not enough, we double it and try again.
321 r
= (ctx
->schemes
[i
]->proc
)(src
, dst
, dstlen
);
322 if (r
== idn_buffer_overflow
&& dst
!= to
) {
326 if (r
!= idn_success
)
337 if (r
== idn_success
) {
338 TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
339 idn__debug_ucs4xstring(to
, 50)));
341 TRACE(("idn_normalizer_normalize(): %s\n",
342 idn_result_tostring(r
)));
/*
 * Register 'proc' as the implementation of the scheme 'scheme_name'.
 *
 * Both arguments must be non-NULL (asserted).  A single malloc() block
 * holds the scheme record followed by a copy of the name: 'name' is
 * pointed just past the record and filled with strcpy(), so the block
 * size is sizeof(*scheme) + strlen(scheme_name) + 1.  The record is then
 * stored in 'scheme_hash' with idn__strhash_put(), and the result code
 * is traced.
 *
 * NOTE(review): the assignment of 'proc' into the record, the handling
 * of a failed malloc() or a failed idn__strhash_put(), and the return
 * are not visible in this extract.
 */
348 idn_normalizer_register(const char *scheme_name
, idn_normalizer_proc_t proc
) {
350 normalize_scheme_t
*scheme
;
352 assert(scheme_name
!= NULL
&& proc
!= NULL
);
354 TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name
));
358 scheme
= malloc(sizeof(*scheme
) + strlen(scheme_name
) + 1);
359 if (scheme
== NULL
) {
363 scheme
->name
= (char *)(scheme
+ 1);
364 (void)strcpy(scheme
->name
, scheme_name
);
367 r
= idn__strhash_put(scheme_hash
, scheme_name
, scheme
);
368 if (r
!= idn_success
)
373 TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r
)));
378 expand_schemes(idn_normalizer_t ctx
) {
379 normalize_scheme_t
**new_schemes
;
380 int new_size
= ctx
->scheme_size
* 2;
382 if (ctx
->schemes
== ctx
->local_buf
) {
383 new_schemes
= malloc(sizeof(normalize_scheme_t
) * new_size
);
385 new_schemes
= realloc(ctx
->schemes
,
386 sizeof(normalize_scheme_t
) * new_size
);
388 if (new_schemes
== NULL
)
389 return (idn_nomemory
);
391 if (ctx
->schemes
== ctx
->local_buf
)
392 memcpy(new_schemes
, ctx
->local_buf
, sizeof(ctx
->local_buf
));
394 ctx
->schemes
= new_schemes
;
395 ctx
->scheme_size
= new_size
;
397 return (idn_success
);
/*
 * Register every entry of the standard_normalizer[] table with
 * idn_normalizer_register(), walking the table until an entry whose
 * 'name' is NULL.  Each registration that does not return idn_success is
 * reported through WARNING (the message is prefixed with
 * "idn_normalizer_initialize()").
 *
 * The function ends in one of two returns: idn_failure or idn_success.
 *
 * NOTE(review): the local declarations and the condition that selects
 * between the two return statements are not visible in this extract;
 * presumably failures are counted inside the loop -- confirm against the
 * full source.
 */
401 register_standard_normalizers(void) {
405 for (i
= 0; standard_normalizer
[i
].name
!= NULL
; i
++) {
407 r
= idn_normalizer_register(standard_normalizer
[i
].name
,
408 standard_normalizer
[i
].proc
);
409 if (r
!= idn_success
) {
410 WARNING(("idn_normalizer_initialize(): "
411 "failed to register \"%-.100s\"\n",
412 standard_normalizer
[i
].name
));
417 return (idn_failure
);
419 return (idn_success
);
423 * Unicode Normalization Forms -- latest version
427 normalizer_formkc(const unsigned long *from
, unsigned long *to
, size_t tolen
) {
428 INIT_VERSION(NULL
, vcur
);
429 return (idn__unormalize_formkc(vcur
, from
, to
, tolen
));
433 * Unicode Normalization Forms -- version 3.2.0
437 normalizer_formkc_v320(const unsigned long *from
, unsigned long *to
,
439 INIT_VERSION("3.2.0", v320
);
440 return (idn__unormalize_formkc(v320
, from
, to
, tolen
));