1 /* $NetBSD: unicode.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */
4 static char *rcsid
= "Id: unicode.c,v 1.1 2003/06/04 00:26:16 marka Exp ";
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
56 #include <idn/result.h>
57 #include <idn/logmacro.h>
58 #include <idn/assert.h>
59 #include <idn/unicode.h>
61 #define UNICODE_CURRENT "3.2.0"
63 #define UCS_MAX 0x10ffff
64 #define END_BIT 0x80000000
67 * Some constants for Hangul decomposition/composition.
76 #define SLast (SBase + LCount * VCount * TCount)
79 * Symbol composition macro.
81 #define compose_sym(a, b) compose_symX(a, b)
82 #define compose_symX(a, b) a ## b
85 unsigned long c2
; /* 2nd character */
86 unsigned long comp
; /* composed character */
89 #include "unicodedata_320.c"
91 #include "unicode_template.c"
94 typedef int (*unicode_canonclassproc
)(unsigned long v
);
95 typedef int (*unicode_decomposeproc
)(unsigned long c
,
96 const unsigned long **seqp
);
97 typedef int (*unicode_composeproc
)(unsigned long c
,
98 const struct composition
**compp
);
100 static struct idn__unicode_ops
{
102 unicode_canonclassproc canonclass_proc
;
103 unicode_decomposeproc decompose_proc
;
104 unicode_composeproc compose_proc
;
105 } unicode_versions
[] = {
106 #define MAKE_UNICODE_HANDLE(version, suffix) \
108 compose_sym(canonclass_, suffix), \
109 compose_sym(decompose_, suffix), \
110 compose_sym(compose_, suffix) }
111 MAKE_UNICODE_HANDLE("3.2.0", v320
),
113 #undef MAKE_UNICODE_HANDLE
117 idn__unicode_create(const char *version
,
118 idn__unicode_version_t
*versionp
) {
119 idn__unicode_version_t v
;
121 assert(versionp
!= NULL
);
122 TRACE(("idn__unicode_create(version=%-.50s)\n",
123 version
== NULL
? "<NULL>" : version
));
126 version
= UNICODE_CURRENT
;
128 for (v
= unicode_versions
; v
->version
!= NULL
; v
++) {
129 if (strcmp(v
->version
, version
) == 0) {
131 return (idn_success
);
134 return (idn_notfound
);
138 idn__unicode_destroy(idn__unicode_version_t version
) {
139 assert(version
!= NULL
);
140 TRACE(("idn__unicode_destroy()\n"));
145 idn__unicode_canonicalclass(idn__unicode_version_t version
, unsigned long c
) {
149 return (*version
->canonclass_proc
)(c
);
153 idn__unicode_decompose(idn__unicode_version_t version
,
154 int compat
, unsigned long *v
, size_t vlen
,
155 unsigned long c
, int *decomp_lenp
) {
156 unsigned long *vorg
= v
;
158 const unsigned long *seq
;
160 assert(v
!= NULL
&& vlen
>= 0 && decomp_lenp
!= NULL
);
163 return (idn_notfound
);
166 * First, check for Hangul.
168 if (SBase
<= c
&& c
< SLast
) {
169 int idx
, t_offset
, v_offset
, l_offset
;
172 t_offset
= idx
% TCount
;
174 v_offset
= idx
% VCount
;
175 l_offset
= idx
/ VCount
;
176 if ((t_offset
== 0 && vlen
< 2) || (t_offset
> 0 && vlen
< 3))
177 return (idn_buffer_overflow
);
178 *v
++ = LBase
+ l_offset
;
179 *v
++ = VBase
+ v_offset
;
181 *v
++ = TBase
+ t_offset
;
182 *decomp_lenp
= v
- vorg
;
183 return (idn_success
);
187 * Look up the decomposition table. If no decomposition is defined,
188 * or if it is a compatibility decomposition when a canonical
189 * decomposition was requested, return 'idn_notfound'.
191 seqidx
= (*version
->decompose_proc
)(c
, &seq
);
192 if (seqidx
== 0 || (compat
== 0 && (seqidx
& DECOMP_COMPAT
) != 0))
193 return (idn_notfound
);
196 * Copy the decomposed sequence. The end of the sequence is
197 * marked with END_BIT.
206 /* Decompose recursively. */
207 r
= idn__unicode_decompose(version
, compat
, v
, vlen
, c
, &dlen
);
208 if (r
== idn_success
) {
211 } else if (r
== idn_notfound
) {
213 return (idn_buffer_overflow
);
220 } while ((*seq
++ & END_BIT
) == 0);
222 *decomp_lenp
= v
- vorg
;
224 return (idn_success
);
228 idn__unicode_iscompositecandidate(idn__unicode_version_t version
,
230 const struct composition
*dummy
;
235 /* Check for Hangul */
236 if ((LBase
<= c
&& c
< LBase
+ LCount
) || (SBase
<= c
&& c
< SLast
))
240 * Look up the composition table. If there is no composition
241 * that begins with the given character, it is not a
242 * composition candidate.
244 if ((*version
->compose_proc
)(c
, &dummy
) == 0)
251 idn__unicode_compose(idn__unicode_version_t version
, unsigned long c1
,
252 unsigned long c2
, unsigned long *compp
) {
255 const struct composition
*cseq
;
257 assert(compp
!= NULL
);
259 if (c1
> UCS_MAX
|| c2
> UCS_MAX
)
260 return (idn_notfound
);
265 if (LBase
<= c1
&& c1
< LBase
+ LCount
&&
266 VBase
<= c2
&& c2
< VBase
+ VCount
) {
271 ((c1
- LBase
) * VCount
+ (c2
- VBase
)) * TCount
;
272 return (idn_success
);
273 } else if (SBase
<= c1
&& c1
< SLast
&&
274 TBase
<= c2
&& c2
< TBase
+ TCount
&&
275 (c1
- SBase
) % TCount
== 0) {
279 *compp
= c1
+ (c2
- TBase
);
280 return (idn_success
);
284 * Look up the composition table. If the result is 0, no composition
285 * is defined. Otherwise, the upper 16 bits of the result contain
286 * the number of compositions that begin with 'c1', and the lower
287 * 16 bits are the offset in 'compose_seq'.
289 if ((n
= (*version
->compose_proc
)(c1
, &cseq
)) == 0)
290 return (idn_notfound
);
293 * The composite sequences are sorted by the 2nd character 'c2'.
294 * So we can use binary search.
299 int mid
= (lo
+ hi
) / 2;
301 if (cseq
[mid
].c2
< c2
) {
303 } else if (cseq
[mid
].c2
> c2
) {
306 *compp
= cseq
[mid
].comp
;
307 return (idn_success
);
310 return (idn_notfound
);