etc/services - sync with NetBSD-8
[minix.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / unicode.c
blob735f640ba0e7927353102ff4ff9d9ec889d3ec3a
1 /* $NetBSD: unicode.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */
3 #ifndef lint
4 static char *rcsid = "Id: unicode.c,v 1.1 2003/06/04 00:26:16 marka Exp ";
5 #endif
7 /*
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9 * All rights reserved.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
35 * JPNIC.
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
50 #include <config.h>
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
56 #include <idn/result.h>
57 #include <idn/logmacro.h>
58 #include <idn/assert.h>
59 #include <idn/unicode.h>
/* Version string used when a caller does not ask for a specific one. */
#define UNICODE_CURRENT		"3.2.0"

/* Largest code point accepted by the lookup routines in this file. */
#define UCS_MAX			0x10ffff
/* Flag bit set on the last element of a stored decomposition sequence. */
#define END_BIT			0x80000000

/*
 * Some constants for Hangul decomposition/composition.
 *
 * SBase is the first precomposed syllable; LBase, VBase and TBase are the
 * bases added to the leading, vowel and trailing offsets respectively.
 * SLast is one past the last precomposed syllable.
 */
#define SBase			0xac00
#define LBase			0x1100
#define VBase			0x1161
#define TBase			0x11a7
#define LCount			19
#define VCount			21
#define TCount			28
#define SLast			(SBase + LCount * VCount * TCount)

/*
 * Symbol composition macro.
 *
 * Two levels are used so that macro arguments are expanded before the
 * tokens are pasted together.
 */
#define compose_sym(a, b)	compose_symX(a, b)
#define compose_symX(a, b)	a ## b
/*
 * One entry of a composition table.  Entries are looked up by their
 * second character; 'comp' is the character the pair composes to.
 */
struct composition {
	unsigned long c2;	/* 2nd character */
	unsigned long comp;	/* composed character */
};
89 #include "unicodedata_320.c"
90 #define VERSION v320
91 #include "unicode_template.c"
92 #undef VERSION
94 typedef int (*unicode_canonclassproc)(unsigned long v);
95 typedef int (*unicode_decomposeproc)(unsigned long c,
96 const unsigned long **seqp);
97 typedef int (*unicode_composeproc)(unsigned long c,
98 const struct composition **compp);
100 static struct idn__unicode_ops {
101 char *version;
102 unicode_canonclassproc canonclass_proc;
103 unicode_decomposeproc decompose_proc;
104 unicode_composeproc compose_proc;
105 } unicode_versions[] = {
106 #define MAKE_UNICODE_HANDLE(version, suffix) \
107 { version, \
108 compose_sym(canonclass_, suffix), \
109 compose_sym(decompose_, suffix), \
110 compose_sym(compose_, suffix) }
111 MAKE_UNICODE_HANDLE("3.2.0", v320),
112 { NULL },
113 #undef MAKE_UNICODE_HANDLE
116 idn_result_t
117 idn__unicode_create(const char *version,
118 idn__unicode_version_t *versionp) {
119 idn__unicode_version_t v;
121 assert(versionp != NULL);
122 TRACE(("idn__unicode_create(version=%-.50s)\n",
123 version == NULL ? "<NULL>" : version));
125 if (version == NULL)
126 version = UNICODE_CURRENT;
128 for (v = unicode_versions; v->version != NULL; v++) {
129 if (strcmp(v->version, version) == 0) {
130 *versionp = v;
131 return (idn_success);
134 return (idn_notfound);
137 void
138 idn__unicode_destroy(idn__unicode_version_t version) {
139 assert(version != NULL);
140 TRACE(("idn__unicode_destroy()\n"));
141 /* Nothing to do */
145 idn__unicode_canonicalclass(idn__unicode_version_t version, unsigned long c) {
146 if (c > UCS_MAX)
147 return (0);
149 return (*version->canonclass_proc)(c);
152 idn_result_t
153 idn__unicode_decompose(idn__unicode_version_t version,
154 int compat, unsigned long *v, size_t vlen,
155 unsigned long c, int *decomp_lenp) {
156 unsigned long *vorg = v;
157 int seqidx;
158 const unsigned long *seq;
160 assert(v != NULL && vlen >= 0 && decomp_lenp != NULL);
162 if (c > UCS_MAX)
163 return (idn_notfound);
166 * First, check for Hangul.
168 if (SBase <= c && c < SLast) {
169 int idx, t_offset, v_offset, l_offset;
171 idx = c - SBase;
172 t_offset = idx % TCount;
173 idx /= TCount;
174 v_offset = idx % VCount;
175 l_offset = idx / VCount;
176 if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
177 return (idn_buffer_overflow);
178 *v++ = LBase + l_offset;
179 *v++ = VBase + v_offset;
180 if (t_offset > 0)
181 *v++ = TBase + t_offset;
182 *decomp_lenp = v - vorg;
183 return (idn_success);
187 * Look up decomposition table. If no decomposition is defined
188 * or if it is a compatibility decomosition when canonical
189 * decomposition requested, return 'idn_notfound'.
191 seqidx = (*version->decompose_proc)(c, &seq);
192 if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
193 return (idn_notfound);
196 * Copy the decomposed sequence. The end of the sequence are
197 * marked with END_BIT.
199 do {
200 unsigned long c;
201 int dlen;
202 idn_result_t r;
204 c = *seq & ~END_BIT;
206 /* Decompose recursively. */
207 r = idn__unicode_decompose(version, compat, v, vlen, c, &dlen);
208 if (r == idn_success) {
209 v += dlen;
210 vlen -= dlen;
211 } else if (r == idn_notfound) {
212 if (vlen < 1)
213 return (idn_buffer_overflow);
214 *v++ = c;
215 vlen--;
216 } else {
217 return (r);
220 } while ((*seq++ & END_BIT) == 0);
222 *decomp_lenp = v - vorg;
224 return (idn_success);
228 idn__unicode_iscompositecandidate(idn__unicode_version_t version,
229 unsigned long c) {
230 const struct composition *dummy;
232 if (c > UCS_MAX)
233 return (0);
235 /* Check for Hangul */
236 if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
237 return (1);
240 * Look up composition table. If there are no composition
241 * that begins with the given character, it is not a
242 * composition candidate.
244 if ((*version->compose_proc)(c, &dummy) == 0)
245 return (0);
246 else
247 return (1);
250 idn_result_t
251 idn__unicode_compose(idn__unicode_version_t version, unsigned long c1,
252 unsigned long c2, unsigned long *compp) {
253 int n;
254 int lo, hi;
255 const struct composition *cseq;
257 assert(compp != NULL);
259 if (c1 > UCS_MAX || c2 > UCS_MAX)
260 return (idn_notfound);
263 * Check for Hangul.
265 if (LBase <= c1 && c1 < LBase + LCount &&
266 VBase <= c2 && c2 < VBase + VCount) {
268 * Hangul L and V.
270 *compp = SBase +
271 ((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
272 return (idn_success);
273 } else if (SBase <= c1 && c1 < SLast &&
274 TBase <= c2 && c2 < TBase + TCount &&
275 (c1 - SBase) % TCount == 0) {
277 * Hangul LV and T.
279 *compp = c1 + (c2 - TBase);
280 return (idn_success);
284 * Look up composition table. If the result is 0, no composition
285 * is defined. Otherwise, upper 16bits of the result contains
286 * the number of composition that begins with 'c1', and the lower
287 * 16bits is the offset in 'compose_seq'.
289 if ((n = (*version->compose_proc)(c1, &cseq)) == 0)
290 return (idn_notfound);
293 * The composite sequences are sorted by the 2nd character 'c2'.
294 * So we can use binary search.
296 lo = 0;
297 hi = n - 1;
298 while (lo <= hi) {
299 int mid = (lo + hi) / 2;
301 if (cseq[mid].c2 < c2) {
302 lo = mid + 1;
303 } else if (cseq[mid].c2 > c2) {
304 hi = mid - 1;
305 } else {
306 *compp = cseq[mid].comp;
307 return (idn_success);
310 return (idn_notfound);