Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / utf8.c
bloba7ae1f3750f709ef0daaaa659074cafa6bbee1e6
1 /* $NetBSD$ */
#ifndef lint
/*
 * RCS revision identifier for this file; compiled out when `lint' is
 * defined.  The pointee is a string literal, so it is const-qualified.
 */
static const char *rcsid = "Id: utf8.c,v 1.1.1.1 2003/06/04 00:26:44 marka Exp";
#endif
7 /*
8 * Copyright (c) 2000 Japan Network Information Center. All rights reserved.
9 *
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
34 * JPNIC.
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 */
49 #include <config.h>
51 #include <stddef.h>
53 #include <idn/assert.h>
54 #include <idn/logmacro.h>
55 #include <idn/utf8.h>
56 #include <idn/debug.h>
/*
 * UTF8_WIDTH(c) -- length in bytes (1..6) of the UTF-8 sequence whose
 * lead byte is `c', or 0 when `c' cannot start a sequence (a
 * continuation byte 0x80..0xbf, or 0xfe/0xff).
 *
 * The argument is reduced to an unsigned char first, so a plain (possibly
 * signed) char holding a byte >= 0x80 is classified correctly instead of
 * comparing as a negative number.  The argument is evaluated more than
 * once; do not pass an expression with side effects.
 */
#define UTF8_WIDTH(c) \
	(((unsigned char)(c) < 0x80) ? 1 : \
	 ((unsigned char)(c) < 0xc0) ? 0 : \
	 ((unsigned char)(c) < 0xe0) ? 2 : \
	 ((unsigned char)(c) < 0xf0) ? 3 : \
	 ((unsigned char)(c) < 0xf8) ? 4 : \
	 ((unsigned char)(c) < 0xfc) ? 5 : \
	 ((unsigned char)(c) < 0xfe) ? 6 : 0)
/*
 * VALID_CONT_BYTE(c) -- non-zero when `c' is a UTF-8 continuation byte
 * (0x80..0xbf).  The argument is reduced to an unsigned char so that a
 * plain (possibly signed) char is tested by its byte value.  The argument
 * is evaluated twice; do not pass an expression with side effects.
 */
#define VALID_CONT_BYTE(c) \
	(0x80 <= (unsigned char)(c) && (unsigned char)(c) < 0xc0)
/*
 * Return the byte length (1..6) of the UTF-8 character that starts at
 * `s', judged from its first byte alone, or 0 if that byte cannot begin
 * a character.  Continuation bytes are not examined.
 */
int
idn_utf8_mblen(const char *s) {
	int c;

	/* Check the pointer before it is dereferenced, not after. */
	assert(s != NULL);

#if 0
	TRACE(("idn_utf8_mblen(s=<%s>)\n", idn__debug_hexstring(s, 6)));
#endif

	c = *(const unsigned char *)s;
	return UTF8_WIDTH(c);
}
/*
 * Copy the single UTF-8 character at the head of `s' (at most `len'
 * bytes are available) into `buf'.
 *
 * Returns the number of bytes copied (1..6), or 0 when `len' is 0, the
 * first byte is not a valid lead byte, fewer than the required number of
 * bytes are available, or a continuation byte is malformed.  No
 * terminator is appended.  On a malformed continuation byte the bytes
 * preceding it have already been stored in `buf'.
 */
int
idn_utf8_getmb(const char *s, size_t len, char *buf) {
	/* buf must be at least 7-bytes long */
	const unsigned char *p = (const unsigned char *)s;
	unsigned char *q = (unsigned char *)buf;
	int width;
	int w;

	assert(s != NULL);

#if 0
	TRACE(("idn_utf8_getmb(s=<%s>,len=%d)\n",
	      idn__debug_hexstring(s, 6), len));
#endif

	/* With no bytes available, s[0] must not be read at all. */
	if (len == 0)
		return (0);

	width = UTF8_WIDTH(*p);
	if (width == 0 || len < (size_t)width)
		return (0);

	/* Copy the first byte. */
	*q++ = *p++;

	/* .. and the rest. */
	for (w = width - 1; w > 0; w--) {
		if (!VALID_CONT_BYTE(*p))
			return (0);
		*q++ = *p++;
	}
	return (width);
}
/*
 * Decode the UTF-8 character at the head of `s' (at most `len' bytes are
 * available) and store its code point in `*vp'.
 *
 * Returns the number of bytes consumed (1..6), or 0 on failure: `len' is
 * 0, the first byte is not a valid lead byte, the sequence is longer
 * than `len', a continuation byte is malformed, or the value is encoded
 * in more bytes than necessary (overlong form).  `*vp' is written only
 * on success.
 */
int
idn_utf8_getwc(const char *s, size_t len, unsigned long *vp) {
	unsigned long v;
	unsigned long min;
	const unsigned char *p = (const unsigned char *)s;
	int c;
	int width;
	int rest;

	assert(s != NULL);

#if 0
	TRACE(("idn_utf8_getwc(s=<%s>,len=%d)\n",
	      idn__debug_hexstring(s, 10), len));
#endif

	/* With no bytes available, s[0] must not be read at all. */
	if (len == 0)
		return (0);

	c = *p++;
	width = UTF8_WIDTH(c);

	/*
	 * Extract the payload bits of the lead byte and note the smallest
	 * value that legitimately needs a sequence of this length.
	 */
	switch (width) {
	case 0:
		return (0);
	case 1:
		v = c;
		min = 0;
		break;
	case 2:
		v = c & 0x1f;
		min = 0x80;
		break;
	case 3:
		v = c & 0xf;
		min = 0x800;
		break;
	case 4:
		v = c & 0x7;
		min = 0x10000;
		break;
	case 5:
		v = c & 3;
		min = 0x200000;
		break;
	case 6:
		v = c & 1;
		min = 0x4000000;
		break;
	default:
		FATAL(("idn_utf8_getwc: internal error\n"));
		return (0);
	}

	if (len < (size_t)width)
		return (0);

	/* Fold in six payload bits from each continuation byte. */
	rest = width - 1;
	while (rest-- > 0) {
		if (!VALID_CONT_BYTE(*p))
			return (0);
		v = (v << 6) | (*p & 0x3f);
		p++;
	}

	/* Reject overlong encodings. */
	if (v < min)
		return (0);

	*vp = v;
	return (width);
}
/*
 * Encode the code point `v' as UTF-8 into `s', which has room for `len'
 * bytes.
 *
 * Returns the number of bytes written (1..6), or 0 when `v' is
 * 0x80000000 or larger, or when `len' is too small for the encoding.
 * Nothing is written on failure and no terminator is appended.
 */
int
idn_utf8_putwc(char *s, size_t len, unsigned long v) {
	unsigned char *p = (unsigned char *)s;
	int mask;
	int off;
	int l;

	assert(s != NULL);

#if 0
	TRACE(("idn_utf8_putwc(v=%lx)\n", v));
#endif

	/* Choose the sequence length and the lead-byte marker bits. */
	if (v < 0x80) {
		mask = 0;
		l = 1;
	} else if (v < 0x800) {
		mask = 0xc0;
		l = 2;
	} else if (v < 0x10000) {
		mask = 0xe0;
		l = 3;
	} else if (v < 0x200000) {
		mask = 0xf0;
		l = 4;
	} else if (v < 0x4000000) {
		mask = 0xf8;
		l = 5;
	} else if (v < 0x80000000) {
		mask = 0xfc;
		l = 6;
	} else {
		return (0);
	}

	if (len < (size_t)l)
		return (0);

	/* Lead byte: marker bits plus the most significant payload bits. */
	off = 6 * (l - 1);
	*p++ = (unsigned char)((v >> off) | (unsigned long)mask);

	/* Continuation bytes: 10xxxxxx, six payload bits each. */
	mask = 0x80;
	while (off > 0) {
		off -= 6;
		*p++ = (unsigned char)(((v >> off) & 0x3f) | (unsigned long)mask);
	}
	return l;
}
/*
 * Return non-zero if a well-formed UTF-8 character (as accepted by
 * idn_utf8_getwc() with up to 6 bytes available) starts at `s', and 0
 * otherwise.
 */
int
idn_utf8_isvalidchar(const char *s) {
	unsigned long dummy;

	/* Same precondition as the other entry points in this file. */
	assert(s != NULL);

	TRACE(("idn_utf8_isvalidchar(s=<%s>)\n",
	      idn__debug_hexstring(s, 6)));

	return (idn_utf8_getwc(s, 6, &dummy) > 0);
}
/*
 * Return 1 if the NUL-terminated string `s' consists solely of characters
 * that idn_utf8_getwc() accepts, and 0 as soon as one is rejected.  An
 * empty string is valid.
 */
int
idn_utf8_isvalidstring(const char *s) {
	unsigned long ignored;
	int n;

	assert(s != NULL);

	TRACE(("idn_utf8_isvalidstring(s=<%s>)\n",
	      idn__debug_hexstring(s, 20)));

	/* Step through the string one decoded character at a time. */
	for (; *s != '\0'; s += n) {
		n = idn_utf8_getwc(s, 6, &ignored);
		if (n == 0)
			return (0);
	}
	return (1);
}
/*
 * Starting at `s' and moving backwards no further than `known_top',
 * find the first byte of the UTF-8 character that contains `s'.
 *
 * Returns a pointer to that lead byte, or NULL when every byte from `s'
 * back to `known_top' is a continuation byte, or when the byte the scan
 * stops on cannot start a character.  `known_top' must not be greater
 * than `s'.
 */
char *
idn_utf8_findfirstbyte(const char *s, const char *known_top) {
	const unsigned char *p = (const unsigned char *)s;
	const unsigned char *t = (const unsigned char *)known_top;

	assert(s != NULL && known_top != NULL && known_top <= s);

	TRACE(("idn_utf8_findfirstbyte(s=<%s>)\n",
	      idn__debug_hexstring(s, 8)));

	/*
	 * Skip continuation bytes.  The lower bound is tested before the
	 * decrement so that p is never moved to known_top - 1; forming a
	 * pointer before the start of the buffer is undefined behaviour.
	 */
	while (VALID_CONT_BYTE(*p)) {
		if (p == t)
			return (NULL);
		p--;
	}

	if (UTF8_WIDTH(*p) == 0)
		return (NULL);

	return ((char *)p);
}