Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / ucs4.c
blob22ad48bc74211049e19fe47b47f6f1d586ff479a
1 /* $NetBSD$ */
/* RCS identification string; left out when the file is processed by lint. */
#ifndef lint
static char *rcsid = "Id: ucs4.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp";
#endif
7 /*
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
9 *
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
34 * JPNIC.
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
49 #include <config.h>
51 #include <stddef.h>
52 #include <stdlib.h>
53 #include <string.h>
55 #include <idn/assert.h>
56 #include <idn/result.h>
57 #include <idn/logmacro.h>
58 #include <idn/util.h>
59 #include <idn/ucs4.h>
60 #include <idn/debug.h>
/*
 * Unicode surrogate pair.
 *
 * IS_SURROGATE_HIGH/IS_SURROGATE_LOW test whether a value lies in
 * 0xd800..0xdbff or 0xdc00..0xdfff respectively.
 * SURROGATE_HIGH/SURROGATE_LOW yield the two halves of the pair for a
 * value 'v' that is at least SURROGATE_BASE; COMBINE_SURROGATE is the
 * inverse operation.
 * Note that these macros evaluate their arguments more than once.
 */
#define IS_SURROGATE_HIGH(v)	(0xd800 <= (v) && (v) <= 0xdbff)
#define IS_SURROGATE_LOW(v)	(0xdc00 <= (v) && (v) <= 0xdfff)
#define SURROGATE_HIGH(v)	(SURROGATE_H_OFF + (((v) - SURROGATE_BASE) >> 10))
#define SURROGATE_LOW(v)	(SURROGATE_L_OFF + ((v) & 0x3ff))
#define SURROGATE_BASE		0x10000
#define SURROGATE_H_OFF		0xd800
#define SURROGATE_L_OFF		0xdc00
/*
 * The operands are widened to unsigned long before shifting so that the
 * 10-bit left shift cannot overflow when 'int' is only 16 bits wide.
 */
#define COMBINE_SURROGATE(h, l) \
	(SURROGATE_BASE + \
	 (((unsigned long)(h) - SURROGATE_H_OFF) << 10) + \
	 ((unsigned long)(l) - SURROGATE_L_OFF))
/*
 * ASCII ctype macros.
 * Only the 26 unaccented ASCII letters are converted; every other value
 * is returned unchanged.
 * Note that these macros evaluate the argument multiple times.  Be careful.
 */
#define ASCII_TOUPPER(c) \
	(('a' <= (c) && (c) <= 'z') ? ((c) - 'a' + 'A') : (c))
#define ASCII_TOLOWER(c) \
	(('A' <= (c) && (c) <= 'Z') ? ((c) - 'A' + 'a') : (c))
84 idn_result_t
85 idn_ucs4_ucs4toutf16(const unsigned long *ucs4, unsigned short *utf16,
86 size_t tolen) {
87 unsigned short *utf16p = utf16;
88 unsigned long v;
89 idn_result_t r;
91 TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n",
92 idn__debug_ucs4xstring(ucs4, 50), (int)tolen));
94 while (*ucs4 != '\0') {
95 v = *ucs4++;
97 if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
98 WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains "
99 "surrogate pair\n"));
100 r = idn_invalid_encoding;
101 goto ret;
102 } else if (v > 0xffff) {
103 /* Convert to surrogate pair */
104 if (v >= 0x110000) {
105 r = idn_invalid_encoding;
106 goto ret;
108 if (tolen < 2) {
109 r = idn_buffer_overflow;
110 goto ret;
112 *utf16p++ = SURROGATE_HIGH(v);
113 *utf16p++ = SURROGATE_LOW(v);
114 tolen -= 2;
115 } else {
116 if (tolen < 1) {
117 r = idn_buffer_overflow;
118 goto ret;
120 *utf16p++ = v;
121 tolen--;
125 if (tolen < 1) {
126 r = idn_buffer_overflow;
127 goto ret;
129 *utf16p = '\0';
131 r = idn_success;
132 ret:
133 if (r == idn_success) {
134 TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n",
135 idn__debug_utf16xstring(utf16, 50)));
136 } else {
137 TRACE(("idn_ucs4_ucs4toutf16(): %s\n",
138 idn_result_tostring(r)));
140 return (r);
143 idn_result_t
144 idn_ucs4_utf16toucs4(const unsigned short *utf16, unsigned long *ucs4,
145 size_t tolen) {
146 unsigned long *ucs4p = ucs4;
147 unsigned short v0, v1;
148 idn_result_t r;
150 TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n",
151 idn__debug_utf16xstring(utf16, 50), (int)tolen));
153 while (*utf16 != '\0') {
154 v0 = *utf16;
156 if (tolen < 1) {
157 r = idn_buffer_overflow;
158 goto ret;
161 if (IS_SURROGATE_HIGH(v0)) {
162 v1 = *(utf16 + 1);
163 if (!IS_SURROGATE_LOW(v1)) {
164 WARNING(("idn_ucs4_utf16toucs4: "
165 "corrupted surrogate pair\n"));
166 r = idn_invalid_encoding;
167 goto ret;
169 *ucs4p++ = COMBINE_SURROGATE(v0, v1);
170 tolen--;
171 utf16 += 2;
173 } else {
174 *ucs4p++ = v0;
175 tolen--;
176 utf16++;
181 if (tolen < 1) {
182 r = idn_buffer_overflow;
183 goto ret;
185 *ucs4p = '\0';
187 r = idn_success;
188 ret:
189 if (r == idn_success) {
190 TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n",
191 idn__debug_ucs4xstring(ucs4, 50)));
192 } else {
193 TRACE(("idn_ucs4_utf16toucs4(): %s\n",
194 idn_result_tostring(r)));
196 return (r);
199 idn_result_t
200 idn_ucs4_utf8toucs4(const char *utf8, unsigned long *ucs4, size_t tolen) {
201 const unsigned char *utf8p = (const unsigned char *)utf8;
202 unsigned long *ucs4p = ucs4;
203 unsigned long v, min;
204 unsigned char c;
205 int width;
206 int i;
207 idn_result_t r;
209 TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n",
210 idn__debug_xstring(utf8, 50), (int)tolen));
212 while(*utf8p != '\0') {
213 c = *utf8p++;
214 if (c < 0x80) {
215 v = c;
216 min = 0;
217 width = 1;
218 } else if (c < 0xc0) {
219 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
220 r = idn_invalid_encoding;
221 goto ret;
222 } else if (c < 0xe0) {
223 v = c & 0x1f;
224 min = 0x80;
225 width = 2;
226 } else if (c < 0xf0) {
227 v = c & 0x0f;
228 min = 0x800;
229 width = 3;
230 } else if (c < 0xf8) {
231 v = c & 0x07;
232 min = 0x10000;
233 width = 4;
234 } else if (c < 0xfc) {
235 v = c & 0x03;
236 min = 0x200000;
237 width = 5;
238 } else if (c < 0xfe) {
239 v = c & 0x01;
240 min = 0x4000000;
241 width = 6;
242 } else {
243 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
244 r = idn_invalid_encoding;
245 goto ret;
248 for (i = width - 1; i > 0; i--) {
249 c = *utf8p++;
250 if (c < 0x80 || 0xc0 <= c) {
251 WARNING(("idn_ucs4_utf8toucs4: "
252 "invalid character\n"));
253 r = idn_invalid_encoding;
254 goto ret;
256 v = (v << 6) | (c & 0x3f);
259 if (v < min) {
260 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
261 r = idn_invalid_encoding;
262 goto ret;
264 if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
265 WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains "
266 "surrogate pair\n"));
267 r = idn_invalid_encoding;
268 goto ret;
270 if (tolen < 1) {
271 r = idn_buffer_overflow;
272 goto ret;
274 tolen--;
275 *ucs4p++ = v;
278 if (tolen < 1) {
279 r = idn_buffer_overflow;
280 goto ret;
282 *ucs4p = '\0';
284 r = idn_success;
285 ret:
286 if (r == idn_success) {
287 TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n",
288 idn__debug_ucs4xstring(ucs4, 50)));
289 } else {
290 TRACE(("idn_ucs4_utf8toucs4(): %s\n",
291 idn_result_tostring(r)));
293 return (r);
296 idn_result_t
297 idn_ucs4_ucs4toutf8(const unsigned long *ucs4, char *utf8, size_t tolen) {
298 unsigned char *utf8p = (unsigned char *)utf8;
299 unsigned long v;
300 int width;
301 int mask;
302 int offset;
303 idn_result_t r;
305 TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n",
306 idn__debug_ucs4xstring(ucs4, 50), (int)tolen));
308 while (*ucs4 != '\0') {
309 v = *ucs4++;
310 if (IS_SURROGATE_LOW(v) || IS_SURROGATE_HIGH(v)) {
311 WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains "
312 "surrogate pair\n"));
313 r = idn_invalid_encoding;
314 goto ret;
316 if (v < 0x80) {
317 mask = 0;
318 width = 1;
319 } else if (v < 0x800) {
320 mask = 0xc0;
321 width = 2;
322 } else if (v < 0x10000) {
323 mask = 0xe0;
324 width = 3;
325 } else if (v < 0x200000) {
326 mask = 0xf0;
327 width = 4;
328 } else if (v < 0x4000000) {
329 mask = 0xf8;
330 width = 5;
331 } else if (v < 0x80000000) {
332 mask = 0xfc;
333 width = 6;
334 } else {
335 WARNING(("idn_ucs4_ucs4toutf8: invalid character\n"));
336 r = idn_invalid_encoding;
337 goto ret;
340 if (tolen < width) {
341 r = idn_buffer_overflow;
342 goto ret;
344 offset = 6 * (width - 1);
345 *utf8p++ = (v >> offset) | mask;
346 mask = 0x80;
347 while (offset > 0) {
348 offset -= 6;
349 *utf8p++ = ((v >> offset) & 0x3f) | mask;
351 tolen -= width;
354 if (tolen < 1) {
355 r = idn_buffer_overflow;
356 goto ret;
358 *utf8p = '\0';
360 r = idn_success;
361 ret:
362 if (r == idn_success) {
363 TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n",
364 idn__debug_xstring(utf8, 50)));
365 } else {
366 TRACE(("idn_ucs4_ucs4toutf8(): %s\n",
367 idn_result_tostring(r)));
369 return (r);
/*
 * Return the number of elements in the UCS-4 string 'ucs4' that precede
 * its zero terminator.
 */
size_t
idn_ucs4_strlen(const unsigned long *ucs4) {
	size_t len;

	for (len = 0; *ucs4 != '\0'; ucs4++, len++)
		/* nothing to do */ ;

	return (len);
}
/*
 * Copy the UCS-4 string 'from', terminator included, into 'to' and return
 * 'to'.  No bounds are checked, so 'to' must be large enough to hold the
 * copy.
 */
unsigned long *
idn_ucs4_strcpy(unsigned long *to, const unsigned long *from) {
	unsigned long *result = to;

	while (*from != '\0')
		*to++ = *from++;
	*to = '\0';

	return (result);
}
/*
 * Append the UCS-4 string 'from' to the end of the UCS-4 string 'to',
 * write a new terminator, and return 'to'.  No bounds are checked, so 'to'
 * must have room for the combined string.
 */
unsigned long *
idn_ucs4_strcat(unsigned long *to, const unsigned long *from) {
	unsigned long *result = to;

	/* Find the current terminator of the destination. */
	while (*to != '\0')
		to++;

	while (*from != '\0')
		*to++ = *from++;
	*to = '\0';

	return (result);
}
/*
 * Compare two zero-terminated UCS-4 strings element by element.
 *
 * Returns 1 if the first differing element of 'str1' is greater than that
 * of 'str2', -1 if it is smaller, and 0 if the strings are equal.  A
 * string that is a proper prefix of the other compares as smaller.
 */
int
idn_ucs4_strcmp(const unsigned long *str1, const unsigned long *str2) {
	for (;; str1++, str2++) {
		if (*str1 > *str2)
			return (1);
		else if (*str1 < *str2)
			return (-1);
		/* Elements are equal; stop once both are the terminator. */
		if (*str1 == '\0')
			return (0);
	}
}
/*
 * Map the ASCII letters 'A'..'Z' to 'a'..'z'; any other value is returned
 * unchanged.  A function is used so that the argument is evaluated once.
 */
static unsigned long
ucs4_ascii_tolower(unsigned long c) {
	if ('A' <= c && c <= 'Z')
		return (c - 'A' + 'a');
	return (c);
}

/*
 * Compare two zero-terminated UCS-4 strings, ignoring the case of ASCII
 * letters only.
 *
 * Returns 1 if the first differing (case-folded) element of 'str1' is
 * greater than that of 'str2', -1 if it is smaller, and 0 if the strings
 * are equal.  A string that is a proper prefix of the other compares as
 * smaller.
 */
int
idn_ucs4_strcasecmp(const unsigned long *str1, const unsigned long *str2) {
	unsigned long c1, c2;

	for (;; str1++, str2++) {
		c1 = ucs4_ascii_tolower(*str1);
		c2 = ucs4_ascii_tolower(*str2);
		if (c1 > c2)
			return (1);
		else if (c1 < c2)
			return (-1);
		/* Elements are equal; stop once both are the terminator. */
		if (*str1 == '\0')
			return (0);
	}
}
/*
 * Return a newly allocated copy of the UCS-4 string 'str', terminator
 * included, or NULL if the allocation fails.  The copy is obtained from
 * malloc(), so the caller releases it with free().
 */
unsigned long *
idn_ucs4_strdup(const unsigned long *str) {
	size_t length = idn_ucs4_strlen(str);
	unsigned long *dupstr;

	/* One extra element for the terminator. */
	dupstr = malloc(sizeof(*dupstr) * (length + 1));
	if (dupstr == NULL)
		return NULL;
	memcpy(dupstr, str, sizeof(*dupstr) * (length + 1));

	return dupstr;
}