/*
 * RCS revision identifier for this translation unit.
 *
 * Stored as an immutable array rather than a writable pointer to a string
 * literal, so the text can never be modified through this name.
 */
static const char rcsid[] =
	"Id: ucs4.c,v 1.1.1.1 2003/06/04 00:26:14 marka Exp";
8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved.
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
55 #include <idn/assert.h>
56 #include <idn/result.h>
57 #include <idn/logmacro.h>
60 #include <idn/debug.h>
/*
 * Unicode surrogate pair.
 *
 * A code point at or above SURROGATE_BASE is split into a high surrogate
 * (starting at SURROGATE_H_OFF) and a low surrogate (starting at
 * SURROGATE_L_OFF), each carrying 10 bits of (code point - SURROGATE_BASE).
 *
 * All arithmetic is done in unsigned long so that the 10-bit shift cannot
 * be truncated when the argument is an unsigned short and int is narrower
 * than 21 bits.
 *
 * These macros evaluate their arguments more than once; do not pass
 * expressions with side effects.
 */
#define SURROGATE_BASE		0x10000
#define SURROGATE_H_OFF		0xd800
#define SURROGATE_L_OFF		0xdc00

/* Non-zero when v lies in the high (leading) surrogate range. */
#define IS_SURROGATE_HIGH(v)	(0xd800 <= (v) && (v) <= 0xdbff)
/* Non-zero when v lies in the low (trailing) surrogate range. */
#define IS_SURROGATE_LOW(v)	(0xdc00 <= (v) && (v) <= 0xdfff)

/* High surrogate for code point v; v must be >= SURROGATE_BASE. */
#define SURROGATE_HIGH(v) \
	(SURROGATE_H_OFF + (((unsigned long)(v) - SURROGATE_BASE) >> 10))
/*
 * Low surrogate for code point v.  Masking v directly is equivalent to
 * masking (v - SURROGATE_BASE) because the low 10 bits of SURROGATE_BASE
 * are zero.
 */
#define SURROGATE_LOW(v) \
	(SURROGATE_L_OFF + ((unsigned long)(v) & 0x3ff))

/* Rebuild the code point from high surrogate h and low surrogate l. */
#define COMBINE_SURROGATE(h, l) \
	(SURROGATE_BASE + \
	 (((unsigned long)(h) - SURROGATE_H_OFF) << 10) + \
	 ((unsigned long)(l) - SURROGATE_L_OFF))
/*
 * ASCII-only case conversion.
 *
 * The letter ranges are spelled as code point values (0x41-0x5a for A-Z,
 * 0x61-0x7a for a-z) rather than character literals, so the result does
 * not depend on the execution character set.  Any value outside the
 * relevant range is returned unchanged.
 *
 * Note that these macros evaluate the argument multiple times.  Be careful.
 */
#define ASCII_TOUPPER(c) \
	((0x61 <= (c) && (c) <= 0x7a) ? ((c) - 0x61 + 0x41) : (c))
#define ASCII_TOLOWER(c) \
	((0x41 <= (c) && (c) <= 0x5a) ? ((c) - 0x41 + 0x61) : (c))
85 idn_ucs4_ucs4toutf16(const unsigned long *ucs4
, unsigned short *utf16
,
87 unsigned short *utf16p
= utf16
;
91 TRACE(("idn_ucs4_ucs4toutf16(ucs4=\"%s\", tolen=%d)\n",
92 idn__debug_ucs4xstring(ucs4
, 50), (int)tolen
));
94 while (*ucs4
!= '\0') {
97 if (IS_SURROGATE_LOW(v
) || IS_SURROGATE_HIGH(v
)) {
98 WARNING(("idn_ucs4_ucs4toutf16: UCS4 string contains "
100 r
= idn_invalid_encoding
;
102 } else if (v
> 0xffff) {
103 /* Convert to surrogate pair */
105 r
= idn_invalid_encoding
;
109 r
= idn_buffer_overflow
;
112 *utf16p
++ = SURROGATE_HIGH(v
);
113 *utf16p
++ = SURROGATE_LOW(v
);
117 r
= idn_buffer_overflow
;
126 r
= idn_buffer_overflow
;
133 if (r
== idn_success
) {
134 TRACE(("idn_ucs4_ucs4toutf16(): success (utf16=\"%s\")\n",
135 idn__debug_utf16xstring(utf16
, 50)));
137 TRACE(("idn_ucs4_ucs4toutf16(): %s\n",
138 idn_result_tostring(r
)));
144 idn_ucs4_utf16toucs4(const unsigned short *utf16
, unsigned long *ucs4
,
146 unsigned long *ucs4p
= ucs4
;
147 unsigned short v0
, v1
;
150 TRACE(("idn_ucs4_utf16toucs4(utf16=\"%s\", tolen=%d)\n",
151 idn__debug_utf16xstring(utf16
, 50), (int)tolen
));
153 while (*utf16
!= '\0') {
157 r
= idn_buffer_overflow
;
161 if (IS_SURROGATE_HIGH(v0
)) {
163 if (!IS_SURROGATE_LOW(v1
)) {
164 WARNING(("idn_ucs4_utf16toucs4: "
165 "corrupted surrogate pair\n"));
166 r
= idn_invalid_encoding
;
169 *ucs4p
++ = COMBINE_SURROGATE(v0
, v1
);
182 r
= idn_buffer_overflow
;
189 if (r
== idn_success
) {
190 TRACE(("idn_ucs4_utf16toucs4(): success (ucs4=\"%s\")\n",
191 idn__debug_ucs4xstring(ucs4
, 50)));
193 TRACE(("idn_ucs4_utf16toucs4(): %s\n",
194 idn_result_tostring(r
)));
200 idn_ucs4_utf8toucs4(const char *utf8
, unsigned long *ucs4
, size_t tolen
) {
201 const unsigned char *utf8p
= (const unsigned char *)utf8
;
202 unsigned long *ucs4p
= ucs4
;
203 unsigned long v
, min
;
209 TRACE(("idn_ucs4_utf8toucs4(utf8=\"%s\", tolen=%d)\n",
210 idn__debug_xstring(utf8
, 50), (int)tolen
));
212 while(*utf8p
!= '\0') {
218 } else if (c
< 0xc0) {
219 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
220 r
= idn_invalid_encoding
;
222 } else if (c
< 0xe0) {
226 } else if (c
< 0xf0) {
230 } else if (c
< 0xf8) {
234 } else if (c
< 0xfc) {
238 } else if (c
< 0xfe) {
243 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
244 r
= idn_invalid_encoding
;
248 for (i
= width
- 1; i
> 0; i
--) {
250 if (c
< 0x80 || 0xc0 <= c
) {
251 WARNING(("idn_ucs4_utf8toucs4: "
252 "invalid character\n"));
253 r
= idn_invalid_encoding
;
256 v
= (v
<< 6) | (c
& 0x3f);
260 WARNING(("idn_ucs4_utf8toucs4: invalid character\n"));
261 r
= idn_invalid_encoding
;
264 if (IS_SURROGATE_LOW(v
) || IS_SURROGATE_HIGH(v
)) {
265 WARNING(("idn_ucs4_utf8toucs4: UTF-8 string contains "
266 "surrogate pair\n"));
267 r
= idn_invalid_encoding
;
271 r
= idn_buffer_overflow
;
279 r
= idn_buffer_overflow
;
286 if (r
== idn_success
) {
287 TRACE(("idn_ucs4_utf8toucs4(): success (ucs4=\"%s\")\n",
288 idn__debug_ucs4xstring(ucs4
, 50)));
290 TRACE(("idn_ucs4_utf8toucs4(): %s\n",
291 idn_result_tostring(r
)));
297 idn_ucs4_ucs4toutf8(const unsigned long *ucs4
, char *utf8
, size_t tolen
) {
298 unsigned char *utf8p
= (unsigned char *)utf8
;
305 TRACE(("idn_ucs4_ucs4toutf8(ucs4=\"%s\", tolen=%d)\n",
306 idn__debug_ucs4xstring(ucs4
, 50), (int)tolen
));
308 while (*ucs4
!= '\0') {
310 if (IS_SURROGATE_LOW(v
) || IS_SURROGATE_HIGH(v
)) {
311 WARNING(("idn_ucs4_ucs4toutf8: UCS4 string contains "
312 "surrogate pair\n"));
313 r
= idn_invalid_encoding
;
319 } else if (v
< 0x800) {
322 } else if (v
< 0x10000) {
325 } else if (v
< 0x200000) {
328 } else if (v
< 0x4000000) {
331 } else if (v
< 0x80000000) {
335 WARNING(("idn_ucs4_ucs4toutf8: invalid character\n"));
336 r
= idn_invalid_encoding
;
341 r
= idn_buffer_overflow
;
344 offset
= 6 * (width
- 1);
345 *utf8p
++ = (v
>> offset
) | mask
;
349 *utf8p
++ = ((v
>> offset
) & 0x3f) | mask
;
355 r
= idn_buffer_overflow
;
362 if (r
== idn_success
) {
363 TRACE(("idn_ucs4_ucs4toutf8(): success (utf8=\"%s\")\n",
364 idn__debug_xstring(utf8
, 50)));
366 TRACE(("idn_ucs4_ucs4toutf8(): %s\n",
367 idn_result_tostring(r
)));
373 idn_ucs4_strlen(const unsigned long *ucs4
) {
376 for (len
= 0; *ucs4
!= '\0'; ucs4
++, len
++)
377 /* nothing to do */ ;
383 idn_ucs4_strcpy(unsigned long *to
, const unsigned long *from
) {
384 unsigned long *result
= to
;
386 while (*from
!= '\0')
394 idn_ucs4_strcat(unsigned long *to
, const unsigned long *from
) {
395 unsigned long *result
= to
;
400 while (*from
!= '\0')
408 idn_ucs4_strcmp(const unsigned long *str1
, const unsigned long *str2
) {
409 while (*str1
!= '\0') {
412 else if (*str1
< *str2
)
420 else if (*str1
< *str2
)
427 idn_ucs4_strcasecmp(const unsigned long *str1
, const unsigned long *str2
) {
428 unsigned long c1
, c2
;
430 while (*str1
!= '\0') {
431 c1
= ASCII_TOLOWER(*str1
);
432 c2
= ASCII_TOLOWER(*str2
);
441 c1
= ASCII_TOLOWER(*str1
);
442 c2
= ASCII_TOLOWER(*str2
);
453 idn_ucs4_strdup(const unsigned long *str
) {
454 size_t length
= idn_ucs4_strlen(str
);
455 unsigned long *dupstr
;
457 dupstr
= (unsigned long *)malloc(sizeof(*str
) * (length
+ 1));
460 memcpy(dupstr
, str
, sizeof(*str
) * (length
+ 1));