1 /* $NetBSD: utf8.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */
/* RCS identification string for this file, kept as a file-local static. */
4 static char *rcsid
= "Id: utf8.c,v 1.1 2003/06/04 00:26:44 marka Exp ";
8 * Copyright (c) 2000 Japan Network Information Center. All rights reserved.
10 * By using this file, you agree to the terms and conditions set forth bellow.
12 * LICENSE TERMS AND CONDITIONS
14 * The following License Terms and Conditions apply, unless a different
15 * license is obtained from Japan Network Information Center ("JPNIC"),
16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17 * Chiyoda-ku, Tokyo 101-0047, Japan.
19 * 1. Use, Modification and Redistribution (including distribution of any
20 * modified or derived work) in source and/or binary forms is permitted
21 * under this License Terms and Conditions.
23 * 2. Redistribution of source code must retain the copyright notices as they
24 * appear in each source code file, this License Terms and Conditions.
26 * 3. Redistribution in binary form must reproduce the Copyright Notice,
27 * this License Terms and Conditions, in the documentation and/or other
28 * materials provided with the distribution. For the purposes of binary
29 * distribution the "Copyright Notice" refers to the following language:
30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
32 * 4. The name of JPNIC may not be used to endorse or promote products
33 * derived from this Software without specific prior written approval of
36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
53 #include <idn/assert.h>
54 #include <idn/logmacro.h>
56 #include <idn/debug.h>
/*
 * Byte-classification helpers used by the UTF-8 routines in this file.
 *
 * UTF8_WIDTH(c): the expansion is not visible in this view. Callers store
 * the result in an int named "width" and compare it against 0 and against
 * the number of available bytes; presumably it maps a lead byte to the
 * length of the sequence it starts, with 0 for a byte that cannot start
 * one -- TODO confirm against the full macro body.
 *
 * VALID_CONT_BYTE(c): true when c lies in the range 0x80..0xbf inclusive.
 * The argument appears twice in the expansion, so it may be evaluated
 * twice and must not have side effects.
 */
58 #define UTF8_WIDTH(c) \
67 #define VALID_CONT_BYTE(c) (0x80 <= (c) && (c) < 0xc0)
/*
 * idn_utf8_mblen -- inspect the first byte of the string s.
 *
 * NOTE(review): the return statements of this function are not visible in
 * this view. Judging by the name it presumably reports the byte length of
 * the character starting at s -- confirm against the full source.
 */
70 idn_utf8_mblen(const char *s
) {
/* Read through unsigned char so bytes >= 0x80 stay non-negative in c. */
71 int c
= *(unsigned char *)s
;
/* Debug trace of the input, rendered by idn__debug_hexstring(). */
76 TRACE(("idn_utf8_mblen(s=<%s>)\n", idn__debug_hexstring(s
, 6)));
/*
 * idn_utf8_getmb -- fetch one multibyte character from s into buf.
 *
 * s    input bytes; the first byte is classified with UTF8_WIDTH().
 * len  number of bytes available at s (compared against the width).
 * buf  destination buffer, accessed through the unsigned char pointer q.
 *
 * NOTE(review): several lines of this function, including its return
 * statements, are not visible in this view, so the return contract is not
 * documented here.
 */
83 idn_utf8_getmb(const char *s
, size_t len
, char *buf
) {
84 /* Caller contract: buf must be at least 7 bytes long. */
85 const unsigned char *p
= (const unsigned char *)s
;
86 unsigned char *q
= (unsigned char *)buf
;
87 int width
= UTF8_WIDTH(*p
);
/*
 * NOTE(review): len is a size_t but the format string uses %d. If TRACE
 * forwards to a printf-style formatter this is a mismatched conversion
 * specifier -- confirm, then cast the argument or change the specifier.
 */
93 TRACE(("idn_utf8_getmb(s=<%s>,len=%d)\n",
94 idn__debug_hexstring(s
, 6), len
));
/*
 * Guard: the lead byte yielded width 0, or fewer than width bytes are
 * available. The action taken is not visible in this view.
 */
97 if (width
== 0 || len
< width
)
100 /* Copy the first byte. */
103 /* .. and the rest. */
/* Trailing bytes are checked against the 0x80..0xbf continuation range. */
106 if (!VALID_CONT_BYTE(*p
))
/*
 * idn_utf8_getwc -- decode one UTF-8 sequence starting at s.
 *
 * s    input bytes.
 * len  number of bytes available at s.
 * vp   output pointer for the decoded value; the store through it is not
 *      visible in this view.
 *
 * Other functions in this file treat a result greater than 0 as success
 * and also use the result as a width. NOTE(review): the return statements
 * themselves are not visible here -- confirm the exact contract.
 */
114 idn_utf8_getwc(const char *s
, size_t len
, unsigned long *vp
) {
117 const unsigned char *p
= (const unsigned char *)s
;
/*
 * NOTE(review): len is a size_t but the format string uses %d. If TRACE
 * forwards to a printf-style formatter this is a mismatched conversion
 * specifier -- confirm, then cast the argument or change the specifier.
 */
125 TRACE(("idn_utf8_getwc(s=<%s>,len=%d)\n",
126 idn__debug_hexstring(s
, 10), len
));
/* Classify the lead byte; the declaration and load of c are not visible. */
130 width
= UTF8_WIDTH(c
);
/*
 * NOTE(review): the message names idn_utf8_getint, not idn_utf8_getwc.
 * Possibly a leftover from a rename; the runtime string is left untouched.
 */
160 FATAL(("idn_utf8_getint: internal error\n"));
/* A trailing byte outside 0x80..0xbf fails this check. */
169 if (!VALID_CONT_BYTE(*p
))
/* Shift the accumulator left by 6 bits and append the low 6 bits of *p. */
171 v
= (v
<< 6) | (*p
& 0x3f);
/*
 * idn_utf8_putwc -- encode the value v as a byte sequence written at s.
 *
 * s    output buffer, written through the unsigned char pointer p.
 * len  size of the output buffer; its use is not visible in this view.
 * v    value to encode.
 *
 * The if/else chain classifies v by magnitude against the limits 0x800,
 * 0x10000, 0x200000, 0x4000000 and 0x80000000. NOTE(review): the first
 * branch, the assignments to off and mask, and the return statements are
 * not visible here; presumably each range selects a sequence length --
 * confirm against the full source.
 */
183 idn_utf8_putwc(char *s
, size_t len
, unsigned long v
) {
184 unsigned char *p
= (unsigned char *)s
;
192 TRACE(("idn_utf8_putwc(v=%lx)\n", v
));
198 } else if (v
< 0x800) {
201 } else if (v
< 0x10000) {
204 } else if (v
< 0x200000) {
207 } else if (v
< 0x4000000) {
210 } else if (v
< 0x80000000) {
/* Lead byte: the bits of v above off, combined with the mask bits. */
221 *p
++ = (v
>> off
) | mask
;
/* Following byte: a 6-bit slice of v at off, combined with the mask bits. */
225 *p
++ = ((v
>> off
) & 0x3f) | mask
;
/*
 * idn_utf8_isvalidchar -- test whether s starts with a decodable character.
 *
 * Returns nonzero when idn_utf8_getwc() yields a result greater than 0 for
 * s, and zero otherwise. The decoded value goes to a local (dummy, whose
 * declaration is not visible in this view) and is not otherwise used in
 * the visible code.
 *
 * NOTE(review): the available length passed to idn_utf8_getwc() is
 * hard-coded to 6 regardless of how many bytes s really holds; presumably
 * this relies on s being NUL-terminated -- confirm.
 */
231 idn_utf8_isvalidchar(const char *s
) {
234 TRACE(("idn_utf8_isvalidchar(s=<%s>)\n",
235 idn__debug_hexstring(s
, 6)));
237 return (idn_utf8_getwc(s
, 6, &dummy
) > 0);
/*
 * idn_utf8_isvalidstring -- validate the string s using idn_utf8_getwc().
 *
 * NOTE(review): the loop around the idn_utf8_getwc() call and the return
 * statements are not visible in this view; presumably the string is walked
 * one character at a time using the returned width -- confirm.
 */
241 idn_utf8_isvalidstring(const char *s
) {
247 TRACE(("idn_utf8_isvalidstring(s=<%s>)\n",
248 idn__debug_hexstring(s
, 20)));
/* Decode at s with an assumed 6 available bytes; the result is kept as width. */
251 width
= idn_utf8_getwc(s
, 6, &dummy
);
260 idn_utf8_findfirstbyte(const char *s
, const char *known_top
) {
261 const unsigned char *p
= (const unsigned char *)s
;
262 const unsigned char *t
= (const unsigned char *)known_top
;
264 assert(s
!= NULL
&& known_top
!= NULL
&& known_top
<= s
);
266 TRACE(("idn_utf8_findfirstbyte(s=<%s>)\n",
267 idn__debug_hexstring(s
, 8)));
270 if (!VALID_CONT_BYTE(*p
))
274 if (p
< t
|| UTF8_WIDTH(*p
) == 0)