Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / fs / hfs / unicode.c
blobb1452257b42b6793c0cb6c0fbe7f37739e8354f8
1 /* $NetBSD: unicode.c,v 1.1.1.1 2007/03/06 00:10:39 dillo Exp $ */
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Dieter Baron.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD$");
35 #include <sys/null.h>
37 #include "unicode.h"
39 size_t
40 utf8_to_utf16(uint16_t *dst, size_t dst_len,
41 const char *src, size_t src_len,
42 int flags, int *errp)
44 const unsigned char *s;
45 size_t spos, dpos;
46 int error;
47 uint16_t c;
49 #define IS_CONT(c) (((c)&0xc0) == 0x80)
51 error = 0;
52 s = (const unsigned char *)src;
53 spos = dpos = 0;
54 while (spos<src_len) {
55 if (s[spos] < 0x80)
56 c = s[spos++];
57 else if ((flags & UNICODE_UTF8_LATIN1_FALLBACK)
58 && (spos >= src_len || !IS_CONT(s[spos+1]))
59 && s[spos]>=0xa0) {
60 /* not valid UTF-8, assume ISO 8859-1 */
61 c = s[spos++];
63 else if (s[spos] < 0xc0 || s[spos] >= 0xf5) {
64 /* continuation byte without lead byte
65 or lead byte for codepoint above 0x10ffff */
66 error++;
67 spos++;
68 continue;
70 else if (s[spos] < 0xe0) {
71 if (spos >= src_len || !IS_CONT(s[spos+1])) {
72 spos++;
73 error++;
74 continue;
76 c = ((s[spos] & 0x3f) << 6) | (s[spos+1] & 0x3f);
77 spos += 2;
78 if (c < 0x80) {
79 /* overlong encoding */
80 error++;
81 continue;
84 else if (s[spos] < 0xf0) {
85 if (spos >= src_len-2
86 || !IS_CONT(s[spos+1]) || !IS_CONT(s[spos+2])) {
87 spos++;
88 error++;
89 continue;
91 c = ((s[spos] & 0x0f) << 12) | ((s[spos+1] & 0x3f) << 6)
92 | (s[spos+2] & 0x3f);
93 spos += 3;
94 if (c < 0x800 || (c & 0xdf00) == 0xd800 ) {
95 /* overlong encoding or encoded surrogate */
96 error++;
97 continue;
100 else {
101 uint32_t cc;
102 /* UTF-16 surrogate pair */
104 if (spos >= src_len-3 || !IS_CONT(s[spos+1])
105 || !IS_CONT(s[spos+2]) || !IS_CONT(s[spos+3])) {
106 spos++;
107 error++;
109 continue;
111 cc = ((s[spos] & 0x03) << 18) | ((s[spos+1] & 0x3f) << 12)
112 | ((s[spos+2] & 0x3f) << 6) | (s[spos+3] & 0x3f);
113 spos += 4;
114 if (cc < 0x10000) {
115 /* overlong encoding */
116 error++;
117 continue;
119 if (dst && dpos < dst_len)
120 dst[dpos] = (0xd800 | ((cc-0x10000)>>10));
121 dpos++;
122 c = 0xdc00 | ((cc-0x10000) & 0x3ffff);
125 if (dst && dpos < dst_len)
126 dst[dpos] = c;
127 dpos++;
130 if (errp)
131 *errp = error;
133 return dpos;
135 #undef IS_CONT
139 size_t
140 utf16_to_utf8(char *dst, size_t dst_len,
141 const uint16_t *src, size_t src_len,
142 int flags, int *errp)
144 uint8_t spos, dpos;
145 int error;
147 #define CHECK_LENGTH(l) (dpos > dst_len-(l) ? dst=NULL : NULL)
148 #define ADD_BYTE(b) (dst ? dst[dpos] = (b) : 0, dpos++)
150 error = 0;
151 dpos = 0;
152 for (spos=0; spos<src_len; spos++) {
153 if (src[spos] < 0x80) {
154 CHECK_LENGTH(1);
155 ADD_BYTE(src[spos]);
157 else if (src[spos] < 0x800) {
158 CHECK_LENGTH(2);
159 ADD_BYTE(0xc0 | (src[spos]>>6));
160 ADD_BYTE(0x80 | (src[spos] & 0x3f));
162 else if ((src[spos] & 0xdc00) == 0xd800) {
163 uint32_t c;
164 /* first surrogate */
165 if (spos == src_len - 1 || (src[spos] & 0xdc00) != 0xdc00) {
166 /* no second surrogate present */
167 error++;
168 continue;
170 spos++;
171 CHECK_LENGTH(4);
172 c = (((src[spos]&0x3ff) << 10) | (src[spos+1]&0x3ff)) + 0x10000;
173 ADD_BYTE(0xf0 | (c>>18));
174 ADD_BYTE(0x80 | ((c>>12) & 0x3f));
175 ADD_BYTE(0x80 | ((c>>6) & 0x3f));
176 ADD_BYTE(0x80 | (c & 0x3f));
178 else if ((src[spos] & 0xdc00) == 0xdc00) {
179 /* second surrogate without preceding first surrogate */
180 error++;
182 else {
183 CHECK_LENGTH(3);
184 ADD_BYTE(0xe0 | src[spos]>>12);
185 ADD_BYTE(0x80 | ((src[spos]>>6) & 0x3f));
186 ADD_BYTE(0x80 | (src[spos] & 0x3f));
190 if (errp)
191 *errp = error;
193 return dpos;
195 #undef ADD_BYTE
196 #undef CHECK_LENGTH