Cygwin: access: Fix X_OK behaviour for backup operators and admins
[newlib-cygwin.git] / newlib / libc / iconv / ces / utf-8.c
blob116f25925794f1d4c088e1d03e0a98009f105e72
/*
 * Copyright (c) 2003-2004, Artem B. Bityuckiy
 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26 #include "cesbi.h"
28 #if defined (ICONV_TO_UCS_CES_UTF_8) \
29 || defined (ICONV_FROM_UCS_CES_UTF_8)
31 #include <_ansi.h>
32 #include <reent.h>
33 #include <sys/types.h>
34 #include "../lib/local.h"
35 #include "../lib/ucsconv.h"
37 #define UTF8_MB_CUR_MAX 6
/*
 * UTF-8 CES converter doesn't interpret BOM. Reject overlong sequences,
 * U+FFFF, U+FFFE codes, UTF-16 surrogate codes and all codes > 0x7FFFFFFF.
 */
#if defined (ICONV_FROM_UCS_CES_UTF_8)
/*
 * convert_from_ucs - encode one UCS-4 code as a UTF-8 byte sequence.
 *
 *   data          not used by this converter.
 *   in            the code to encode.
 *   outbuf        in/out: current output position; advanced past the
 *                 bytes written on success.
 *   outbytesleft  in/out: room left at *outbuf; reduced by the number of
 *                 bytes written on success.
 *
 * Returns the number of bytes written (1 to 6).
 * Returns ICONV_CES_INVALID_CHARACTER for UTF-16 surrogate codes
 * (0xD800..0xDFFF), 0xFFFE, 0xFFFF and any code above 0x7FFFFFFF.
 * Returns ICONV_CES_NOSPACE when the sequence does not fit into
 * *outbytesleft.  On either error nothing is written and *outbuf and
 * *outbytesleft are left untouched.
 */
static size_t
convert_from_ucs (void *data,
                  register ucs4_t in,
                  unsigned char **outbuf,
                  size_t *outbytesleft)
{
  register unsigned char *cp;
  register size_t bytes;

  if ((in >= 0x0000D800 && in <= 0x0000DFFF)
      || in > 0x7FFFFFFF || in == 0x0000FFFF || in == 0x0000FFFE)
    return (size_t)ICONV_CES_INVALID_CHARACTER;

  /* Pick the shortest form able to hold the code, so that no overlong
     sequence is ever produced.  */
  if (in < 0x80)
    bytes = 1;
  else if (in < 0x800)
    bytes = 2;
  else if (in < 0x10000)
    bytes = 3;
  else if (in < 0x200000)
    bytes = 4;
  else if (in < 0x4000000)
    bytes = 5;
  else
    bytes = 6;

  if (*outbytesleft < bytes)
    return (size_t)ICONV_CES_NOSPACE;

  cp = *outbuf;

  /* The lead byte carries the length marker plus the top bits of the code;
     every following byte carries 6 payload bits under the 10xxxxxx tag.  */
  switch (bytes)
    {
      case 1:
        *cp = (unsigned char)in;
        break;

      case 2:
        *cp++ = (unsigned char)((in >> 6) | 0x000000C0);
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
        break;

      case 3:
        *cp++ = (unsigned char)((in >> 12) | 0x000000E0);
        *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
        break;

      case 4:
        *cp++ = (unsigned char)((in >> 18) | 0x000000F0);
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
        break;

      case 5:
        *cp++ = (unsigned char)((in >> 24) | 0x000000F8);
        *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
        break;

      case 6:
        *cp++ = (unsigned char)((in >> 30) | 0x000000FC);
        *cp++ = (unsigned char)(((in >> 24) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 18) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 12) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)(((in >> 6) & 0x0000003F) | 0x00000080);
        *cp++ = (unsigned char)((in & 0x0000003F) | 0x00000080);
        break;
    }

  *outbytesleft -= bytes;
  *outbuf += bytes;

  return bytes;
}
#endif /* ICONV_FROM_UCS_CES_UTF_8 */
#if defined (ICONV_TO_UCS_CES_UTF_8)
/*
 * convert_to_ucs - decode one UTF-8 byte sequence into a UCS-4 code.
 *
 *   data         not used by this converter.
 *   inbuf        in/out: current input position; advanced past the
 *                consumed sequence on success.
 *   inbytesleft  in/out: bytes available at *inbuf; reduced by the length
 *                of the consumed sequence on success.
 *
 * Returns the decoded code on success.
 * Returns ICONV_CES_BAD_SEQUENCE when the lead byte announces more bytes
 * than *inbytesleft provides.
 * Returns ICONV_CES_INVALID_CHARACTER for an invalid lead byte
 * (0x80..0xBF, 0xFE, 0xFF), a trailing byte that is not of the form
 * 10xxxxxx, an overlong sequence, a UTF-16 surrogate code
 * (0xD800..0xDFFF), 0xFFFE or 0xFFFF.
 * On any error *inbuf and *inbytesleft are left untouched.
 *
 * NOTE(review): the first byte is read without looking at *inbytesleft,
 * so this assumes the caller never passes an empty buffer -- confirm
 * against the caller.
 */
static ucs4_t
convert_to_ucs (void *data,
                const unsigned char **inbuf,
                size_t *inbytesleft)
{
  register const unsigned char *in = *inbuf;
  register size_t bytes;
  ucs4_t res;

  if (in[0] >= 0xC0)
    {
      /* Multibyte sequence: the lead byte determines the expected length.
         Each branch checks the available length first, then the byte
         patterns, then rejects values a shorter form could have encoded.  */
      if (in[0] < 0xE0)
        {
          bytes = 2;
          if (*inbytesleft < bytes)
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

          if (   ((in[0] & ~0x1F) == 0xC0)
              && ((in[1] & 0xC0) == 0x80))
            res = ((ucs4_t)(in[0] & 0x1F) << 6)
                | ((ucs4_t)(in[1] & 0x3F));
          else
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

          if (res < 0x00000080) /* Overlong sequence */
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
        }
      else if (in[0] < 0xF0)
        {
          bytes = 3;
          if (*inbytesleft < bytes)
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

          if (   ((in[0] & ~0x0F) == 0xE0)
              && ((in[1] & 0xC0) == 0x80)
              && ((in[2] & 0xC0) == 0x80))
            res = ((ucs4_t)(in[0] & 0x0F) << 12)
                | ((ucs4_t)(in[1] & 0x3F) << 6)
                | ((ucs4_t)(in[2] & 0x3F));
          else
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

          if (res < 0x00000800) /* Overlong sequence */
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
        }
      else if (in[0] < 0xF8)
        {
          bytes = 4;
          if (*inbytesleft < bytes)
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

          if (   ((in[0] & ~0x07) == 0xF0)
              && ((in[1] & 0xC0) == 0x80)
              && ((in[2] & 0xC0) == 0x80)
              && ((in[3] & 0xC0) == 0x80))
            res = ((ucs4_t)(in[0] & 0x07) << 18)
                | ((ucs4_t)(in[1] & 0x3F) << 12)
                | ((ucs4_t)(in[2] & 0x3F) << 6)
                | ((ucs4_t)(in[3] & 0x3F));
          else
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

          if (res < 0x00010000) /* Overlong sequence */
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
        }
      else if (in[0] < 0xFC)
        {
          bytes = 5;
          if (*inbytesleft < bytes)
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

          if (   ((in[0] & ~0x03) == 0xF8)
              && ((in[1] & 0xC0) == 0x80)
              && ((in[2] & 0xC0) == 0x80)
              && ((in[3] & 0xC0) == 0x80)
              && ((in[4] & 0xC0) == 0x80))
            res = ((ucs4_t)(in[0] & 0x03) << 24)
                | ((ucs4_t)(in[1] & 0x3F) << 18)
                | ((ucs4_t)(in[2] & 0x3F) << 12)
                | ((ucs4_t)(in[3] & 0x3F) << 6)
                | ((ucs4_t)(in[4] & 0x3F));
          else
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

          if (res < 0x00200000) /* Overlong sequence */
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
        }
      else if (in[0] <= 0xFD)
        {
          bytes = 6;
          if (*inbytesleft < bytes)
            return (ucs4_t)ICONV_CES_BAD_SEQUENCE;

          if (   ((in[0] & ~0x01) == 0xFC)
              && ((in[1] & 0xC0) == 0x80)
              && ((in[2] & 0xC0) == 0x80)
              && ((in[3] & 0xC0) == 0x80)
              && ((in[4] & 0xC0) == 0x80)
              && ((in[5] & 0xC0) == 0x80))
            res = ((ucs4_t)(in[0] & 0x1) << 30)
                | ((ucs4_t)(in[1] & 0x3F) << 24)
                | ((ucs4_t)(in[2] & 0x3F) << 18)
                | ((ucs4_t)(in[3] & 0x3F) << 12)
                | ((ucs4_t)(in[4] & 0x3F) << 6)
                | ((ucs4_t)(in[5] & 0x3F));
          else
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

          if (res < 0x04000000) /* Overlong sequence */
            return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
        }
      else
        /* 0xFE and 0xFF never start a sequence.  */
        return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
    }
  else if (in[0] & 0x80)
    /* 0x80..0xBF: a trailing byte where a lead byte is expected.  */
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
  else
    {
      /* Single-byte (7-bit) code.  */
      res = (ucs4_t)in[0];
      bytes = 1;
    }

  if (   (res >= 0x0000D800 && res <= 0x0000DFFF)
      || res > 0x7FFFFFFF || res == 0x0000FFFF || res == 0x0000FFFE)
    return (ucs4_t)ICONV_CES_INVALID_CHARACTER;

  *inbytesleft -= bytes;
  *inbuf += bytes;

  return res;
}
#endif /* ICONV_TO_UCS_CES_UTF_8 */
259 static int
260 get_mb_cur_max (void *data)
262 return UTF8_MB_CUR_MAX;
#if defined (ICONV_TO_UCS_CES_UTF_8)
/*
 * Handler table for the UTF-8 -> UCS direction.  Only the third slot
 * (get_mb_cur_max) and the last slot (convert_to_ucs) are populated; the
 * remaining slots are left NULL.  The slot meanings come from
 * iconv_to_ucs_ces_handlers_t, which is declared outside this file
 * (presumably in ../lib/ucsconv.h -- confirm).
 */
const iconv_to_ucs_ces_handlers_t
_iconv_to_ucs_ces_handlers_utf_8 =
{
  NULL,
  NULL,
  get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  convert_to_ucs
};
#endif
#if defined (ICONV_FROM_UCS_CES_UTF_8)
/*
 * Handler table for the UCS -> UTF-8 direction.  Only the third slot
 * (get_mb_cur_max) and the last slot (convert_from_ucs) are populated; the
 * remaining slots are left NULL.  The slot meanings come from
 * iconv_from_ucs_ces_handlers_t, which is declared outside this file
 * (presumably in ../lib/ucsconv.h -- confirm).
 */
const iconv_from_ucs_ces_handlers_t
_iconv_from_ucs_ces_handlers_utf_8 =
{
  NULL,
  NULL,
  get_mb_cur_max,
  NULL,
  NULL,
  NULL,
  convert_from_ucs
};
#endif
293 #endif /* ICONV_TO_UCS_CES_UTF_8 || ICONV_FROM_UCS_CES_UTF_8 */