Cygwin: access: Fix X_OK behaviour for backup operators and admins
[newlib-cygwin.git] / newlib / libc / iconv / ces / utf-16.c
bloba3491bb3c19d42702069b33722bfe8e61e4c07dd
1 /*
2 * Copyright (c) 2003-2004, Artem B. Bityuckiy
3 * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
26 #include "cesbi.h"
28 #if defined (ICONV_TO_UCS_CES_UTF_16) \
29 || defined (ICONV_FROM_UCS_CES_UTF_16)
31 #include <_ansi.h>
32 #include <reent.h>
33 #include <sys/types.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <wchar.h>
37 #include "../lib/local.h"
38 #include "../lib/ucsconv.h"
39 #include "../lib/endian.h"
/*
 * On input, the UTF-16 converter interprets a leading BOM and assumes
 * big-endian byte order when no BOM is present.  On output, it writes in
 * system byte order and emits the corresponding BOM as the first code unit.
 * The UTF-16LE and UTF-16BE converters do not interpret a BOM on input and
 * do not emit one on output.
 */

/* Converter state values, stored in the int allocated by the init handlers. */
#define UTF16_UNDEFINED      0x00  /* input byte order not yet detected     */
#define UTF16_BIG_ENDIAN     0x01
#define UTF16_LITTLE_ENDIAN  0x02
#define UTF16_SYSTEM_ENDIAN  0x04  /* output in native order, with a BOM    */
#define UTF16_BOM_WRITTEN    0x08  /* OR-ed into the state once BOM is out  */

/* Byte order mark, in host representation. */
#define UTF16_BOM            0xFEFF

/* Encoding names compared against the name passed to the init handlers. */
#define UTF_16               "utf_16"
#define UTF_16BE             "utf_16be"
#define UTF_16LE             "utf_16le"
/*
 * Close handler: hand the converter's state block back to the reentrant
 * allocator.
 *
 * rptr - reentrancy structure passed through to _free_r.
 * data - state pointer previously returned by one of the init handlers.
 *
 * Always returns 0.
 */
static size_t
utf_16_close (struct _reent *rptr,
              void *data)
{
  const size_t status = 0;

  _free_r (rptr, data);

  return status;
}
68 #if defined (ICONV_FROM_UCS_CES_UTF_16)
69 static void *
70 utf_16_init_from_ucs (struct _reent *rptr,
71 const char *encoding)
73 int *data;
75 if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
76 return (void *)NULL;
78 if (strcmp (encoding, UTF_16LE) == 0)
79 *data = UTF16_LITTLE_ENDIAN;
80 else if (strcmp (encoding, UTF_16BE) == 0)
81 *data = UTF16_BIG_ENDIAN;
82 else
83 *data = UTF16_SYSTEM_ENDIAN;
85 return (void *)data;
88 static size_t
89 utf_16_convert_from_ucs (void *data,
90 register ucs4_t in,
91 unsigned char **outbuf,
92 size_t *outbytesleft)
94 register ucs2_t *cp;
95 register size_t bytes;
96 register int *state;
98 if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
99 || in == 0x0000FFFF || in == 0x0000FFFE)
100 return (size_t)ICONV_CES_INVALID_CHARACTER;
102 state = (int *)data;
103 bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2
104 : sizeof (ucs2_t);
106 if (in > 0x0000FFFF)
107 bytes += sizeof (ucs2_t);
109 if (*outbytesleft < bytes)
110 return (size_t)ICONV_CES_NOSPACE;
112 cp = (ucs2_t *)*outbuf;
114 if (*state == UTF16_SYSTEM_ENDIAN)
116 *cp++ = UTF16_BOM;
117 *state |= UTF16_BOM_WRITTEN;
120 if (in < 0x00010000)
122 switch (*state)
124 case UTF16_LITTLE_ENDIAN:
125 *cp = ICONV_HTOLES ((ucs2_t)in);
126 break;
127 case UTF16_BIG_ENDIAN:
128 *cp = ICONV_HTOBES ((ucs2_t)in);
129 break;
130 case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
131 *cp = (ucs2_t)in;
132 break;
135 else
137 ucs2_t w1, w2;
139 /* Process surrogate pair */
140 in -= 0x00010000;
141 w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
142 w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;
144 switch (*state)
146 case UTF16_LITTLE_ENDIAN:
147 *cp++ = ICONV_HTOLES (w1);
148 *cp = ICONV_HTOLES (w2);
149 break;
150 case UTF16_BIG_ENDIAN:
151 *cp++ = ICONV_HTOBES (w1);
152 *cp = ICONV_HTOBES (w2);
153 break;
154 case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
155 *cp++ = w1;
156 *cp = w2;
157 break;
161 *outbuf += bytes;
162 *outbytesleft -= bytes;
164 return bytes;
166 #endif /* ICONV_FROM_UCS_CES_UTF_16 */
168 #if defined (ICONV_TO_UCS_CES_UTF_16)
169 static void *
170 utf_16_init_to_ucs (struct _reent *rptr,
171 const char *encoding)
173 int *data;
175 if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
176 return (void *)NULL;
178 if (strcmp (encoding, UTF_16BE) == 0)
179 *data = UTF16_BIG_ENDIAN;
180 else if (strcmp (encoding, UTF_16LE) == 0)
181 *data = UTF16_LITTLE_ENDIAN;
182 else
183 *data = UTF16_UNDEFINED;
185 return (void *)data;
188 static ucs4_t
189 utf_16_convert_to_ucs (void *data,
190 const unsigned char **inbuf,
191 size_t *inbytesleft)
193 register ucs2_t w1;
194 register ucs2_t w2;
195 register ucs2_t *cp;
196 int *state;
197 ucs4_t res;
198 int bytes = sizeof (ucs2_t);
200 if (*inbytesleft < bytes)
201 return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
203 state = (int *)data;
204 cp = ((ucs2_t *)*inbuf);
206 if (*state == UTF16_UNDEFINED)
208 if (*cp == ICONV_HTOLES(UTF16_BOM))
209 *state = UTF16_LITTLE_ENDIAN;
210 else
211 *state = UTF16_BIG_ENDIAN;
213 if ( *cp == ICONV_HTOBES (UTF16_BOM)
214 || *cp == ICONV_HTOLES (UTF16_BOM))
216 if (*inbytesleft < (bytes += sizeof (ucs2_t)))
217 return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
218 cp += 1;
222 if (*state == UTF16_LITTLE_ENDIAN)
223 w1 = ICONV_LETOHS (*cp);
224 else
225 w1 = ICONV_BETOHS (*cp);
227 if (w1 < 0xD800 || w1 > 0xDFFF)
229 if (w1 == 0xFFFF || w1 == 0xFFFE)
230 return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
231 res = (ucs4_t)w1;
233 else
235 /* Process surrogate pair */
236 if (*inbytesleft < (bytes += 2))
237 return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
239 if (w1 > 0xDBFF)
240 /* Broken surrogate character */
241 return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
243 cp += 1;
245 if (*state == UTF16_LITTLE_ENDIAN)
246 w2 = ICONV_LETOHS (*cp);
247 else
248 w2 = ICONV_BETOHS (*cp);
250 if (w2 < 0xDC00 || w2 > 0xDFFF)
251 /* Broken surrogate character */
252 return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
254 res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
255 res += 0x00010000;
258 *inbuf += bytes;
259 *inbytesleft -= bytes;
261 return res;
263 #endif /* ICONV_TO_UCS_CES_UTF_16 */
/*
 * Report the largest number of bytes a single conversion step can occupy.
 *
 * data - converter state; not consulted.
 *
 * Always returns 6.
 */
static int
utf_16_get_mb_cur_max (void *data)
{
  const int max_bytes = 6;

  (void)data;

  return max_bytes;
}
271 #if defined (ICONV_TO_UCS_CES_UTF_16)
272 const iconv_to_ucs_ces_handlers_t
273 _iconv_to_ucs_ces_handlers_utf_16 =
275 utf_16_init_to_ucs,
276 utf_16_close,
277 utf_16_get_mb_cur_max,
278 NULL,
279 NULL,
280 NULL,
281 utf_16_convert_to_ucs
283 #endif
285 #if defined (ICONV_FROM_UCS_CES_UTF_16)
286 const iconv_from_ucs_ces_handlers_t
287 _iconv_from_ucs_ces_handlers_utf_16 =
289 utf_16_init_from_ucs,
290 utf_16_close,
291 utf_16_get_mb_cur_max,
292 NULL,
293 NULL,
294 NULL,
295 utf_16_convert_from_ucs
297 #endif
299 #endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */