NASM 2.10.03
[nasm/nasm.git] / strfunc.c
blob4b5af40b58ef00d7cb8c20ff9349e9722c3c7672
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
/*
 * strfunc.c
 *
 * String transformation functions
 */
40 #include "nasmlib.h"
41 #include "nasm.h"
/*
 * Convert a string in UTF-8 format to UTF-16LE
 *
 * str - UTF-8 input bytes (a length is passed, no terminator is needed)
 * len - number of bytes to read from str
 * op  - destination buffer, or NULL to only measure the output
 *
 * Returns the size of the UTF-16LE output in bytes, or (size_t)-1 when
 * the input is rejected: an unexpected or missing continuation byte, a
 * lead byte of 0xfe/0xff, an overlong encoding, an encoded surrogate
 * (U+D800..U+DFFF), a value above U+10FFFF, or a multi-byte sequence
 * cut short by the end of the input.  When op is non-NULL, output
 * produced before the error was detected has already been stored.
 */
static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
{
/* Count one 16-bit unit and, when a buffer is given, store it low byte first */
#define EMIT(x)                                         \
    do {                                                \
        const uint16_t unit_ = (uint16_t)(x);           \
        if (op) {                                       \
            uint8_t *dst_ = (uint8_t *)op;              \
            dst_[0] = (uint8_t)(unit_ & 0xff);          \
            dst_[1] = (uint8_t)(unit_ >> 8);            \
            op += 2;                                    \
        }                                               \
        units++;                                        \
    } while (0)

    size_t units = 0;           /* 16-bit units produced so far */
    int pending = 0;            /* continuation bytes still owed */
    uint32_t cp = 0;            /* code point being assembled */
    uint32_t min_cp = 0;        /* smallest value legal for this length */
    size_t i;

    for (i = 0; i < len; i++) {
        const uint8_t byte = str[i];

        if (pending == 0) {
            /* Start of a new character: classify the lead byte */
            if (byte >= 0xfe || (byte >= 0x80 && byte < 0xc0))
                return (size_t)-1;      /* Invalid UTF-8 */

            if (byte >= 0xfc) {
                cp = byte & 0x01;
                pending = 5;
                min_cp = 0x4000000;
            } else if (byte >= 0xf8) {
                cp = byte & 0x03;
                pending = 4;
                min_cp = 0x200000;
            } else if (byte >= 0xf0) {
                cp = byte & 0x07;
                pending = 3;
                min_cp = 0x10000;
            } else if (byte >= 0xe0) {
                cp = byte & 0x0f;
                pending = 2;
                min_cp = 0x800;
            } else if (byte >= 0xc0) {
                cp = byte & 0x1f;
                pending = 1;
                min_cp = 0x80;
            } else {
                EMIT(byte);             /* plain ASCII */
            }
            continue;
        }

        /* Inside a multi-byte sequence: only 10xxxxxx is acceptable */
        if ((byte & 0xc0) != 0x80)
            return (size_t)-1;

        cp = (cp << 6) | (byte & 0x3f);
        if (--pending)
            continue;

        /* Sequence complete: reject overlong, out-of-range and surrogates */
        if (cp < min_cp || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff))
            return (size_t)-1;

        if (cp > 0xffff) {
            /* Outside the BMP: split into a surrogate pair */
            cp -= 0x10000;
            EMIT(0xd800 | (cp >> 10));
            EMIT(0xdc00 | (cp & 0x3ff));
        } else {
            EMIT(cp);
        }
    }

    /* A sequence still open at end of input is an error */
    return pending ? (size_t)-1 : units << 1;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-16BE
 *
 * str - UTF-8 input bytes (a length is passed, no terminator is needed)
 * len - number of bytes to read from str
 * op  - destination buffer, or NULL to only measure the output
 *
 * Returns the size of the UTF-16BE output in bytes, or (size_t)-1 when
 * the input is rejected: an unexpected or missing continuation byte, a
 * lead byte of 0xfe/0xff, an overlong encoding, an encoded surrogate
 * (U+D800..U+DFFF), a value above U+10FFFF, or a multi-byte sequence
 * cut short by the end of the input.  When op is non-NULL, output
 * produced before the error was detected has already been stored.
 */
static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
{
/* Count one 16-bit unit and, when a buffer is given, store it high byte first */
#define EMIT(x)                                         \
    do {                                                \
        const uint16_t unit_ = (uint16_t)(x);           \
        if (op) {                                       \
            uint8_t *dst_ = (uint8_t *)op;              \
            dst_[0] = (uint8_t)(unit_ >> 8);            \
            dst_[1] = (uint8_t)(unit_ & 0xff);          \
            op += 2;                                    \
        }                                               \
        units++;                                        \
    } while (0)

    size_t units = 0;           /* 16-bit units produced so far */
    int pending = 0;            /* continuation bytes still owed */
    uint32_t cp = 0;            /* code point being assembled */
    uint32_t min_cp = 0;        /* smallest value legal for this length */
    size_t i;

    for (i = 0; i < len; i++) {
        const uint8_t byte = str[i];

        if (pending == 0) {
            /* Start of a new character: classify the lead byte */
            if (byte >= 0xfe || (byte >= 0x80 && byte < 0xc0))
                return (size_t)-1;      /* Invalid UTF-8 */

            if (byte >= 0xfc) {
                cp = byte & 0x01;
                pending = 5;
                min_cp = 0x4000000;
            } else if (byte >= 0xf8) {
                cp = byte & 0x03;
                pending = 4;
                min_cp = 0x200000;
            } else if (byte >= 0xf0) {
                cp = byte & 0x07;
                pending = 3;
                min_cp = 0x10000;
            } else if (byte >= 0xe0) {
                cp = byte & 0x0f;
                pending = 2;
                min_cp = 0x800;
            } else if (byte >= 0xc0) {
                cp = byte & 0x1f;
                pending = 1;
                min_cp = 0x80;
            } else {
                EMIT(byte);             /* plain ASCII */
            }
            continue;
        }

        /* Inside a multi-byte sequence: only 10xxxxxx is acceptable */
        if ((byte & 0xc0) != 0x80)
            return (size_t)-1;

        cp = (cp << 6) | (byte & 0x3f);
        if (--pending)
            continue;

        /* Sequence complete: reject overlong, out-of-range and surrogates */
        if (cp < min_cp || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff))
            return (size_t)-1;

        if (cp > 0xffff) {
            /*
             * Outside the BMP: split into a surrogate pair.  Big-endian
             * only changes the byte order inside each 16-bit unit; the
             * high surrogate still precedes the low surrogate.
             */
            cp -= 0x10000;
            EMIT(0xd800 | (cp >> 10));
            EMIT(0xdc00 | (cp & 0x3ff));
        } else {
            EMIT(cp);
        }
    }

    /* A sequence still open at end of input is an error */
    return pending ? (size_t)-1 : units << 1;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-32LE
 *
 * str - UTF-8 input bytes (a length is passed, no terminator is needed)
 * len - number of bytes to read from str
 * op  - destination buffer, or NULL to only measure the output
 *
 * Returns the size of the UTF-32LE output in bytes, or (size_t)-1 when
 * the input is rejected: an unexpected or missing continuation byte, a
 * lead byte of 0xfe/0xff, an overlong encoding, an encoded surrogate
 * (U+D800..U+DFFF), or a multi-byte sequence cut short by the end of
 * the input.  No upper limit is applied to the decoded value, so the
 * 5- and 6-byte forms (values up to 0x7fffffff) are passed through.
 * When op is non-NULL, output produced before the error was detected
 * has already been stored.
 */
static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
{
/* Count one 32-bit unit and, when a buffer is given, store it low byte first */
#define EMIT(x)                                         \
    do {                                                \
        const uint32_t unit_ = (uint32_t)(x);           \
        if (op) {                                       \
            uint8_t *dst_ = (uint8_t *)op;              \
            dst_[0] = (uint8_t)(unit_ & 0xff);          \
            dst_[1] = (uint8_t)((unit_ >> 8) & 0xff);   \
            dst_[2] = (uint8_t)((unit_ >> 16) & 0xff);  \
            dst_[3] = (uint8_t)(unit_ >> 24);           \
            op += 4;                                    \
        }                                               \
        units++;                                        \
    } while (0)

    size_t units = 0;           /* 32-bit units produced so far */
    int pending = 0;            /* continuation bytes still owed */
    uint32_t cp = 0;            /* code point being assembled */
    uint32_t min_cp = 0;        /* smallest value legal for this length */
    size_t i;

    for (i = 0; i < len; i++) {
        const uint8_t byte = str[i];

        if (pending == 0) {
            /* Start of a new character: classify the lead byte */
            if (byte >= 0xfe || (byte >= 0x80 && byte < 0xc0))
                return (size_t)-1;      /* Invalid UTF-8 */

            if (byte >= 0xfc) {
                cp = byte & 0x01;
                pending = 5;
                min_cp = 0x4000000;
            } else if (byte >= 0xf8) {
                cp = byte & 0x03;
                pending = 4;
                min_cp = 0x200000;
            } else if (byte >= 0xf0) {
                cp = byte & 0x07;
                pending = 3;
                min_cp = 0x10000;
            } else if (byte >= 0xe0) {
                cp = byte & 0x0f;
                pending = 2;
                min_cp = 0x800;
            } else if (byte >= 0xc0) {
                cp = byte & 0x1f;
                pending = 1;
                min_cp = 0x80;
            } else {
                EMIT(byte);             /* plain ASCII */
            }
            continue;
        }

        /* Inside a multi-byte sequence: only 10xxxxxx is acceptable */
        if ((byte & 0xc0) != 0x80)
            return (size_t)-1;

        cp = (cp << 6) | (byte & 0x3f);
        if (--pending)
            continue;

        /* Sequence complete: reject overlong forms and surrogates */
        if (cp < min_cp || (cp >= 0xd800 && cp <= 0xdfff))
            return (size_t)-1;

        EMIT(cp);
    }

    /* A sequence still open at end of input is an error */
    return pending ? (size_t)-1 : units << 2;

#undef EMIT
}
/*
 * Convert a string in UTF-8 format to UTF-32BE
 *
 * str - UTF-8 input bytes (a length is passed, no terminator is needed)
 * len - number of bytes to read from str
 * op  - destination buffer, or NULL to only measure the output
 *
 * Returns the size of the UTF-32BE output in bytes, or (size_t)-1 when
 * the input is rejected: an unexpected or missing continuation byte, a
 * lead byte of 0xfe/0xff, an overlong encoding, an encoded surrogate
 * (U+D800..U+DFFF), or a multi-byte sequence cut short by the end of
 * the input.  No upper limit is applied to the decoded value, so the
 * 5- and 6-byte forms (values up to 0x7fffffff) are passed through.
 * When op is non-NULL, output produced before the error was detected
 * has already been stored.
 */
static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
{
/* Count one 32-bit unit and, when a buffer is given, store it high byte first */
#define EMIT(x)                                         \
    do {                                                \
        const uint32_t unit_ = (uint32_t)(x);           \
        if (op) {                                       \
            uint8_t *dst_ = (uint8_t *)op;              \
            dst_[0] = (uint8_t)(unit_ >> 24);           \
            dst_[1] = (uint8_t)((unit_ >> 16) & 0xff);  \
            dst_[2] = (uint8_t)((unit_ >> 8) & 0xff);   \
            dst_[3] = (uint8_t)(unit_ & 0xff);          \
            op += 4;                                    \
        }                                               \
        units++;                                        \
    } while (0)

    size_t units = 0;           /* 32-bit units produced so far */
    int pending = 0;            /* continuation bytes still owed */
    uint32_t cp = 0;            /* code point being assembled */
    uint32_t min_cp = 0;        /* smallest value legal for this length */
    size_t i;

    for (i = 0; i < len; i++) {
        const uint8_t byte = str[i];

        if (pending == 0) {
            /* Start of a new character: classify the lead byte */
            if (byte >= 0xfe || (byte >= 0x80 && byte < 0xc0))
                return (size_t)-1;      /* Invalid UTF-8 */

            if (byte >= 0xfc) {
                cp = byte & 0x01;
                pending = 5;
                min_cp = 0x4000000;
            } else if (byte >= 0xf8) {
                cp = byte & 0x03;
                pending = 4;
                min_cp = 0x200000;
            } else if (byte >= 0xf0) {
                cp = byte & 0x07;
                pending = 3;
                min_cp = 0x10000;
            } else if (byte >= 0xe0) {
                cp = byte & 0x0f;
                pending = 2;
                min_cp = 0x800;
            } else if (byte >= 0xc0) {
                cp = byte & 0x1f;
                pending = 1;
                min_cp = 0x80;
            } else {
                EMIT(byte);             /* plain ASCII */
            }
            continue;
        }

        /* Inside a multi-byte sequence: only 10xxxxxx is acceptable */
        if ((byte & 0xc0) != 0x80)
            return (size_t)-1;

        cp = (cp << 6) | (byte & 0x3f);
        if (--pending)
            continue;

        /* Sequence complete: reject overlong forms and surrogates */
        if (cp < min_cp || (cp >= 0xd800 && cp <= 0xdfff))
            return (size_t)-1;

        EMIT(cp);
    }

    /* A sequence still open at end of input is an error */
    return pending ? (size_t)-1 : units << 2;

#undef EMIT
}
329 typedef size_t (*transform_func)(uint8_t *, size_t, char *);
332 * Apply a specific string transform and return it in a nasm_malloc'd
333 * buffer, returning the length. On error, returns (size_t)-1 and no
334 * buffer is allocated.
336 size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
338 /* This should match enum strfunc in nasm.h */
339 static const transform_func str_transforms[] = {
340 utf8_to_16le,
341 utf8_to_16le,
342 utf8_to_16be,
343 utf8_to_32le,
344 utf8_to_32le,
345 utf8_to_32be,
347 transform_func transform = str_transforms[func];
348 size_t outlen;
349 uint8_t *s = (uint8_t *)str;
350 char *buf;
352 outlen = transform(s, len, NULL);
353 if (outlen == (size_t)-1)
354 return -1;
356 *out = buf = nasm_malloc(outlen+1);
357 buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */
358 return transform(s, len, buf);