Implement insns.dat in human readable form
[nasm/nasm.git] / quote.c
blobfe1c97d9a873ce1b0cc92926889f8a4da4dafc1c
1 /* ----------------------------------------------------------------------- *
2 *
3 * Copyright 1996-2009 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
9 * conditions are met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
35 * quote.c
38 #include "compiler.h"
40 #include <stdlib.h>
42 #include "nasmlib.h"
43 #include "quote.h"
/*
 * Write the representation of a single byte as it appears inside a
 * `...` quoted string to out[] and return the number of characters
 * written (between 1 and 4; no NUL terminator is added).
 *
 * ch   - the byte to represent
 * next - the byte following ch in the input, or 0 if ch is the last one
 *
 * Printable ASCII is copied through, with a backslash in front of a
 * backquote or a backslash.  Control characters that have a letter
 * escape use it; every other byte becomes an octal escape.  The octal
 * escape is written with as few digits as possible unless the next
 * input byte is itself an octal digit, in which case all three digits
 * are required so the following digit is not absorbed into the escape.
 */
static size_t quote_escape_byte(char ch, char next, char *out)
{
    char *w = out;
    char letter = 0;
    unsigned char bits = (unsigned char)ch;
    unsigned char width;

    if (ch >= ' ' && ch <= '~') {
        if (ch == '`' || ch == '\\')
            *w++ = '\\';
        *w++ = ch;
        return (size_t)(w - out);
    }

    switch (ch) {
    case 7:
        letter = 'a';
        break;
    case 8:
        letter = 'b';
        break;
    case 9:
        letter = 't';
        break;
    case 10:
        letter = 'n';
        break;
    case 11:
        letter = 'v';
        break;
    case 12:
        letter = 'f';
        break;
    case 13:
        letter = 'r';
        break;
    case 27:
        letter = 'e';
        break;
    default:
        break;
    }

    *w++ = '\\';
    if (letter) {
        *w++ = letter;
    } else {
        /* width only selects how many octal digits get printed */
        if (next >= '0' && next <= '7')
            width = 0377;       /* Must use the full form */
        else
            width = bits;
        if (width > 077)
            *w++ = (bits >> 6) + '0';
        if (width > 07)
            *w++ = ((bits >> 3) & 7) + '0';
        *w++ = (bits & 7) + '0';
    }
    return (size_t)(w - out);
}

/*
 * Return a newly allocated (nasm_malloc) NUL-terminated quoted copy of
 * the len bytes starting at str.
 *
 * If every byte is printable ASCII and the data does not contain both
 * kinds of plain quote, the bytes are copied verbatim between '...'
 * (preferred) or "...".  Otherwise the `...` form with backslash
 * escapes is produced.
 */
char *nasm_quote(char *str, size_t len)
{
    const char *end = str + len;
    const char *src;
    char scratch[4];
    char *result, *dst;
    char delim, follow;
    bool single_ok = true;
    bool double_ok = true;
    size_t bqlen = 0;           /* Payload length if `...` is needed */

    /* First pass: decide which quoting styles work and size `...` */
    for (src = str; src < end; src++) {
        if (*src == '\'')
            single_ok = false;
        else if (*src == '\"')
            double_ok = false;
        else if (*src < ' ' || *src > '~')
            single_ok = double_ok = false;

        follow = (src + 1 < end) ? src[1] : 0;
        bqlen += quote_escape_byte(*src, follow, scratch);
    }

    if (single_ok || double_ok) {
        /* Plain quotes: opening quote + raw bytes + closing quote + NUL */
        delim = single_ok ? '\'' : '\"';
        result = nasm_malloc(len + 3);
        result[len + 1] = delim;
        result[0] = delim;
        result[len + 2] = '\0';
        if (len > 0)
            memcpy(result + 1, str, len);
        return result;
    }

    /* Second pass: emit the escaped `...` form */
    result = nasm_malloc(bqlen + 3);
    dst = result;
    *dst++ = '`';
    for (src = str; src < end; src++) {
        follow = (src + 1 < end) ? src[1] : 0;
        dst += quote_escape_byte(*src, follow, dst);
    }
    *dst++ = '`';
    *dst++ = '\0';

    /* Both passes must agree on the output size */
    nasm_assert((size_t)(dst - result) == bqlen + 3);
    return result;
}
/*
 * Store the value v at q using UTF-8 style encoding and return the
 * pointer just past the last byte written.
 *
 * Values up to 0x7fffffff are accepted, so the output can be from one
 * to six bytes long; the caller must provide that much room.  A
 * negative v writes nothing and returns q unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int lead;                   /* Marker bits of the first byte */
    int trailing;               /* Number of 10xxxxxx bytes that follow */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        *q++ = v;
        return q;
    }

    if (v <= 0x000007ff) {
        lead = 0xc0;
        trailing = 1;
    } else if (v <= 0x0000ffff) {
        lead = 0xe0;
        trailing = 2;
    } else if (v <= 0x001fffff) {
        lead = 0xf0;
        trailing = 3;
    } else if (v <= 0x03ffffff) {
        lead = 0xf8;
        trailing = 4;
    } else {
        lead = 0xfc;
        trailing = 5;
    }

    /* The first byte carries the bits above all the 6-bit groups */
    *q++ = lead | (v >> (6 * trailing));

    /* Then the 6-bit groups, most significant first */
    while (trailing-- > 0)
        *q++ = 0x80 | ((v >> (6 * trailing)) & 63);

    return q;
}
/*
 * Emit the value v at q via emit_utf8() and return the advanced pointer.
 * Values above 0x7fffffff do not fit emit_utf8()'s signed argument and
 * are dropped (nothing is written).
 */
static char *unquote_emit_ucs(char *q, uint32_t v)
{
    if (v > 0x7fffffff)
        return q;
    return emit_utf8(q, (int32_t)v);
}

/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded nulls.)
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 *
 * The first character of str selects the syntax: '...' and "..." are
 * copied verbatim up to the matching quote and the result is
 * NUL-terminated; `...` is processed for backslash escapes (letter
 * escapes, \x hex, octal, \u and \U) and no terminator is written after
 * the result.  Any other first character means "not a quoted string":
 * the input is left alone and its length is returned.
 *
 * If ep is not NULL, *ep points to the final quote, or to the null if
 * improperly quoted (this includes an empty input string).
 */
size_t nasm_unquote(char *str, char **ep)
{
    char bq;
    char *p, *q;
    char *escp = NULL;
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs
    } state;
    int ndig = 0;
    /*
     * Unsigned accumulator: a \U escape can shift in eight hex digits,
     * which would overflow a signed 32-bit value (undefined behaviour).
     */
    uint32_t nval = 0;

    p = q = str;

    bq = *p++;
    if (!bq) {
        /* Empty input: still give the caller a defined end pointer */
        if (ep)
            *ep = str;
        return 0;
    }

    switch (bq) {
    case '\'':
    case '\"':
        /* '...' or "..." string */
        while ((c = *p) && c != bq) {
            p++;
            *q++ = c;
        }
        *q = '\0';
        break;

    case '`':
        /* `...` string */
        state = st_start;

        while ((c = *p)) {
            p++;
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    p--;        /* Leave p on the closing quote */
                    goto out;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_backslash:
                state = st_start;
                escp = p;       /* Beginning of argument sequence */
                nval = 0;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    break;
                case 'v':
                    *q++ = 11;
                    break;
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = 2;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 2;   /* Up to two more digits */
                    nval = (uint32_t)(c - '0');
                    break;
                default:
                    /* Unknown escape: the character stands for itself */
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (uint32_t)(c - '0');
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = (char)nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + (uint32_t)numvalue(c);
                    if (!--ndig) {
                        *q++ = (char)nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the 'x'/'X' itself */
                    if (p > escp)
                        *q++ = (char)nval;
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + (uint32_t)numvalue(c);
                    if (!--ndig) {
                        q = unquote_emit_ucs(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    /* No digits at all: emit the 'u'/'U' itself */
                    if (p > escp)
                        q = unquote_emit_ucs(q, nval);
                    else
                        *q++ = escp[-1];
                    state = st_start;
                }
                break;
            }
        }

        /* Input ended inside an escape: flush what was collected */
        switch (state) {
        case st_start:
        case st_backslash:
            break;
        case st_oct:
            *q++ = (char)nval;
            break;
        case st_hex:
            if (p > escp)
                *q++ = (char)nval;
            else
                *q++ = escp[-1];
            break;
        case st_ucs:
            if (p > escp)
                q = unquote_emit_ucs(q, nval);
            else
                *q++ = escp[-1];
            break;
        }
    out:
        break;

    default:
        /* Not a quoted string, just return the input... */
        p = q = strchr(str, '\0');
        break;
    }

    if (ep)
        *ep = p;
    return q - str;
}
/*
 * Find the end of a quoted string; returns the pointer to the terminating
 * character (either the ending quote or the null character, if
 * unterminated.)
 *
 * The first character of str selects the syntax: for '...' and "..."
 * the next matching quote ends the string; for `...` a backquote ends
 * the string unless it is preceded by a backslash escape.  If str does
 * not start with a quote character, str itself is returned.
 */
char *nasm_skip_string(char *str)
{
    char bq;
    char *p;
    char c;
    enum unq_state {
        st_start,
        st_backslash
    } state;

    bq = str[0];
    if (bq == '\'' || bq == '\"') {
        /* '...' or "..." string */
        for (p = str + 1; *p && *p != bq; p++)
            ;
        return p;
    } else if (bq == '`') {
        /* `...` string */
        p = str + 1;
        state = st_start;

        while ((c = *p++)) {
            switch (state) {
            case st_start:
                switch (c) {
                case '\\':
                    state = st_backslash;
                    break;
                case '`':
                    return p - 1;       /* Found the end */
                default:
                    break;
                }
                break;

            case st_backslash:
                /*
                 * Note: for the purpose of finding the end of the string,
                 * all successor states to st_backslash are functionally
                 * equivalent to st_start, since either a backslash or
                 * a backquote will force a return to the st_start state.
                 */
                state = st_start;
                break;
            }
        }

        /*
         * Unterminated string.  The loop's post-increment has already
         * stepped p past the null, so back up to point at the null itself
         * rather than one byte beyond the end of the string.
         */
        return p - 1;
    } else {
        return str;             /* Not a string... */
    }
}