1 /* ----------------------------------------------------------------------- *
3 * Copyright 1996-2019 The NASM Authors - All Rights Reserved
4 * See the file AUTHORS included with the NASM distribution for
5 * the specific copyright holders.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * ----------------------------------------------------------------------- */
45 * Create a NASM quoted string in newly allocated memory. Update the
46 * *lenp parameter with the output length (sans final NUL).
/*
 * nasm_quote: build a quoted copy of the bytes in [str, ep).
 *
 * The visible code walks the input twice. The first walk classifies each
 * character (can plain single or double quotes be used?) while qlen tracks
 * the size of the backquoted form. The second walk runs only on the
 * backquote path and writes the escaped output through q.
 */
49 char *nasm_quote(const char *str
, size_t *lenp
)
52 char c
, c1
, *q
, *nstr
;
60 qlen
= 0; /* Length if we need `...` quotes */
61 for (p
= str
; p
< ep
; p
++) {
/* Anything outside printable ASCII (space through tilde) rules out both plain quote styles */
77 if (c
< ' ' || c
> '~') {
78 sq_ok
= dq_ok
= false;
/* Look one byte ahead; 0 stands in for the lookahead at the end of the input */
91 c1
= (p
+1 < ep
) ? p
[1] : 0;
/*
 * An octal digit follows: presumably a shortened octal escape would be
 * read together with that digit, hence the forced full form -- confirm.
 */
92 if (c1
>= '0' && c1
<= '7')
93 uc
= 0377; /* Must use the full form */
110 if (sq_ok
|| dq_ok
) {
111 /* Use '...' or "..." */
/* len + 3 bytes: two quote characters, len payload bytes, and presumably a final NUL stored on a line not shown here */
112 nstr
= nasm_malloc(len
+3);
/* Single quotes are preferred; double quotes are used only when single quotes are not allowed */
113 nstr
[0] = nstr
[len
+1] = sq_ok
? '\'' : '\"';
/* The payload is copied verbatim between the two quote characters */
116 memcpy(nstr
+1, str
, len
);
118 /* Need to use `...` quoted syntax */
119 nstr
= nasm_malloc(qlen
+3);
122 for (p
= str
; p
< ep
; p
++) {
163 if (c
< ' ' || c
> '~') {
/* Same one-byte lookahead as in the first walk, so both walks make the same full-form decision */
164 c1
= (p
+1 < ep
) ? p
[1] : 0;
165 if (c1
>= '0' && c1
<= '7')
166 uc
= 0377; /* Must use the full form */
/* Octal digits of c, most significant first: bits 7-6, then bits 5-3, then bits 2-0 */
171 *q
++ = ((unsigned char)c
>> 6) + '0';
173 *q
++ = (((unsigned char)c
>> 3) & 7) + '0';
174 *q
++ = ((unsigned char)c
& 7) + '0';
/* Consistency check between the two walks: bytes written must equal qlen plus 2 (presumably the two backquotes) */
183 nasm_assert((size_t)(q
-nstr
) == qlen
+2);
/*
 * emit_utf8: write the byte sequence encoding the value v through q.
 *
 * The only visible use assigns the result back to the output pointer
 * (q = emit_utf8(q, ec)), so the return value is presumably the advanced
 * write position -- the return statement itself is not shown here.
 */
190 static unsigned char *emit_utf8(unsigned char *q
, uint32_t v
)
/*
 * Scratch values for successive 6-bit groups of v. The note further down
 * describes vb5 as v shifted right by 5*6 = 30 bits; vb1..vb4 are
 * presumably the 6, 12, 18 and 24 bit shifts -- confirm.
 */
192 uint32_t vb1
, vb2
, vb3
, vb4
, vb5
;
224 * Note: this is invalid even for "classic" (pre-UTF16) 31-bit
225 * UTF-8 if the value is >= 0x80000000. This at least tries to do
226 * something vaguely sensible with it. Caveat programmer.
227 * The __utf*__ string transform functions do reject these
230 * vb5 cannot be more than 3, as a 32-bit value has been shifted
231 * right by 5*6 = 30 bits already.
237 /* Emit extension bytes as appropriate */
/*
 * Fall-through label chain: entering at outN stores N extension bytes.
 * Each byte is 0x80 plus six payload bits. The groups are stored from
 * vb4 down to v itself, i.e. the most significant group goes out first.
 */
238 out5
: *q
++ = 0x80 + (vb4
& 63);
239 out4
: *q
++ = 0x80 + (vb3
& 63);
240 out3
: *q
++ = 0x80 + (vb2
& 63);
241 out2
: *q
++ = 0x80 + (vb1
& 63);
/* The last byte always carries the low six bits of the original value */
242 out1
: *q
++ = 0x80 + (v
& 63);
/*
 * ctlbit: map a character code to a one-bit mask.
 *
 * Returns the value with only bit v set when v is below 32, and 0 for
 * every other v. The range test also keeps the shift count below the
 * width of uint32_t.
 */
246 static inline uint32_t ctlbit(uint32_t v
)
248 return unlikely(v
< 32) ? UINT32_C(1) << v
: 0;
252 (badctl & (ctlmask |= ctlbit(c)))
254 #define EMIT_UTF8(c) \
258 q = emit_utf8(q, ec); \
263 unsigned char ec = (c); \
269 * Same as nasm_quote, but the input length is taken from strlen(str);
270 * the lenp argument is optional.
/*
 * nasm_quote_cstr: quote a NUL-terminated string.
 *
 * The length is measured here and handed to nasm_quote through a local
 * variable, so the call does not depend on lenp being non-NULL.
 */
272 char *nasm_quote_cstr(const char *str
, size_t *lenp
)
/* str is passed to strlen(), so it must be NUL-terminated */
274 size_t len
= strlen(str
);
/* len goes in by address: it carries the input length in, and nasm_quote is documented to update it with the output length */
275 char *qstr
= nasm_quote(str
, &len
);
282 * Do an *in-place* dequoting of the specified string, returning the
283 * resulting length (the output may contain embedded NULs).
285 * In-place replacement is possible since the unquoted length is always
286 * shorter than or equal to the quoted length.
288 * *ep points to the final quote, or to the null if improperly quoted.
290 * Issue an error if the string contains control characters
291 * corresponding to bits set in badctl; in that case, the output
292 * string, but not *ep, is truncated before the first invalid
/*
 * nasm_unquote_common: shared worker for the unquote entry points.
 *
 * badctl is a bit mask of character codes to reject. It is tested against
 * ctlmask, the mask of control characters seen, once processing is done.
 */
296 static size_t nasm_unquote_common(char *str
, char **ep
,
297 const uint32_t badctl
)
300 const unsigned char *p
;
301 const unsigned char *escp
= NULL
;
304 uint32_t ctlmask
= 0; /* Mask of control characters seen */
/* Read pointer p and write pointer q both start at the beginning of str: the output overwrites the input */
316 p
= q
= (unsigned char *)str
;
325 /* '...' or "..." string */
/* Advance until the quote character bq or the terminating NUL has been read */
326 while ((c
= *p
++) && (c
!= bq
))
/* State machine: keeps running until some branch sets st_done */
334 while (state
!= st_done
) {
340 state
= st_backslash
;
354 escp
= p
; /* Beginning of argument sequence */
403 ndig
= 2; /* Up to two more digits */
408 p
--; /* Reprocess; terminates string */
414 if (state
== st_start
)
/* Octal escape: fold one more digit into the accumulated value */
419 if (c
>= '0' && c
<= '7') {
420 nval
= (nval
<< 3) + (c
- '0');
422 break; /* Might have more digits */
424 p
--; /* Process this character again */
/* Hexadecimal escape: fold one more digit into the accumulated value */
432 if (nasm_isxdigit(c
)) {
433 nval
= (nval
<< 4) + numvalue(c
);
435 break; /* Might have more digits */
437 p
--; /* Process this character again */
/* p has not moved past escp: apparently no argument characters followed the escape introducer */
440 if (unlikely(p
<= escp
))
442 else if (state
== st_ucs
)
457 /* Not a quoted string, just return the input... */
463 /* Zero-terminate the output */
/* Complain if any control character seen is in the rejected set */
466 if (ctlmask
& badctl
)
467 nasm_nonfatal("control character in string not allowed here");
/* Result: distance from the start of str to the write pointer */
471 return (char *)q
- str
;
/*
 * nasm_unquote: unquote str via nasm_unquote_common with an empty reject
 * mask. With badctl equal to 0 the final ctlmask & badctl test can never
 * be true.
 */
475 size_t nasm_unquote(char *str
, char **ep
)
477 return nasm_unquote_common(str
, ep
, 0);
/*
 * nasm_unquote_cstr: unquote str via nasm_unquote_common, passing a reject
 * mask that covers every code below 32 except the four permitted ones.
 */
479 size_t nasm_unquote_cstr(char *str
, char **ep
)
482 * These are the only control characters permitted: BEL BS TAB ESC
/* One bit per permitted code; ESC has no character escape in C, hence the literal 27 */
484 const uint32_t okctl
= (1 << '\a') | (1 << '\b') | (1 << '\t') | (1 << 27);
/* The complement is the reject mask; bit 0 (NUL) is therefore part of it */
486 return nasm_unquote_common(str
, ep
, ~okctl
);
490 * Find the end of a quoted string; returns the pointer to the terminating
491 * character (either the ending quote or the null character, if unterminated.)
492 * If the input is not a quoted string, return NULL.
/*
 * nasm_skip_string: locate the character that ends the quoted string
 * starting at str, without modifying the string.
 */
494 char *nasm_skip_string(const char *str
)
510 /* '...' or "..." string */
/* Advance until the quote character bq or the terminating NUL has been read */
511 while ((c
= *p
++) && (c
!= bq
))
/* State machine: keeps running until some branch sets st_done */
518 while (state
!= st_done
) {
524 state
= st_backslash
;
537 * Note: for the purpose of finding the end of the string,
538 * all successor states to st_backslash are functionally
539 * equivalent to st_start, since either a backslash or
540 * a backquote will force a return to the st_start state.
/* Any character after a backslash resumes normal scanning; a NUL ends the scan */
542 state
= c
? st_start
: st_done
;
552 /* Not a string at all... */
/* p was post-incremented past the terminating character, so step back one; the cast drops the const qualifier for the return type */
555 return (char *)p
- 1;