Merge pull request #2593 from Akury83/master
[RRG-proxmark3.git] / client / deps / liblua / llex.c
blob4bfbed0970ef324f294fd1859dc8c84a335c1ba1
1 /*
2 ** $Id: llex.c $
3 ** Lexical Analyzer
4 ** See Copyright Notice in lua.h
5 */
7 #define llex_c
8 #define LUA_CORE
10 #include "lprefix.h"
13 #include <locale.h>
14 #include <string.h>
16 #include "lua.h"
18 #include "lctype.h"
19 #include "ldebug.h"
20 #include "ldo.h"
21 #include "lgc.h"
22 #include "llex.h"
23 #include "lobject.h"
24 #include "lparser.h"
25 #include "lstate.h"
26 #include "lstring.h"
27 #include "ltable.h"
28 #include "lzio.h"
/*
** Advance the lexer by one character: stores the result of
** 'zgetc(ls->z)' in 'ls->current' and yields that value.
** The parameter is parenthesized so that any pointer expression
** (e.g. '&state') expands correctly.
*/
#define next(ls)  ((ls)->current = zgetc((ls)->z))

/*
** True when the current character is '\n' or '\r'.
** The parameter is parenthesized so that any pointer expression
** (e.g. '&state') expands correctly.
*/
#define currIsNewline(ls)  ((ls)->current == '\n' || (ls)->current == '\r')
/* ORDER RESERVED */
/*
** Printable text of every multi-character token. The table is indexed
** with (token - FIRST_RESERVED), so the order of the entries must not
** change.
*/
static const char *const luaX_tokens[] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
    /* multi-character operators */
    "//", "..", "...", "==", ">=", "<=", "~=", "<<", ">>", "::",
    /* end of stream */
    "<eof>",
    /* numerals, names, and strings */
    "<number>", "<integer>", "<name>", "<string>"
};
/*
** Append the current character to the token buffer (via 'save') and
** then advance to the following character (via 'next'). The parameter
** is parenthesized where it is dereferenced so that any pointer
** expression expands correctly.
*/
#define save_and_next(ls)  (save((ls), (ls)->current), next(ls))
54 static l_noret lexerror(LexState *ls, const char *msg, int token);
57 static void save(LexState *ls, int c) {
58 Mbuffer *b = ls->buff;
59 if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
60 size_t newsize;
61 if (luaZ_sizebuffer(b) >= MAX_SIZE / 2)
62 lexerror(ls, "lexical element too long", 0);
63 newsize = luaZ_sizebuffer(b) * 2;
64 luaZ_resizebuffer(ls->L, b, newsize);
66 b->buffer[luaZ_bufflen(b)++] = cast_char(c);
70 void luaX_init(lua_State *L) {
71 int i;
72 TString *e = luaS_newliteral(L, LUA_ENV); /* create env name */
73 luaC_fix(L, obj2gco(e)); /* never collect this name */
74 for (i = 0; i < NUM_RESERVED; i++) {
75 TString *ts = luaS_new(L, luaX_tokens[i]);
76 luaC_fix(L, obj2gco(ts)); /* reserved words are never collected */
77 ts->extra = cast_byte(i + 1); /* reserved word */
82 const char *luaX_token2str(LexState *ls, int token) {
83 if (token < FIRST_RESERVED) { /* single-byte symbols? */
84 if (lisprint(token))
85 return luaO_pushfstring(ls->L, "'%c'", token);
86 else /* control character */
87 return luaO_pushfstring(ls->L, "'<\\%d>'", token);
88 } else {
89 const char *s = luaX_tokens[token - FIRST_RESERVED];
90 if (token < TK_EOS) /* fixed format (symbols and reserved words)? */
91 return luaO_pushfstring(ls->L, "'%s'", s);
92 else /* names, strings, and numerals */
93 return s;
98 static const char *txtToken(LexState *ls, int token) {
99 switch (token) {
100 case TK_NAME:
101 case TK_STRING:
102 case TK_FLT:
103 case TK_INT:
104 save(ls, '\0');
105 return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
106 default:
107 return luaX_token2str(ls, token);
112 static l_noret lexerror(LexState *ls, const char *msg, int token) {
113 msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
114 if (token)
115 luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
116 luaD_throw(ls->L, LUA_ERRSYNTAX);
120 l_noret luaX_syntaxerror(LexState *ls, const char *msg) {
121 lexerror(ls, msg, ls->t.token);
126 ** Creates a new string and anchors it in scanner's table so that it
127 ** will not be collected until the end of the compilation; by that time
128 ** it should be anchored somewhere. It also internalizes long strings,
129 ** ensuring there is only one copy of each unique string. The table
130 ** here is used as a set: the string enters as the key, while its value
131 ** is irrelevant. We use the string itself as the value only because it
132 ** is a TValue readily available. Later, the code generation can change
133 ** this value.
135 TString *luaX_newstring(LexState *ls, const char *str, size_t l) {
136 lua_State *L = ls->L;
137 TString *ts = luaS_newlstr(L, str, l); /* create new string */
138 const TValue *o = luaH_getstr(ls->h, ts);
139 if (!ttisnil(o)) /* string already present? */
140 ts = keystrval(nodefromval(o)); /* get saved copy */
141 else { /* not in use yet */
142 TValue *stv = s2v(L->top.p++); /* reserve stack space for string */
143 setsvalue(L, stv, ts); /* temporarily anchor the string */
144 luaH_finishset(L, ls->h, stv, o, stv); /* t[string] = string */
145 /* table is not a metatable, so it does not need to invalidate cache */
146 luaC_checkGC(L);
147 L->top.p--; /* remove string from stack */
149 return ts;
154 ** increment line number and skips newline sequence (any of
155 ** \n, \r, \n\r, or \r\n)
157 static void inclinenumber(LexState *ls) {
158 int old = ls->current;
159 lua_assert(currIsNewline(ls));
160 next(ls); /* skip '\n' or '\r' */
161 if (currIsNewline(ls) && ls->current != old)
162 next(ls); /* skip '\n\r' or '\r\n' */
163 if (++ls->linenumber >= MAX_INT)
164 lexerror(ls, "chunk has too many lines", 0);
168 void luaX_setinput(lua_State *L, LexState *ls, ZIO *z, TString *source,
169 int firstchar) {
170 ls->t.token = 0;
171 ls->L = L;
172 ls->current = firstchar;
173 ls->lookahead.token = TK_EOS; /* no look-ahead token */
174 ls->z = z;
175 ls->fs = NULL;
176 ls->linenumber = 1;
177 ls->lastline = 1;
178 ls->source = source;
179 ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */
180 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */
186 ** =======================================================
187 ** LEXICAL ANALYZER
188 ** =======================================================
192 static int check_next1(LexState *ls, int c) {
193 if (ls->current == c) {
194 next(ls);
195 return 1;
196 } else return 0;
201 ** Check whether current char is in set 'set' (with two chars) and
202 ** saves it
204 static int check_next2(LexState *ls, const char *set) {
205 lua_assert(set[2] == '\0');
206 if (ls->current == set[0] || ls->current == set[1]) {
207 save_and_next(ls);
208 return 1;
209 } else return 0;
213 /* LUA_NUMBER */
215 ** This function is quite liberal in what it accepts, as 'luaO_str2num'
216 ** will reject ill-formed numerals. Roughly, it accepts the following
217 ** pattern:
219 ** %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
221 ** The only tricky part is to accept [+-] only after a valid exponent
222 ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
224 ** The caller might have already read an initial dot.
226 static int read_numeral(LexState *ls, SemInfo *seminfo) {
227 TValue obj;
228 const char *expo = "Ee";
229 int first = ls->current;
230 lua_assert(lisdigit(ls->current));
231 save_and_next(ls);
232 if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */
233 expo = "Pp";
234 for (;;) {
235 if (check_next2(ls, expo)) /* exponent mark? */
236 check_next2(ls, "-+"); /* optional exponent sign */
237 else if (lisxdigit(ls->current) || ls->current == '.') /* '%x|%.' */
238 save_and_next(ls);
239 else break;
241 if (lislalpha(ls->current)) /* is numeral touching a letter? */
242 save_and_next(ls); /* force an error */
243 save(ls, '\0');
244 if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0) /* format error? */
245 lexerror(ls, "malformed number", TK_FLT);
246 if (ttisinteger(&obj)) {
247 seminfo->i = ivalue(&obj);
248 return TK_INT;
249 } else {
250 lua_assert(ttisfloat(&obj));
251 seminfo->r = fltvalue(&obj);
252 return TK_FLT;
258 ** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
259 ** sequence is well formed, return its number of '='s + 2; otherwise,
260 ** return 1 if it is a single bracket (no '='s and no 2nd bracket);
261 ** otherwise (an unfinished '[==...') return 0.
263 static size_t skip_sep(LexState *ls) {
264 size_t count = 0;
265 int s = ls->current;
266 lua_assert(s == '[' || s == ']');
267 save_and_next(ls);
268 while (ls->current == '=') {
269 save_and_next(ls);
270 count++;
272 return (ls->current == s) ? count + 2
273 : (count == 0) ? 1
274 : 0;
278 static void read_long_string(LexState *ls, SemInfo *seminfo, size_t sep) {
279 int line = ls->linenumber; /* initial line (for error message) */
280 save_and_next(ls); /* skip 2nd '[' */
281 if (currIsNewline(ls)) /* string starts with a newline? */
282 inclinenumber(ls); /* skip it */
283 for (;;) {
284 switch (ls->current) {
285 case EOZ: { /* error */
286 const char *what = (seminfo ? "string" : "comment");
287 const char *msg = luaO_pushfstring(ls->L,
288 "unfinished long %s (starting at line %d)", what, line);
289 lexerror(ls, msg, TK_EOS);
290 break; /* to avoid warnings */
292 case ']': {
293 if (skip_sep(ls) == sep) {
294 save_and_next(ls); /* skip 2nd ']' */
295 goto endloop;
297 break;
299 case '\n':
300 case '\r': {
301 save(ls, '\n');
302 inclinenumber(ls);
303 if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */
304 break;
306 default: {
307 if (seminfo) save_and_next(ls);
308 else next(ls);
312 endloop:
313 if (seminfo)
314 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
315 luaZ_bufflen(ls->buff) - 2 * sep);
319 static void esccheck(LexState *ls, int c, const char *msg) {
320 if (!c) {
321 if (ls->current != EOZ)
322 save_and_next(ls); /* add current to buffer for error message */
323 lexerror(ls, msg, TK_STRING);
328 static int gethexa(LexState *ls) {
329 save_and_next(ls);
330 esccheck(ls, lisxdigit(ls->current), "hexadecimal digit expected");
331 return luaO_hexavalue(ls->current);
335 static int readhexaesc(LexState *ls) {
336 int r = gethexa(ls);
337 r = (r << 4) + gethexa(ls);
338 luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */
339 return r;
343 static unsigned long readutf8esc(LexState *ls) {
344 unsigned long r;
345 int i = 4; /* chars to be removed: '\', 'u', '{', and first digit */
346 save_and_next(ls); /* skip 'u' */
347 esccheck(ls, ls->current == '{', "missing '{'");
348 r = gethexa(ls); /* must have at least one digit */
349 while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
350 i++;
351 esccheck(ls, r <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
352 r = (r << 4) + luaO_hexavalue(ls->current);
354 esccheck(ls, ls->current == '}', "missing '}'");
355 next(ls); /* skip '}' */
356 luaZ_buffremove(ls->buff, i); /* remove saved chars from buffer */
357 return r;
361 static void utf8esc(LexState *ls) {
362 char buff[UTF8BUFFSZ];
363 int n = luaO_utf8esc(buff, readutf8esc(ls));
364 for (; n > 0; n--) /* add 'buff' to string */
365 save(ls, buff[UTF8BUFFSZ - n]);
369 static int readdecesc(LexState *ls) {
370 int i;
371 int r = 0; /* result accumulator */
372 for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */
373 r = 10 * r + ls->current - '0';
374 save_and_next(ls);
376 esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
377 luaZ_buffremove(ls->buff, i); /* remove read digits from buffer */
378 return r;
382 static void read_string(LexState *ls, int del, SemInfo *seminfo) {
383 save_and_next(ls); /* keep delimiter (for error messages) */
384 while (ls->current != del) {
385 switch (ls->current) {
386 case EOZ:
387 lexerror(ls, "unfinished string", TK_EOS);
388 break; /* to avoid warnings */
389 case '\n':
390 case '\r':
391 lexerror(ls, "unfinished string", TK_STRING);
392 break; /* to avoid warnings */
393 case '\\': { /* escape sequences */
394 int c; /* final character to be saved */
395 save_and_next(ls); /* keep '\\' for error messages */
396 switch (ls->current) {
397 case 'a':
398 c = '\a';
399 goto read_save;
400 case 'b':
401 c = '\b';
402 goto read_save;
403 case 'f':
404 c = '\f';
405 goto read_save;
406 case 'n':
407 c = '\n';
408 goto read_save;
409 case 'r':
410 c = '\r';
411 goto read_save;
412 case 't':
413 c = '\t';
414 goto read_save;
415 case 'v':
416 c = '\v';
417 goto read_save;
418 case 'x':
419 c = readhexaesc(ls);
420 goto read_save;
421 case 'u':
422 utf8esc(ls);
423 goto no_save;
424 case '\n':
425 case '\r':
426 inclinenumber(ls);
427 c = '\n';
428 goto only_save;
429 case '\\':
430 case '\"':
431 case '\'':
432 c = ls->current;
433 goto read_save;
434 case EOZ:
435 goto no_save; /* will raise an error next loop */
436 case 'z': { /* zap following span of spaces */
437 luaZ_buffremove(ls->buff, 1); /* remove '\\' */
438 next(ls); /* skip the 'z' */
439 while (lisspace(ls->current)) {
440 if (currIsNewline(ls)) inclinenumber(ls);
441 else next(ls);
443 goto no_save;
445 default: {
446 esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
447 c = readdecesc(ls); /* digital escape '\ddd' */
448 goto only_save;
451 read_save:
452 next(ls);
453 /* go through */
454 only_save:
455 luaZ_buffremove(ls->buff, 1); /* remove '\\' */
456 save(ls, c);
457 /* go through */
458 no_save:
459 break;
461 default:
462 save_and_next(ls);
465 save_and_next(ls); /* skip delimiter */
466 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
467 luaZ_bufflen(ls->buff) - 2);
471 static int llex(LexState *ls, SemInfo *seminfo) {
472 luaZ_resetbuffer(ls->buff);
473 for (;;) {
474 switch (ls->current) {
475 case '\n':
476 case '\r': { /* line breaks */
477 inclinenumber(ls);
478 break;
480 case ' ':
481 case '\f':
482 case '\t':
483 case '\v': { /* spaces */
484 next(ls);
485 break;
487 case '-': { /* '-' or '--' (comment) */
488 next(ls);
489 if (ls->current != '-') return '-';
490 /* else is a comment */
491 next(ls);
492 if (ls->current == '[') { /* long comment? */
493 size_t sep = skip_sep(ls);
494 luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */
495 if (sep >= 2) {
496 read_long_string(ls, NULL, sep); /* skip long comment */
497 luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */
498 break;
501 /* else short comment */
502 while (!currIsNewline(ls) && ls->current != EOZ)
503 next(ls); /* skip until end of line (or end of file) */
504 break;
506 case '[': { /* long string or simply '[' */
507 size_t sep = skip_sep(ls);
508 if (sep >= 2) {
509 read_long_string(ls, seminfo, sep);
510 return TK_STRING;
511 } else if (sep == 0) /* '[=...' missing second bracket? */
512 lexerror(ls, "invalid long string delimiter", TK_STRING);
513 return '[';
515 case '=': {
516 next(ls);
517 if (check_next1(ls, '=')) return TK_EQ; /* '==' */
518 else return '=';
520 case '<': {
521 next(ls);
522 if (check_next1(ls, '=')) return TK_LE; /* '<=' */
523 else if (check_next1(ls, '<')) return TK_SHL; /* '<<' */
524 else return '<';
526 case '>': {
527 next(ls);
528 if (check_next1(ls, '=')) return TK_GE; /* '>=' */
529 else if (check_next1(ls, '>')) return TK_SHR; /* '>>' */
530 else return '>';
532 case '/': {
533 next(ls);
534 if (check_next1(ls, '/')) return TK_IDIV; /* '//' */
535 else return '/';
537 case '~': {
538 next(ls);
539 if (check_next1(ls, '=')) return TK_NE; /* '~=' */
540 else return '~';
542 case ':': {
543 next(ls);
544 if (check_next1(ls, ':')) return TK_DBCOLON; /* '::' */
545 else return ':';
547 case '"':
548 case '\'': { /* short literal strings */
549 read_string(ls, ls->current, seminfo);
550 return TK_STRING;
552 case '.': { /* '.', '..', '...', or number */
553 save_and_next(ls);
554 if (check_next1(ls, '.')) {
555 if (check_next1(ls, '.'))
556 return TK_DOTS; /* '...' */
557 else return TK_CONCAT; /* '..' */
558 } else if (!lisdigit(ls->current)) return '.';
559 else return read_numeral(ls, seminfo);
561 case '0':
562 case '1':
563 case '2':
564 case '3':
565 case '4':
566 case '5':
567 case '6':
568 case '7':
569 case '8':
570 case '9': {
571 return read_numeral(ls, seminfo);
573 case EOZ: {
574 return TK_EOS;
576 default: {
577 if (lislalpha(ls->current)) { /* identifier or reserved word? */
578 TString *ts;
579 do {
580 save_and_next(ls);
581 } while (lislalnum(ls->current));
582 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
583 luaZ_bufflen(ls->buff));
584 seminfo->ts = ts;
585 if (isreserved(ts)) /* reserved word? */
586 return ts->extra - 1 + FIRST_RESERVED;
587 else {
588 return TK_NAME;
590 } else { /* single-char tokens ('+', '*', '%', '{', '}', ...) */
591 int c = ls->current;
592 next(ls);
593 return c;
601 void luaX_next(LexState *ls) {
602 ls->lastline = ls->linenumber;
603 if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */
604 ls->t = ls->lookahead; /* use this one */
605 ls->lookahead.token = TK_EOS; /* and discharge it */
606 } else
607 ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */
611 int luaX_lookahead(LexState *ls) {
612 lua_assert(ls->lookahead.token == TK_EOS);
613 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
614 return ls->lookahead.token;