combase: Fix the trailing linefeed of a TRACE().
[wine/zf.git] / dlls / vbscript / lex.c
blob5b93739d02f0fa29b3851e7eab8873570142dcd6
1 /*
2 * Copyright 2011 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 #include <assert.h>
20 #include <limits.h>
21 #include <math.h>
23 #include "vbscript.h"
24 #include "parse.h"
25 #include "parser.tab.h"
27 #include "wine/debug.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(vbscript);
31 static const struct {
32 const WCHAR *word;
33 int token;
34 } keywords[] = {
35 {L"and", tAND},
36 {L"byref", tBYREF},
37 {L"byval", tBYVAL},
38 {L"call", tCALL},
39 {L"case", tCASE},
40 {L"class", tCLASS},
41 {L"const", tCONST},
42 {L"default", tDEFAULT},
43 {L"dim", tDIM},
44 {L"do", tDO},
45 {L"each", tEACH},
46 {L"else", tELSE},
47 {L"elseif", tELSEIF},
48 {L"empty", tEMPTY},
49 {L"end", tEND},
50 {L"eqv", tEQV},
51 {L"error", tERROR},
52 {L"exit", tEXIT},
53 {L"explicit", tEXPLICIT},
54 {L"false", tFALSE},
55 {L"for", tFOR},
56 {L"function", tFUNCTION},
57 {L"get", tGET},
58 {L"goto", tGOTO},
59 {L"if", tIF},
60 {L"imp", tIMP},
61 {L"in", tIN},
62 {L"is", tIS},
63 {L"let", tLET},
64 {L"loop", tLOOP},
65 {L"me", tME},
66 {L"mod", tMOD},
67 {L"new", tNEW},
68 {L"next", tNEXT},
69 {L"not", tNOT},
70 {L"nothing", tNOTHING},
71 {L"null", tNULL},
72 {L"on", tON},
73 {L"option", tOPTION},
74 {L"or", tOR},
75 {L"preserve", tPRESERVE},
76 {L"private", tPRIVATE},
77 {L"property", tPROPERTY},
78 {L"public", tPUBLIC},
79 {L"redim", tREDIM},
80 {L"rem", tREM},
81 {L"resume", tRESUME},
82 {L"select", tSELECT},
83 {L"set", tSET},
84 {L"step", tSTEP},
85 {L"stop", tSTOP},
86 {L"sub", tSUB},
87 {L"then", tTHEN},
88 {L"to", tTO},
89 {L"true", tTRUE},
90 {L"until", tUNTIL},
91 {L"wend", tWEND},
92 {L"while", tWHILE},
93 {L"with", tWITH},
94 {L"xor", tXOR}
97 static inline BOOL is_identifier_char(WCHAR c)
99 return iswalnum(c) || c == '_';
102 static int check_keyword(parser_ctx_t *ctx, const WCHAR *word, const WCHAR **lval)
104 const WCHAR *p1 = ctx->ptr;
105 const WCHAR *p2 = word;
106 WCHAR c;
108 while(p1 < ctx->end && *p2) {
109 c = towlower(*p1);
110 if(c != *p2)
111 return c - *p2;
112 p1++;
113 p2++;
116 if(*p2 || (p1 < ctx->end && is_identifier_char(*p1)))
117 return 1;
119 ctx->ptr = p1;
120 *lval = word;
121 return 0;
124 static int check_keywords(parser_ctx_t *ctx, const WCHAR **lval)
126 int min = 0, max = ARRAY_SIZE(keywords)-1, r, i;
128 while(min <= max) {
129 i = (min+max)/2;
131 r = check_keyword(ctx, keywords[i].word, lval);
132 if(!r)
133 return keywords[i].token;
135 if(r > 0)
136 min = i+1;
137 else
138 max = i-1;
141 return 0;
144 static int parse_identifier(parser_ctx_t *ctx, const WCHAR **ret)
146 const WCHAR *ptr = ctx->ptr++;
147 WCHAR *str;
148 int len;
150 while(ctx->ptr < ctx->end && is_identifier_char(*ctx->ptr))
151 ctx->ptr++;
152 len = ctx->ptr-ptr;
154 str = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
155 if(!str)
156 return 0;
158 memcpy(str, ptr, (len+1)*sizeof(WCHAR));
159 str[len] = 0;
160 *ret = str;
161 return tIdentifier;
164 static int parse_string_literal(parser_ctx_t *ctx, const WCHAR **ret)
166 const WCHAR *ptr = ++ctx->ptr;
167 WCHAR *rptr;
168 int len = 0;
170 while(ctx->ptr < ctx->end) {
171 if(*ctx->ptr == '\n' || *ctx->ptr == '\r') {
172 FIXME("newline inside string literal\n");
173 return 0;
176 if(*ctx->ptr == '"') {
177 if(ctx->ptr[1] != '"')
178 break;
179 len--;
180 ctx->ptr++;
182 ctx->ptr++;
185 if(ctx->ptr == ctx->end) {
186 FIXME("unterminated string literal\n");
187 return 0;
190 len += ctx->ptr-ptr;
192 *ret = rptr = parser_alloc(ctx, (len+1)*sizeof(WCHAR));
193 if(!rptr)
194 return 0;
196 while(ptr < ctx->ptr) {
197 if(*ptr == '"')
198 ptr++;
199 *rptr++ = *ptr++;
202 *rptr = 0;
203 ctx->ptr++;
204 return tString;
207 static int parse_numeric_literal(parser_ctx_t *ctx, void **ret)
209 BOOL use_int = TRUE;
210 LONGLONG d = 0, hlp;
211 int exp = 0;
212 double r;
214 if(*ctx->ptr == '0' && !('0' <= ctx->ptr[1] && ctx->ptr[1] <= '9') && ctx->ptr[1] != '.')
215 return *ctx->ptr++;
217 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
218 hlp = d*10 + *(ctx->ptr++) - '0';
219 if(d>MAXLONGLONG/10 || hlp<0) {
220 exp++;
221 break;
223 else
224 d = hlp;
226 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
227 exp++;
228 ctx->ptr++;
231 if(*ctx->ptr == '.') {
232 use_int = FALSE;
233 ctx->ptr++;
235 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr)) {
236 hlp = d*10 + *(ctx->ptr++) - '0';
237 if(d>MAXLONGLONG/10 || hlp<0)
238 break;
240 d = hlp;
241 exp--;
243 while(ctx->ptr < ctx->end && is_digit(*ctx->ptr))
244 ctx->ptr++;
247 if(*ctx->ptr == 'e' || *ctx->ptr == 'E') {
248 int e = 0, sign = 1;
250 ctx->ptr++;
251 if(*ctx->ptr == '-') {
252 ctx->ptr++;
253 sign = -1;
254 }else if(*ctx->ptr == '+') {
255 ctx->ptr++;
258 if(!is_digit(*ctx->ptr)) {
259 FIXME("Invalid numeric literal\n");
260 return 0;
263 use_int = FALSE;
265 do {
266 e = e*10 + *(ctx->ptr++) - '0';
267 if(sign == -1 && -e+exp < -(INT_MAX/100)) {
268 /* The literal will be rounded to 0 anyway. */
269 while(is_digit(*ctx->ptr))
270 ctx->ptr++;
271 *(double*)ret = 0;
272 return tDouble;
275 if(sign*e + exp > INT_MAX/100) {
276 FIXME("Invalid numeric literal\n");
277 return 0;
279 } while(is_digit(*ctx->ptr));
281 exp += sign*e;
284 if(use_int && (LONG)d == d) {
285 *(LONG*)ret = d;
286 return tInt;
289 r = exp>=0 ? d*pow(10, exp) : d/pow(10, -exp);
290 if(isinf(r)) {
291 FIXME("Invalid numeric literal\n");
292 return 0;
295 *(double*)ret = r;
296 return tDouble;
299 static int hex_to_int(WCHAR c)
301 if('0' <= c && c <= '9')
302 return c-'0';
303 if('a' <= c && c <= 'f')
304 return c+10-'a';
305 if('A' <= c && c <= 'F')
306 return c+10-'A';
307 return -1;
310 static int parse_hex_literal(parser_ctx_t *ctx, LONG *ret)
312 const WCHAR *begin = ctx->ptr;
313 unsigned l = 0, d;
315 while((d = hex_to_int(*++ctx->ptr)) != -1)
316 l = l*16 + d;
318 if(begin + 9 /* max digits+1 */ < ctx->ptr || (*ctx->ptr != '&' && is_identifier_char(*ctx->ptr))) {
319 FIXME("invalid literal\n");
320 return 0;
323 if(*ctx->ptr == '&') {
324 ctx->ptr++;
325 *ret = l;
326 }else {
327 *ret = l == (UINT16)l ? (INT16)l : l;
329 return tInt;
332 static void skip_spaces(parser_ctx_t *ctx)
334 while(*ctx->ptr == ' ' || *ctx->ptr == '\t')
335 ctx->ptr++;
338 static int comment_line(parser_ctx_t *ctx)
340 static const WCHAR newlineW[] = {'\n','\r',0};
341 ctx->ptr = wcspbrk(ctx->ptr, newlineW);
342 if(ctx->ptr)
343 ctx->ptr++;
344 else
345 ctx->ptr = ctx->end;
346 return tNL;
349 static int parse_next_token(void *lval, unsigned *loc, parser_ctx_t *ctx)
351 WCHAR c;
353 skip_spaces(ctx);
354 *loc = ctx->ptr - ctx->code;
355 if(ctx->ptr == ctx->end)
356 return ctx->last_token == tNL ? 0 : tNL;
358 c = *ctx->ptr;
360 if('0' <= c && c <= '9')
361 return parse_numeric_literal(ctx, lval);
363 if(iswalpha(c)) {
364 int ret = 0;
365 if(ctx->last_token != '.' && ctx->last_token != tDOT)
366 ret = check_keywords(ctx, lval);
367 if(!ret)
368 return parse_identifier(ctx, lval);
369 if(ret != tREM)
370 return ret;
371 c = '\'';
374 switch(c) {
375 case '\n':
376 case '\r':
377 ctx->ptr++;
378 return tNL;
379 case '\'':
380 return comment_line(ctx);
381 case ':':
382 case ')':
383 case ',':
384 case '=':
385 case '+':
386 case '*':
387 case '/':
388 case '^':
389 case '\\':
390 case '_':
391 return *ctx->ptr++;
392 case '.':
394 * We need to distinguish between '.' used as part of a member expression and
395 * a beginning of a dot expression (a member expression accessing with statement
396 * expression).
398 c = ctx->ptr > ctx->code ? ctx->ptr[-1] : '\n';
399 ctx->ptr++;
400 return is_identifier_char(c) || c == ')' ? '.' : tDOT;
401 case '-':
402 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '>')
403 return comment_line(ctx);
404 ctx->ptr++;
405 return '-';
406 case '(':
407 /* NOTE:
408 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
409 * in call statement special case |f()| without 'call' keyword
411 ctx->ptr++;
412 skip_spaces(ctx);
413 if(*ctx->ptr == ')') {
414 ctx->ptr++;
415 return tEMPTYBRACKETS;
418 * Parser can't predict if bracket is part of argument expression or an argument
419 * in call expression. We predict it here instead.
421 if(ctx->last_token == tIdentifier || ctx->last_token == ')')
422 return '(';
423 return tEXPRLBRACKET;
424 case '"':
425 return parse_string_literal(ctx, lval);
426 case '&':
427 if(*++ctx->ptr == 'h' || *ctx->ptr == 'H')
428 return parse_hex_literal(ctx, lval);
429 return '&';
430 case '<':
431 switch(*++ctx->ptr) {
432 case '>':
433 ctx->ptr++;
434 return tNEQ;
435 case '=':
436 ctx->ptr++;
437 return tLTEQ;
438 case '!':
439 if(ctx->is_html && ctx->ptr[1] == '-' && ctx->ptr[2] == '-')
440 return comment_line(ctx);
442 return '<';
443 case '>':
444 if(*++ctx->ptr == '=') {
445 ctx->ptr++;
446 return tGTEQ;
448 return '>';
449 default:
450 FIXME("Unhandled char %c in %s\n", *ctx->ptr, debugstr_w(ctx->ptr));
453 return 0;
456 int parser_lex(void *lval, unsigned *loc, parser_ctx_t *ctx)
458 int ret;
460 if (ctx->last_token == tEXPRESSION)
462 ctx->last_token = tNL;
463 return tEXPRESSION;
466 while(1) {
467 ret = parse_next_token(lval, loc, ctx);
468 if(ret == '_') {
469 skip_spaces(ctx);
470 if(*ctx->ptr != '\n' && *ctx->ptr != '\r') {
471 FIXME("'_' not followed by newline\n");
472 return 0;
474 if(*ctx->ptr == '\r')
475 ctx->ptr++;
476 if(*ctx->ptr == '\n')
477 ctx->ptr++;
478 continue;
480 if(ret != tNL || ctx->last_token != tNL)
481 break;
483 ctx->last_nl = ctx->ptr-ctx->code;
486 return (ctx->last_token = ret);