2 * Copyright 2011 Jacek Caban for CodeWeavers
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "parser.tab.h"
27 #include "wine/debug.h"
29 WINE_DEFAULT_DEBUG_CHANNEL(vbscript
);
42 {L
"default", tDEFAULT
},
53 {L
"explicit", tEXPLICIT
},
56 {L
"function", tFUNCTION
},
70 {L
"nothing", tNOTHING
},
75 {L
"preserve", tPRESERVE
},
76 {L
"private", tPRIVATE
},
77 {L
"property", tPROPERTY
},
97 static inline BOOL
is_identifier_char(WCHAR c
)
99 return iswalnum(c
) || c
== '_';
102 static int check_keyword(parser_ctx_t
*ctx
, const WCHAR
*word
, const WCHAR
**lval
)
104 const WCHAR
*p1
= ctx
->ptr
;
105 const WCHAR
*p2
= word
;
108 while(p1
< ctx
->end
&& *p2
) {
116 if(*p2
|| (p1
< ctx
->end
&& is_identifier_char(*p1
)))
124 static int check_keywords(parser_ctx_t
*ctx
, const WCHAR
**lval
)
126 int min
= 0, max
= ARRAY_SIZE(keywords
)-1, r
, i
;
131 r
= check_keyword(ctx
, keywords
[i
].word
, lval
);
133 return keywords
[i
].token
;
144 static int parse_identifier(parser_ctx_t
*ctx
, const WCHAR
**ret
)
146 const WCHAR
*ptr
= ctx
->ptr
++;
150 while(ctx
->ptr
< ctx
->end
&& is_identifier_char(*ctx
->ptr
))
154 str
= parser_alloc(ctx
, (len
+1)*sizeof(WCHAR
));
158 memcpy(str
, ptr
, (len
+1)*sizeof(WCHAR
));
164 static int parse_string_literal(parser_ctx_t
*ctx
, const WCHAR
**ret
)
166 const WCHAR
*ptr
= ++ctx
->ptr
;
170 while(ctx
->ptr
< ctx
->end
) {
171 if(*ctx
->ptr
== '\n' || *ctx
->ptr
== '\r') {
172 FIXME("newline inside string literal\n");
176 if(*ctx
->ptr
== '"') {
177 if(ctx
->ptr
[1] != '"')
185 if(ctx
->ptr
== ctx
->end
) {
186 FIXME("unterminated string literal\n");
192 *ret
= rptr
= parser_alloc(ctx
, (len
+1)*sizeof(WCHAR
));
196 while(ptr
< ctx
->ptr
) {
207 static int parse_numeric_literal(parser_ctx_t
*ctx
, void **ret
)
214 if(*ctx
->ptr
== '0' && !('0' <= ctx
->ptr
[1] && ctx
->ptr
[1] <= '9') && ctx
->ptr
[1] != '.')
217 while(ctx
->ptr
< ctx
->end
&& is_digit(*ctx
->ptr
)) {
218 hlp
= d
*10 + *(ctx
->ptr
++) - '0';
219 if(d
>MAXLONGLONG
/10 || hlp
<0) {
226 while(ctx
->ptr
< ctx
->end
&& is_digit(*ctx
->ptr
)) {
231 if(*ctx
->ptr
== '.') {
235 while(ctx
->ptr
< ctx
->end
&& is_digit(*ctx
->ptr
)) {
236 hlp
= d
*10 + *(ctx
->ptr
++) - '0';
237 if(d
>MAXLONGLONG
/10 || hlp
<0)
243 while(ctx
->ptr
< ctx
->end
&& is_digit(*ctx
->ptr
))
247 if(*ctx
->ptr
== 'e' || *ctx
->ptr
== 'E') {
251 if(*ctx
->ptr
== '-') {
254 }else if(*ctx
->ptr
== '+') {
258 if(!is_digit(*ctx
->ptr
)) {
259 FIXME("Invalid numeric literal\n");
266 e
= e
*10 + *(ctx
->ptr
++) - '0';
267 if(sign
== -1 && -e
+exp
< -(INT_MAX
/100)) {
268 /* The literal will be rounded to 0 anyway. */
269 while(is_digit(*ctx
->ptr
))
275 if(sign
*e
+ exp
> INT_MAX
/100) {
276 FIXME("Invalid numeric literal\n");
279 } while(is_digit(*ctx
->ptr
));
284 if(use_int
&& (LONG
)d
== d
) {
289 r
= exp
>=0 ? d
*pow(10, exp
) : d
/pow(10, -exp
);
291 FIXME("Invalid numeric literal\n");
299 static int hex_to_int(WCHAR c
)
301 if('0' <= c
&& c
<= '9')
303 if('a' <= c
&& c
<= 'f')
305 if('A' <= c
&& c
<= 'F')
310 static int parse_hex_literal(parser_ctx_t
*ctx
, LONG
*ret
)
312 const WCHAR
*begin
= ctx
->ptr
;
315 while((d
= hex_to_int(*++ctx
->ptr
)) != -1)
318 if(begin
+ 9 /* max digits+1 */ < ctx
->ptr
) {
319 FIXME("invalid literal\n");
323 if(*ctx
->ptr
== '&') {
327 *ret
= l
== (UINT16
)l
? (INT16
)l
: l
;
332 static void skip_spaces(parser_ctx_t
*ctx
)
334 while(*ctx
->ptr
== ' ' || *ctx
->ptr
== '\t')
338 static int comment_line(parser_ctx_t
*ctx
)
340 ctx
->ptr
= wcspbrk(ctx
->ptr
, L
"\n\r");
348 static int parse_next_token(void *lval
, unsigned *loc
, parser_ctx_t
*ctx
)
353 *loc
= ctx
->ptr
- ctx
->code
;
354 if(ctx
->ptr
== ctx
->end
)
355 return ctx
->last_token
== tNL
? 0 : tNL
;
359 if('0' <= c
&& c
<= '9')
360 return parse_numeric_literal(ctx
, lval
);
364 if(ctx
->last_token
!= '.' && ctx
->last_token
!= tDOT
)
365 ret
= check_keywords(ctx
, lval
);
367 return parse_identifier(ctx
, lval
);
379 return comment_line(ctx
);
393 * We need to distinguish between '.' used as part of a member expression and
394 * a beginning of a dot expression (a member expression accessing with statement
395 * expression) and a floating point number like ".2" .
397 c
= ctx
->ptr
> ctx
->code
? ctx
->ptr
[-1] : '\n';
398 if (is_identifier_char(c
) || c
== ')') {
403 if('0' <= c
&& c
<= '9')
404 return parse_numeric_literal(ctx
, lval
);
408 if(ctx
->is_html
&& ctx
->ptr
[1] == '-' && ctx
->ptr
[2] == '>')
409 return comment_line(ctx
);
414 * We resolve empty brackets in lexer instead of parser to avoid complex conflicts
415 * in call statement special case |f()| without 'call' keyword
419 if(*ctx
->ptr
== ')') {
421 return tEMPTYBRACKETS
;
424 * Parser can't predict if bracket is part of argument expression or an argument
425 * in call expression. We predict it here instead.
427 if(ctx
->last_token
== tIdentifier
|| ctx
->last_token
== ')')
429 return tEXPRLBRACKET
;
431 return parse_string_literal(ctx
, lval
);
433 if(*++ctx
->ptr
== 'h' || *ctx
->ptr
== 'H')
434 return parse_hex_literal(ctx
, lval
);
437 switch(*++ctx
->ptr
) {
445 if(ctx
->is_html
&& ctx
->ptr
[1] == '-' && ctx
->ptr
[2] == '-')
446 return comment_line(ctx
);
450 if(*++ctx
->ptr
== '=') {
456 FIXME("Unhandled char %c in %s\n", *ctx
->ptr
, debugstr_w(ctx
->ptr
));
462 int parser_lex(void *lval
, unsigned *loc
, parser_ctx_t
*ctx
)
466 if (ctx
->last_token
== tEXPRESSION
)
468 ctx
->last_token
= tNL
;
473 ret
= parse_next_token(lval
, loc
, ctx
);
476 if(*ctx
->ptr
!= '\n' && *ctx
->ptr
!= '\r') {
477 FIXME("'_' not followed by newline\n");
480 if(*ctx
->ptr
== '\r')
482 if(*ctx
->ptr
== '\n')
486 if(ret
!= tNL
|| ctx
->last_token
!= tNL
)
489 ctx
->last_nl
= ctx
->ptr
-ctx
->code
;
492 return (ctx
->last_token
= ret
);