1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
22 PRECEDENCE_ASSIGNMENT
= 1, // Lowest precedence.
25 PRECEDENCE_EQUALITY
= 4,
26 PRECEDENCE_RELATION
= 5,
28 PRECEDENCE_PREFIX
= 7,
30 PRECEDENCE_DOT
= 9, // Highest precedence.
33 // The top-level for blocks/ifs is recursive descent, the expression parser is
34 // a Pratt parser. The basic idea there is to have the precedences (and
35 // associativities) encoded relative to each other and only parse up until you
36 // hit something of that precedence. There's a dispatch table in expressions_
37 // at the top of parser.cc that describes how each token dispatches if it's
38 // seen as either a prefix or infix operator, and if it's infix, what its
42 // - http://javascript.crockford.com/tdop/tdop.html
43 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
45 // Indexed by Token::Type.
46 ParserHelper
Parser::expressions_
[] = {
47 {NULL
, NULL
, -1}, // INVALID
48 {&Parser::Literal
, NULL
, -1}, // INTEGER
49 {&Parser::Literal
, NULL
, -1}, // STRING
50 {&Parser::Literal
, NULL
, -1}, // TRUE_TOKEN
51 {&Parser::Literal
, NULL
, -1}, // FALSE_TOKEN
52 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
53 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
54 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
55 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
56 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
57 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
58 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
59 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
60 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
61 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
62 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
63 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
64 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
65 {&Parser::Not
, NULL
, -1}, // BANG
66 {NULL
, &Parser::DotOperator
, PRECEDENCE_DOT
}, // DOT
67 {&Parser::Group
, NULL
, -1}, // LEFT_PAREN
68 {NULL
, NULL
, -1}, // RIGHT_PAREN
69 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
70 {NULL
, NULL
, -1}, // RIGHT_BRACKET
71 {NULL
, NULL
, -1}, // LEFT_BRACE
72 {NULL
, NULL
, -1}, // RIGHT_BRACE
73 {NULL
, NULL
, -1}, // IF
74 {NULL
, NULL
, -1}, // ELSE
75 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
76 {NULL
, NULL
, -1}, // COMMA
77 {NULL
, NULL
, -1}, // COMMENT
80 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
81 : tokens_(tokens
), err_(err
), cur_(0) {
88 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
90 Parser
p(tokens
, err
);
91 return p
.ParseFile().PassAs
<ParseNode
>();
95 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
97 Parser
p(tokens
, err
);
98 return p
.ParseExpression().Pass();
101 bool Parser::IsAssignment(const ParseNode
* node
) const {
102 return node
&& node
->AsBinaryOp() &&
103 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
104 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
105 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
108 bool Parser::IsStatementBreak(Token::Type token_type
) const {
109 switch (token_type
) {
110 case Token::IDENTIFIER
:
111 case Token::LEFT_BRACE
:
112 case Token::RIGHT_BRACE
:
121 bool Parser::LookAhead(Token::Type type
) {
124 return cur_token().type() == type
;
127 bool Parser::Match(Token::Type type
) {
128 if (!LookAhead(type
))
134 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
135 Token::Type types
[1] = { type
};
136 return Consume(types
, 1, error_message
);
139 Token
Parser::Consume(Token::Type
* types
,
141 const char* error_message
) {
143 // Don't overwrite current error, but make progress through tokens so that
144 // a loop that's expecting a particular token will still terminate.
146 return Token(Location(), Token::INVALID
, base::StringPiece());
149 const char kEOFMsg
[] = "I hit EOF instead.";
151 *err_
= Err(Location(), error_message
, kEOFMsg
);
153 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
154 return Token(Location(), Token::INVALID
, base::StringPiece());
157 for (size_t i
= 0; i
< num_types
; ++i
) {
158 if (cur_token().type() == types
[i
])
159 return tokens_
[cur_
++];
161 *err_
= Err(cur_token(), error_message
);
162 return Token(Location(), Token::INVALID
, base::StringPiece());
165 Token
Parser::Consume() {
166 return tokens_
[cur_
++];
169 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
170 return ParseExpression(0);
173 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
175 return scoped_ptr
<ParseNode
>();
177 Token token
= Consume();
178 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
180 if (prefix
== NULL
) {
182 std::string("Unexpected token '") + token
.value().as_string() +
184 return scoped_ptr
<ParseNode
>();
187 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
191 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
192 precedence
<= expressions_
[cur_token().type()].precedence
) {
194 InfixFunc infix
= expressions_
[token
.type()].infix
;
197 std::string("Unexpected token '") +
198 token
.value().as_string() + std::string("'"));
199 return scoped_ptr
<ParseNode
>();
201 left
= (this->*infix
)(left
.Pass(), token
);
203 return scoped_ptr
<ParseNode
>();
209 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
210 return scoped_ptr
<ParseNode
>(new LiteralNode(token
)).Pass();
213 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
214 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
217 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
218 scoped_ptr
<ParseNode
> expr
= ParseExpression();
220 return scoped_ptr
<ParseNode
>();
221 Consume(Token::RIGHT_PAREN
, "Expected ')'");
225 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
226 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
228 return scoped_ptr
<ParseNode
>();
229 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
230 unary_op
->set_op(token
);
231 unary_op
->set_operand(expr
.Pass());
232 return unary_op
.PassAs
<ParseNode
>();
235 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
236 scoped_ptr
<ParseNode
> list(ParseList(Token::RIGHT_BRACKET
, true));
237 if (!has_error() && !at_end())
238 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
242 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
244 scoped_ptr
<ParseNode
> right
=
245 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
249 "Expected right hand side for '" + token
.value().as_string() + "'");
250 return scoped_ptr
<ParseNode
>();
252 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
253 binary_op
->set_op(token
);
254 binary_op
->set_left(left
.Pass());
255 binary_op
->set_right(right
.Pass());
256 return binary_op
.PassAs
<ParseNode
>();
259 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
261 scoped_ptr
<ListNode
> list(new ListNode
);
262 list
->set_begin_token(token
);
263 list
->set_end_token(token
);
264 scoped_ptr
<BlockNode
> block
;
265 bool has_arg
= false;
266 if (Match(Token::LEFT_PAREN
)) {
267 // Parsing a function call.
269 if (Match(Token::RIGHT_PAREN
)) {
270 // Nothing, just an empty call.
272 list
= ParseList(Token::RIGHT_PAREN
, false);
274 return scoped_ptr
<ParseNode
>();
275 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
277 // Optionally with a scope.
278 if (LookAhead(Token::LEFT_BRACE
)) {
279 block
= ParseBlock();
281 return scoped_ptr
<ParseNode
>();
285 if (!left
&& !has_arg
) {
286 // Not a function call, just a standalone identifier.
287 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
289 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
290 func_call
->set_function(token
);
291 func_call
->set_args(list
.Pass());
293 func_call
->set_block(block
.Pass());
294 return func_call
.PassAs
<ParseNode
>();
297 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
299 if (left
->AsIdentifier() == NULL
) {
300 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
301 return scoped_ptr
<ParseNode
>();
303 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
304 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
305 assign
->set_op(token
);
306 assign
->set_left(left
.Pass());
307 assign
->set_right(value
.Pass());
308 return assign
.PassAs
<ParseNode
>();
311 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
313 // TODO: Maybe support more complex expressions like a[0][0]. This would
314 // require work on the evaluator too.
315 if (left
->AsIdentifier() == NULL
) {
316 *err_
= Err(left
.get(), "May only subscript identifiers.",
317 "The thing on the left hand side of the [] must be an identifier\n"
318 "and not an expression. If you need this, you'll have to assign the\n"
319 "value to a temporary before subscripting. Sorry.");
320 return scoped_ptr
<ParseNode
>();
322 scoped_ptr
<ParseNode
> value
= ParseExpression();
323 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
324 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
325 accessor
->set_base(left
->AsIdentifier()->value());
326 accessor
->set_index(value
.Pass());
327 return accessor
.PassAs
<ParseNode
>();
330 scoped_ptr
<ParseNode
> Parser::DotOperator(scoped_ptr
<ParseNode
> left
,
332 if (left
->AsIdentifier() == NULL
) {
333 *err_
= Err(left
.get(), "May only use \".\" for identifiers.",
334 "The thing on the left hand side of the dot must be an identifier\n"
335 "and not an expression. If you need this, you'll have to assign the\n"
336 "value to a temporary first. Sorry.");
337 return scoped_ptr
<ParseNode
>();
340 scoped_ptr
<ParseNode
> right
= ParseExpression(PRECEDENCE_DOT
);
341 if (!right
|| !right
->AsIdentifier()) {
342 *err_
= Err(token
, "Expected identifier for right-hand-side of \".\"",
343 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
344 return scoped_ptr
<ParseNode
>();
347 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
348 accessor
->set_base(left
->AsIdentifier()->value());
349 accessor
->set_member(scoped_ptr
<IdentifierNode
>(
350 static_cast<IdentifierNode
*>(right
.release())));
351 return accessor
.PassAs
<ParseNode
>();
354 // Does not Consume the start or end token.
355 scoped_ptr
<ListNode
> Parser::ParseList(Token::Type stop_before
,
356 bool allow_trailing_comma
) {
357 scoped_ptr
<ListNode
> list(new ListNode
);
358 list
->set_begin_token(cur_token());
359 bool just_got_comma
= false;
360 bool first_time
= true;
361 while (!LookAhead(stop_before
)) {
363 if (!just_got_comma
) {
364 // Require commas separate things in lists.
365 *err_
= Err(cur_token(), "Expected comma between items.");
366 return scoped_ptr
<ListNode
>();
371 // Why _OR? We're parsing things that are higher precedence than the ,
372 // that separates the items of the list. , should appear lower than
373 // boolean expressions (the lowest of which is OR), but above assignments.
374 list
->append_item(ParseExpression(PRECEDENCE_OR
));
376 return scoped_ptr
<ListNode
>();
379 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
380 return scoped_ptr
<ListNode
>();
382 just_got_comma
= Match(Token::COMMA
);
384 if (just_got_comma
&& !allow_trailing_comma
) {
385 *err_
= Err(cur_token(), "Trailing comma");
386 return scoped_ptr
<ListNode
>();
388 list
->set_end_token(cur_token());
392 scoped_ptr
<ParseNode
> Parser::ParseFile() {
393 scoped_ptr
<BlockNode
> file(new BlockNode(false));
397 scoped_ptr
<ParseNode
> statement
= ParseStatement();
400 file
->append_statement(statement
.Pass());
402 if (!at_end() && !has_error())
403 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
405 return scoped_ptr
<ParseNode
>();
406 return file
.PassAs
<ParseNode
>();
409 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
410 if (LookAhead(Token::LEFT_BRACE
)) {
411 return ParseBlock().PassAs
<ParseNode
>();
412 } else if (LookAhead(Token::IF
)) {
413 return ParseCondition();
415 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
416 // to allow "pointless" expressions and return ParseExpression() directly.
417 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
419 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
423 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
424 *err_
= Err(token
, "Expecting assignment or function call.");
426 return scoped_ptr
<ParseNode
>();
430 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
432 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
434 return scoped_ptr
<BlockNode
>();
435 scoped_ptr
<BlockNode
> block(new BlockNode(true));
436 block
->set_begin_token(begin_token
);
439 if (LookAhead(Token::RIGHT_BRACE
)) {
440 block
->set_end_token(Consume());
444 scoped_ptr
<ParseNode
> statement
= ParseStatement();
446 return scoped_ptr
<BlockNode
>();
447 block
->append_statement(statement
.Pass());
452 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
453 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
454 Consume(Token::IF
, "Expected 'if'");
455 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
456 condition
->set_condition(ParseExpression());
457 if (IsAssignment(condition
->condition()))
458 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
459 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
460 condition
->set_if_true(ParseBlock().Pass());
461 if (Match(Token::ELSE
))
462 condition
->set_if_false(ParseStatement().Pass());
464 return scoped_ptr
<ParseNode
>();
465 return condition
.PassAs
<ParseNode
>();