1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
23 // Returns true if the two tokens are on the same line. We assume they're in
25 bool IsSameLine(const Token
& a
, const Token
& b
) {
26 DCHECK(a
.location().file() == b
.location().file());
27 return a
.location().line_number() == b
.location().line_number();
33 PRECEDENCE_ASSIGNMENT
= 1,
36 PRECEDENCE_EQUALITY
= 4,
37 PRECEDENCE_RELATION
= 5,
39 PRECEDENCE_PREFIX
= 7,
43 // The top-level for blocks/ifs is still recursive descent, the expression
44 // parser is a Pratt parser. The basic idea there is to have the precedences
45 // (and associativities) encoded relative to each other and only parse up
46 // until you hit something of that precedence. There's a dispatch table in
47 // expressions_ at the top of parser.cc that describes how each token
48 // dispatches if it's seen as either a prefix or infix operator, and if it's
49 // infix, what its precedence is.
52 // - http://javascript.crockford.com/tdop/tdop.html
53 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
55 // Indexed by Token::Type.
56 ParserHelper
Parser::expressions_
[] = {
57 {NULL
, NULL
, -1}, // INVALID
58 {&Parser::Literal
, NULL
, -1}, // INTEGER
59 {&Parser::Literal
, NULL
, -1}, // STRING
60 {&Parser::Literal
, NULL
, -1}, // TRUE_TOKEN
61 {&Parser::Literal
, NULL
, -1}, // FALSE_TOKEN
62 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
63 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
64 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
65 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
66 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
67 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
68 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
69 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
70 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
71 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
72 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
73 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
74 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
75 {&Parser::Not
, NULL
, -1}, // BANG
76 {&Parser::Group
, NULL
, -1}, // LEFT_PAREN
77 {NULL
, NULL
, -1}, // RIGHT_PAREN
78 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
79 {NULL
, NULL
, -1}, // RIGHT_BRACKET
80 {NULL
, NULL
, -1}, // LEFT_BRACE
81 {NULL
, NULL
, -1}, // RIGHT_BRACE
82 {NULL
, NULL
, -1}, // IF
83 {NULL
, NULL
, -1}, // ELSE
84 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
85 {NULL
, NULL
, -1}, // COMMA
86 {NULL
, NULL
, -1}, // COMMENT
89 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
90 : tokens_(tokens
), err_(err
), cur_(0) {
97 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
99 Parser
p(tokens
, err
);
100 return p
.ParseFile().PassAs
<ParseNode
>();
104 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
106 Parser
p(tokens
, err
);
107 return p
.ParseExpression().Pass();
110 bool Parser::IsAssignment(const ParseNode
* node
) const {
111 return node
&& node
->AsBinaryOp() &&
112 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
113 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
114 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
117 bool Parser::IsStatementBreak(Token::Type token_type
) const {
118 switch (token_type
) {
119 case Token::IDENTIFIER
:
120 case Token::LEFT_BRACE
:
121 case Token::RIGHT_BRACE
:
130 bool Parser::LookAhead(Token::Type type
) {
133 return cur_token().type() == type
;
136 bool Parser::Match(Token::Type type
) {
137 if (!LookAhead(type
))
143 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
144 Token::Type types
[1] = { type
};
145 return Consume(types
, 1, error_message
);
148 Token
Parser::Consume(Token::Type
* types
,
150 const char* error_message
) {
152 // Don't overwrite current error, but make progress through tokens so that
153 // a loop that's expecting a particular token will still terminate.
155 return Token(Location(), Token::INVALID
, base::StringPiece());
158 const char kEOFMsg
[] = "I hit EOF instead.";
160 *err_
= Err(Location(), error_message
, kEOFMsg
);
162 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
163 return Token(Location(), Token::INVALID
, base::StringPiece());
166 for (size_t i
= 0; i
< num_types
; ++i
) {
167 if (cur_token().type() == types
[i
])
168 return tokens_
[cur_
++];
170 *err_
= Err(cur_token(), error_message
);
171 return Token(Location(), Token::INVALID
, base::StringPiece());
174 Token
Parser::Consume() {
175 return tokens_
[cur_
++];
178 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
179 return ParseExpression(0);
182 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
184 return scoped_ptr
<ParseNode
>();
186 Token token
= Consume();
187 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
189 if (prefix
== NULL
) {
191 std::string("Unexpected token '") + token
.value().as_string() +
193 return scoped_ptr
<ParseNode
>();
196 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
200 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
201 precedence
<= expressions_
[cur_token().type()].precedence
) {
203 InfixFunc infix
= expressions_
[token
.type()].infix
;
206 std::string("Unexpected token '") +
207 token
.value().as_string() + std::string("'"));
208 return scoped_ptr
<ParseNode
>();
210 left
= (this->*infix
)(left
.Pass(), token
);
212 return scoped_ptr
<ParseNode
>();
218 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
219 return scoped_ptr
<ParseNode
>(new LiteralNode(token
)).Pass();
222 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
223 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
226 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
227 scoped_ptr
<ParseNode
> expr
= ParseExpression();
229 return scoped_ptr
<ParseNode
>();
230 Consume(Token::RIGHT_PAREN
, "Expected ')'");
234 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
235 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
237 return scoped_ptr
<ParseNode
>();
238 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
239 unary_op
->set_op(token
);
240 unary_op
->set_operand(expr
.Pass());
241 return unary_op
.PassAs
<ParseNode
>();
244 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
245 scoped_ptr
<ParseNode
> list(ParseList(Token::RIGHT_BRACKET
, true));
246 if (!has_error() && !at_end())
247 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
251 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
253 scoped_ptr
<ParseNode
> right
=
254 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
258 "Expected right hand side for '" + token
.value().as_string() + "'");
259 return scoped_ptr
<ParseNode
>();
261 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
262 binary_op
->set_op(token
);
263 binary_op
->set_left(left
.Pass());
264 binary_op
->set_right(right
.Pass());
265 return binary_op
.PassAs
<ParseNode
>();
268 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
270 scoped_ptr
<ListNode
> list(new ListNode
);
271 list
->set_begin_token(token
);
272 list
->set_end_token(token
);
273 scoped_ptr
<BlockNode
> block
;
274 bool has_arg
= false;
275 if (Match(Token::LEFT_PAREN
)) {
276 // Parsing a function call.
278 if (Match(Token::RIGHT_PAREN
)) {
279 // Nothing, just an empty call.
281 list
= ParseList(Token::RIGHT_PAREN
, false);
283 return scoped_ptr
<ParseNode
>();
284 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
286 // Optionally with a scope.
287 if (LookAhead(Token::LEFT_BRACE
)) {
288 block
= ParseBlock();
290 return scoped_ptr
<ParseNode
>();
294 if (!left
&& !has_arg
) {
295 // Not a function call, just a standalone identifier.
296 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
298 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
299 func_call
->set_function(token
);
300 func_call
->set_args(list
.Pass());
302 func_call
->set_block(block
.Pass());
303 return func_call
.PassAs
<ParseNode
>();
306 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
308 if (left
->AsIdentifier() == NULL
) {
309 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
310 return scoped_ptr
<ParseNode
>();
312 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
313 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
314 assign
->set_op(token
);
315 assign
->set_left(left
.Pass());
316 assign
->set_right(value
.Pass());
317 return assign
.PassAs
<ParseNode
>();
320 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
322 // TODO: Maybe support more complex expressions like a[0][0]. This would
323 // require work on the evaluator too.
324 if (left
->AsIdentifier() == NULL
) {
325 *err_
= Err(left
.get(), "May only subscript simple identifiers");
326 return scoped_ptr
<ParseNode
>();
328 scoped_ptr
<ParseNode
> value
= ParseExpression();
329 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
330 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
331 accessor
->set_base(left
->AsIdentifier()->value());
332 accessor
->set_index(value
.Pass());
333 return accessor
.PassAs
<ParseNode
>();
336 // Does not Consume the start or end token.
337 scoped_ptr
<ListNode
> Parser::ParseList(Token::Type stop_before
,
338 bool allow_trailing_comma
) {
339 scoped_ptr
<ListNode
> list(new ListNode
);
340 list
->set_begin_token(cur_token());
341 bool just_got_comma
= false;
342 while (!LookAhead(stop_before
)) {
343 just_got_comma
= false;
344 // Why _OR? We're parsing things that are higher precedence than the ,
345 // that separates the items of the list. , should appear lower than
346 // boolean expressions (the lowest of which is OR), but above assignments.
347 list
->append_item(ParseExpression(PRECEDENCE_OR
));
349 return scoped_ptr
<ListNode
>();
352 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
353 return scoped_ptr
<ListNode
>();
355 just_got_comma
= Match(Token::COMMA
);
357 if (just_got_comma
&& !allow_trailing_comma
) {
358 *err_
= Err(cur_token(), "Trailing comma");
359 return scoped_ptr
<ListNode
>();
361 list
->set_end_token(cur_token());
365 scoped_ptr
<ParseNode
> Parser::ParseFile() {
366 scoped_ptr
<BlockNode
> file(new BlockNode(false));
370 scoped_ptr
<ParseNode
> statement
= ParseStatement();
373 file
->append_statement(statement
.Pass());
375 if (!at_end() && !has_error())
376 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
378 return scoped_ptr
<ParseNode
>();
379 return file
.PassAs
<ParseNode
>();
382 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
383 if (LookAhead(Token::LEFT_BRACE
)) {
384 return ParseBlock().PassAs
<ParseNode
>();
385 } else if (LookAhead(Token::IF
)) {
386 return ParseCondition();
388 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
389 // to allow "pointless" expressions and return ParseExpression() directly.
390 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
392 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
396 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
397 *err_
= Err(token
, "Expecting assignment or function call.");
399 return scoped_ptr
<ParseNode
>();
403 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
405 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
407 return scoped_ptr
<BlockNode
>();
408 scoped_ptr
<BlockNode
> block(new BlockNode(true));
409 block
->set_begin_token(begin_token
);
412 if (LookAhead(Token::RIGHT_BRACE
)) {
413 block
->set_end_token(Consume());
417 scoped_ptr
<ParseNode
> statement
= ParseStatement();
419 return scoped_ptr
<BlockNode
>();
420 block
->append_statement(statement
.Pass());
425 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
426 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
427 Consume(Token::IF
, "Expected 'if'");
428 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
429 condition
->set_condition(ParseExpression());
430 if (IsAssignment(condition
->condition()))
431 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
432 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
433 condition
->set_if_true(ParseBlock().Pass());
434 if (Match(Token::ELSE
))
435 condition
->set_if_false(ParseStatement().Pass());
437 return scoped_ptr
<ParseNode
>();
438 return condition
.PassAs
<ParseNode
>();