1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
22 PRECEDENCE_ASSIGNMENT
= 1, // Lowest precedence.
25 PRECEDENCE_EQUALITY
= 4,
26 PRECEDENCE_RELATION
= 5,
28 PRECEDENCE_PREFIX
= 7,
30 PRECEDENCE_DOT
= 9, // Highest precedence.
33 // The top-level for blocks/ifs is recursive descent, the expression parser is
34 // a Pratt parser. The basic idea there is to have the precedences (and
35 // associativities) encoded relative to each other and only parse up until you
36 // hit something of that precedence. There's a dispatch table in expressions_
37 // at the top of parser.cc that describes how each token dispatches if it's
38 // seen as either a prefix or infix operator, and if it's infix, what its
42 // - http://javascript.crockford.com/tdop/tdop.html
43 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
45 // Indexed by Token::Type.
46 ParserHelper
Parser::expressions_
[] = {
47 {NULL
, NULL
, -1}, // INVALID
48 {&Parser::Literal
, NULL
, -1}, // INTEGER
49 {&Parser::Literal
, NULL
, -1}, // STRING
50 {&Parser::Literal
, NULL
, -1}, // TRUE_TOKEN
51 {&Parser::Literal
, NULL
, -1}, // FALSE_TOKEN
52 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
53 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
54 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
55 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
56 {NULL
, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
57 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
58 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
59 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
60 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
61 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
62 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
63 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
64 {NULL
, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
65 {&Parser::Not
, NULL
, -1}, // BANG
66 {NULL
, &Parser::DotOperator
, PRECEDENCE_DOT
}, // DOT
67 {&Parser::Group
, NULL
, -1}, // LEFT_PAREN
68 {NULL
, NULL
, -1}, // RIGHT_PAREN
69 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
70 {NULL
, NULL
, -1}, // RIGHT_BRACKET
71 {NULL
, NULL
, -1}, // LEFT_BRACE
72 {NULL
, NULL
, -1}, // RIGHT_BRACE
73 {NULL
, NULL
, -1}, // IF
74 {NULL
, NULL
, -1}, // ELSE
75 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
76 {NULL
, NULL
, -1}, // COMMA
77 {NULL
, NULL
, -1}, // UNCLASSIFIED_COMMENT
78 {NULL
, NULL
, -1}, // LINE_COMMENT
79 {NULL
, NULL
, -1}, // SUFFIX_COMMENT
80 {&Parser::BlockComment
, NULL
, -1}, // BLOCK_COMMENT
83 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
84 : err_(err
), cur_(0) {
85 for (const auto& token
: tokens
) {
86 switch(token
.type()) {
87 case Token::LINE_COMMENT
:
88 line_comment_tokens_
.push_back(token
);
90 case Token::SUFFIX_COMMENT
:
91 suffix_comment_tokens_
.push_back(token
);
94 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
95 // through the real parser.
96 tokens_
.push_back(token
);
106 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
108 Parser
p(tokens
, err
);
109 return p
.ParseFile();
113 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
115 Parser
p(tokens
, err
);
116 return p
.ParseExpression().Pass();
119 bool Parser::IsAssignment(const ParseNode
* node
) const {
120 return node
&& node
->AsBinaryOp() &&
121 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
122 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
123 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
126 bool Parser::IsStatementBreak(Token::Type token_type
) const {
127 switch (token_type
) {
128 case Token::IDENTIFIER
:
129 case Token::LEFT_BRACE
:
130 case Token::RIGHT_BRACE
:
139 bool Parser::LookAhead(Token::Type type
) {
142 return cur_token().type() == type
;
145 bool Parser::Match(Token::Type type
) {
146 if (!LookAhead(type
))
152 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
153 Token::Type types
[1] = { type
};
154 return Consume(types
, 1, error_message
);
157 Token
Parser::Consume(Token::Type
* types
,
159 const char* error_message
) {
161 // Don't overwrite current error, but make progress through tokens so that
162 // a loop that's expecting a particular token will still terminate.
164 return Token(Location(), Token::INVALID
, base::StringPiece());
167 const char kEOFMsg
[] = "I hit EOF instead.";
169 *err_
= Err(Location(), error_message
, kEOFMsg
);
171 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
172 return Token(Location(), Token::INVALID
, base::StringPiece());
175 for (size_t i
= 0; i
< num_types
; ++i
) {
176 if (cur_token().type() == types
[i
])
177 return tokens_
[cur_
++];
179 *err_
= Err(cur_token(), error_message
);
180 return Token(Location(), Token::INVALID
, base::StringPiece());
183 Token
Parser::Consume() {
184 return tokens_
[cur_
++];
187 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
188 return ParseExpression(0);
191 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
193 return scoped_ptr
<ParseNode
>();
195 Token token
= Consume();
196 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
198 if (prefix
== NULL
) {
200 std::string("Unexpected token '") + token
.value().as_string() +
202 return scoped_ptr
<ParseNode
>();
205 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
209 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
210 precedence
<= expressions_
[cur_token().type()].precedence
) {
212 InfixFunc infix
= expressions_
[token
.type()].infix
;
215 std::string("Unexpected token '") +
216 token
.value().as_string() + std::string("'"));
217 return scoped_ptr
<ParseNode
>();
219 left
= (this->*infix
)(left
.Pass(), token
);
221 return scoped_ptr
<ParseNode
>();
227 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
228 return make_scoped_ptr(new LiteralNode(token
));
231 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
232 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
235 scoped_ptr
<ParseNode
> Parser::BlockComment(Token token
) {
236 scoped_ptr
<BlockCommentNode
> comment(new BlockCommentNode());
237 comment
->set_comment(token
);
238 return comment
.Pass();
241 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
242 scoped_ptr
<ParseNode
> expr
= ParseExpression();
244 return scoped_ptr
<ParseNode
>();
245 Consume(Token::RIGHT_PAREN
, "Expected ')'");
249 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
250 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
252 return scoped_ptr
<ParseNode
>();
253 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
254 unary_op
->set_op(token
);
255 unary_op
->set_operand(expr
.Pass());
256 return unary_op
.Pass();
259 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
260 scoped_ptr
<ParseNode
> list(ParseList(node
, Token::RIGHT_BRACKET
, true));
261 if (!has_error() && !at_end())
262 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
266 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
268 scoped_ptr
<ParseNode
> right
=
269 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
273 "Expected right hand side for '" + token
.value().as_string() + "'");
274 return scoped_ptr
<ParseNode
>();
276 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
277 binary_op
->set_op(token
);
278 binary_op
->set_left(left
.Pass());
279 binary_op
->set_right(right
.Pass());
280 return binary_op
.Pass();
283 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
285 scoped_ptr
<ListNode
> list(new ListNode
);
286 list
->set_begin_token(token
);
287 list
->set_end(make_scoped_ptr(new EndNode(token
)));
288 scoped_ptr
<BlockNode
> block
;
289 bool has_arg
= false;
290 if (LookAhead(Token::LEFT_PAREN
)) {
291 Token start_token
= Consume();
292 // Parsing a function call.
294 if (Match(Token::RIGHT_PAREN
)) {
295 // Nothing, just an empty call.
297 list
= ParseList(start_token
, Token::RIGHT_PAREN
, false);
299 return scoped_ptr
<ParseNode
>();
300 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
302 // Optionally with a scope.
303 if (LookAhead(Token::LEFT_BRACE
)) {
304 block
= ParseBlock();
306 return scoped_ptr
<ParseNode
>();
310 if (!left
&& !has_arg
) {
311 // Not a function call, just a standalone identifier.
312 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
314 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
315 func_call
->set_function(token
);
316 func_call
->set_args(list
.Pass());
318 func_call
->set_block(block
.Pass());
319 return func_call
.Pass();
322 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
324 if (left
->AsIdentifier() == NULL
) {
325 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
326 return scoped_ptr
<ParseNode
>();
328 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
329 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
330 assign
->set_op(token
);
331 assign
->set_left(left
.Pass());
332 assign
->set_right(value
.Pass());
333 return assign
.Pass();
336 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
338 // TODO: Maybe support more complex expressions like a[0][0]. This would
339 // require work on the evaluator too.
340 if (left
->AsIdentifier() == NULL
) {
341 *err_
= Err(left
.get(), "May only subscript identifiers.",
342 "The thing on the left hand side of the [] must be an identifier\n"
343 "and not an expression. If you need this, you'll have to assign the\n"
344 "value to a temporary before subscripting. Sorry.");
345 return scoped_ptr
<ParseNode
>();
347 scoped_ptr
<ParseNode
> value
= ParseExpression();
348 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
349 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
350 accessor
->set_base(left
->AsIdentifier()->value());
351 accessor
->set_index(value
.Pass());
352 return accessor
.Pass();
355 scoped_ptr
<ParseNode
> Parser::DotOperator(scoped_ptr
<ParseNode
> left
,
357 if (left
->AsIdentifier() == NULL
) {
358 *err_
= Err(left
.get(), "May only use \".\" for identifiers.",
359 "The thing on the left hand side of the dot must be an identifier\n"
360 "and not an expression. If you need this, you'll have to assign the\n"
361 "value to a temporary first. Sorry.");
362 return scoped_ptr
<ParseNode
>();
365 scoped_ptr
<ParseNode
> right
= ParseExpression(PRECEDENCE_DOT
);
366 if (!right
|| !right
->AsIdentifier()) {
367 *err_
= Err(token
, "Expected identifier for right-hand-side of \".\"",
368 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
369 return scoped_ptr
<ParseNode
>();
372 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
373 accessor
->set_base(left
->AsIdentifier()->value());
374 accessor
->set_member(scoped_ptr
<IdentifierNode
>(
375 static_cast<IdentifierNode
*>(right
.release())));
376 return accessor
.Pass();
379 // Does not Consume the start or end token.
380 scoped_ptr
<ListNode
> Parser::ParseList(Token start_token
,
381 Token::Type stop_before
,
382 bool allow_trailing_comma
) {
383 scoped_ptr
<ListNode
> list(new ListNode
);
384 list
->set_begin_token(start_token
);
385 bool just_got_comma
= false;
386 bool first_time
= true;
387 while (!LookAhead(stop_before
)) {
389 if (!just_got_comma
) {
390 // Require commas separate things in lists.
391 *err_
= Err(cur_token(), "Expected comma between items.");
392 return scoped_ptr
<ListNode
>();
397 // Why _OR? We're parsing things that are higher precedence than the ,
398 // that separates the items of the list. , should appear lower than
399 // boolean expressions (the lowest of which is OR), but above assignments.
400 list
->append_item(ParseExpression(PRECEDENCE_OR
));
402 return scoped_ptr
<ListNode
>();
405 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
406 return scoped_ptr
<ListNode
>();
408 if (list
->contents().back()->AsBlockComment()) {
409 // If there was a comment inside the list, we don't need a comma to the
410 // next item, so pretend we got one, if we're expecting one.
411 just_got_comma
= allow_trailing_comma
;
413 just_got_comma
= Match(Token::COMMA
);
416 if (just_got_comma
&& !allow_trailing_comma
) {
417 *err_
= Err(cur_token(), "Trailing comma");
418 return scoped_ptr
<ListNode
>();
420 list
->set_end(make_scoped_ptr(new EndNode(cur_token())));
424 scoped_ptr
<ParseNode
> Parser::ParseFile() {
425 scoped_ptr
<BlockNode
> file(new BlockNode(false));
429 scoped_ptr
<ParseNode
> statement
= ParseStatement();
432 file
->append_statement(statement
.Pass());
434 if (!at_end() && !has_error())
435 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
437 return scoped_ptr
<ParseNode
>();
439 // TODO(scottmg): If this is measurably expensive, it could be done only
440 // when necessary (when reformatting, or during tests). Comments are
441 // separate from the parse tree at this point, so downstream code can remain
443 AssignComments(file
.get());
448 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
449 if (LookAhead(Token::LEFT_BRACE
)) {
451 } else if (LookAhead(Token::IF
)) {
452 return ParseCondition();
453 } else if (LookAhead(Token::BLOCK_COMMENT
)) {
454 return BlockComment(Consume());
456 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
457 // to allow "pointless" expressions and return ParseExpression() directly.
458 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
460 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
464 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
465 *err_
= Err(token
, "Expecting assignment or function call.");
467 return scoped_ptr
<ParseNode
>();
471 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
473 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
475 return scoped_ptr
<BlockNode
>();
476 scoped_ptr
<BlockNode
> block(new BlockNode(true));
477 block
->set_begin_token(begin_token
);
480 if (LookAhead(Token::RIGHT_BRACE
)) {
481 block
->set_end(make_scoped_ptr(new EndNode(Consume())));
485 scoped_ptr
<ParseNode
> statement
= ParseStatement();
487 return scoped_ptr
<BlockNode
>();
488 block
->append_statement(statement
.Pass());
493 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
494 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
495 condition
->set_if_token(Consume(Token::IF
, "Expected 'if'"));
496 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
497 condition
->set_condition(ParseExpression());
498 if (IsAssignment(condition
->condition()))
499 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
500 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
501 condition
->set_if_true(ParseBlock().Pass());
502 if (Match(Token::ELSE
))
503 condition
->set_if_false(ParseStatement().Pass());
505 return scoped_ptr
<ParseNode
>();
506 return condition
.Pass();
509 void Parser::TraverseOrder(const ParseNode
* root
,
510 std::vector
<const ParseNode
*>* pre
,
511 std::vector
<const ParseNode
*>* post
) {
513 pre
->push_back(root
);
515 if (const AccessorNode
* accessor
= root
->AsAccessor()) {
516 TraverseOrder(accessor
->index(), pre
, post
);
517 TraverseOrder(accessor
->member(), pre
, post
);
518 } else if (const BinaryOpNode
* binop
= root
->AsBinaryOp()) {
519 TraverseOrder(binop
->left(), pre
, post
);
520 TraverseOrder(binop
->right(), pre
, post
);
521 } else if (const BlockNode
* block
= root
->AsBlock()) {
522 for (const auto& statement
: block
->statements())
523 TraverseOrder(statement
, pre
, post
);
524 TraverseOrder(block
->End(), pre
, post
);
525 } else if (const ConditionNode
* condition
= root
->AsConditionNode()) {
526 TraverseOrder(condition
->condition(), pre
, post
);
527 TraverseOrder(condition
->if_true(), pre
, post
);
528 TraverseOrder(condition
->if_false(), pre
, post
);
529 } else if (const FunctionCallNode
* func_call
= root
->AsFunctionCall()) {
530 TraverseOrder(func_call
->args(), pre
, post
);
531 TraverseOrder(func_call
->block(), pre
, post
);
532 } else if (root
->AsIdentifier()) {
534 } else if (const ListNode
* list
= root
->AsList()) {
535 for (const auto& node
: list
->contents())
536 TraverseOrder(node
, pre
, post
);
537 TraverseOrder(list
->End(), pre
, post
);
538 } else if (root
->AsLiteral()) {
540 } else if (const UnaryOpNode
* unaryop
= root
->AsUnaryOp()) {
541 TraverseOrder(unaryop
->operand(), pre
, post
);
542 } else if (root
->AsBlockComment()) {
544 } else if (root
->AsEnd()) {
547 CHECK(false) << "Unhandled case in TraverseOrder.";
550 post
->push_back(root
);
554 void Parser::AssignComments(ParseNode
* file
) {
555 // Start by generating a pre- and post- order traversal of the tree so we
556 // can determine what's before and after comments.
557 std::vector
<const ParseNode
*> pre
;
558 std::vector
<const ParseNode
*> post
;
559 TraverseOrder(file
, &pre
, &post
);
561 // Assign line comments to syntax immediately following.
563 for (const auto& node
: pre
) {
564 const Location
& start
= node
->GetRange().begin();
565 while (cur_comment
< static_cast<int>(line_comment_tokens_
.size())) {
566 if (start
.byte() >= line_comment_tokens_
[cur_comment
].location().byte()) {
567 const_cast<ParseNode
*>(node
)->comments_mutable()->append_before(
568 line_comment_tokens_
[cur_comment
]);
576 // Remaining line comments go at end of file.
577 for (; cur_comment
< static_cast<int>(line_comment_tokens_
.size());
579 file
->comments_mutable()->append_after(line_comment_tokens_
[cur_comment
]);
581 // Assign suffix to syntax immediately before.
582 cur_comment
= static_cast<int>(suffix_comment_tokens_
.size() - 1);
583 for (std::vector
<const ParseNode
*>::const_reverse_iterator i
= post
.rbegin();
586 // Don't assign suffix comments to the function, list, or block, but instead
587 // to the last thing inside.
588 if ((*i
)->AsFunctionCall() || (*i
)->AsList() || (*i
)->AsBlock())
591 const Location
& start
= (*i
)->GetRange().begin();
592 const Location
& end
= (*i
)->GetRange().end();
594 // Don't assign suffix comments to something that starts on an earlier
600 // it's attached to "b", not sources = [ ... ].
601 if (start
.line_number() != end
.line_number())
604 while (cur_comment
>= 0) {
605 if (end
.byte() <= suffix_comment_tokens_
[cur_comment
].location().byte()) {
606 const_cast<ParseNode
*>(*i
)->comments_mutable()->append_suffix(
607 suffix_comment_tokens_
[cur_comment
]);
614 // Suffix comments were assigned in reverse, so if there were multiple on
615 // the same node, they need to be reversed.
616 if ((*i
)->comments() && !(*i
)->comments()->suffix().empty())
617 const_cast<ParseNode
*>(*i
)->comments_mutable()->ReverseSuffix();