1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
12 const char kGrammar_Help
[] =
13 "GN build language grammar\n"
17 " GN build files are read as sequences of tokens. While splitting the\n"
18 " file into tokens, the next token is the longest sequence of characters\n"
19 " that form a valid token.\n"
21 "White space and comments\n"
23 " White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
24 " carriage returns (U+000D), and newlines (U+000A).\n"
26 " Comments start at the character \"#\" and stop at the next newline.\n"
28 " White space and comments are ignored except that they may separate\n"
29 " tokens that would otherwise combine into a single token.\n"
33 " Identifiers name variables and functions.\n"
35 " identifier = letter { letter | digit } .\n"
36 " letter = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
37 " digit = \"0\" ... \"9\" .\n"
41 " The following keywords are reserved and may not be used as\n"
44 " else false if true\n"
48 " An integer literal represents a decimal integer value.\n"
50 " integer = [ \"-\" ] digit { digit } .\n"
52 " Leading zeros and negative zero are disallowed.\n"
56 " A string literal represents a string value consisting of the quoted\n"
57 " characters with possible escape sequences and variable expansions.\n"
59 " string = `\"` { char | escape | expansion } `\"` .\n"
60 " escape = `\\` ( \"$\" | `\"` | char ) .\n"
61 " BracketExpansion = \"{\" ( identifier | ArrayAccess | ScopeAccess "
63 " expansion = \"$\" ( identifier | BracketExpansion ) .\n"
64 " char = /* any character except \"$\", `\"`, or newline "
67 " After a backslash, certain sequences represent special characters:\n"
69 " \\\" U+0022 quotation mark\n"
70 " \\$ U+0024 dollar sign\n"
71 " \\\\ U+005C backslash\n"
73 " All other backslashes represent themselves.\n"
77 " The following character sequences represent punctuation:\n"
86 " The input tokens form a syntax tree following a context-free grammar:\n"
88 " File = StatementList .\n"
90 " Statement = Assignment | Call | Condition .\n"
91 " Assignment = identifier AssignOp Expr .\n"
92 " Call = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
93 " Condition = \"if\" \"(\" Expr \")\" Block\n"
94 " [ \"else\" ( Condition | Block ) ] .\n"
95 " Block = \"{\" StatementList \"}\" .\n"
96 " StatementList = { Statement } .\n"
98 " ArrayAccess = identifier \"[\" { identifier | integer } \"]\" .\n"
99 " ScopeAccess = identifier \".\" identifier .\n"
100 " Expr = UnaryExpr | Expr BinaryOp Expr .\n"
101 " UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .\n"
102 " PrimaryExpr = identifier | integer | string | Call\n"
103 " | ArrayAccess | ScopeAccess\n"
104 " | \"(\" Expr \")\"\n"
105 " | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
106 " ExprList = Expr { \",\" Expr } .\n"
108 " AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
109 " UnaryOp = \"!\" .\n"
110 " BinaryOp = \"+\" | \"-\" // highest priority\n"
111 " | \"<\" | \"<=\" | \">\" | \">=\"\n"
112 " | \"==\" | \"!=\"\n"
114 " | \"||\" . // lowest priority\n"
116 " All binary operators are left-associative.\n";
119 PRECEDENCE_ASSIGNMENT
= 1, // Lowest precedence.
122 PRECEDENCE_EQUALITY
= 4,
123 PRECEDENCE_RELATION
= 5,
125 PRECEDENCE_PREFIX
= 7,
127 PRECEDENCE_DOT
= 9, // Highest precedence.
130 // The top-level for blocks/ifs is recursive descent, the expression parser is
131 // a Pratt parser. The basic idea there is to have the precedences (and
132 // associativities) encoded relative to each other and only parse up until you
133 // hit something of that precedence. There's a dispatch table in expressions_
134 // at the top of parser.cc that describes how each token dispatches if it's
135 // seen as either a prefix or infix operator, and if it's infix, what its
139 // - http://javascript.crockford.com/tdop/tdop.html
140 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
142 // Indexed by Token::Type.
143 ParserHelper
Parser::expressions_
[] = {
144 {nullptr, nullptr, -1}, // INVALID
145 {&Parser::Literal
, nullptr, -1}, // INTEGER
146 {&Parser::Literal
, nullptr, -1}, // STRING
147 {&Parser::Literal
, nullptr, -1}, // TRUE_TOKEN
148 {&Parser::Literal
, nullptr, -1}, // FALSE_TOKEN
149 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
150 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
151 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
152 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
153 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
154 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
155 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
156 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
157 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
158 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
159 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
160 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
161 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
162 {&Parser::Not
, nullptr, -1}, // BANG
163 {nullptr, &Parser::DotOperator
, PRECEDENCE_DOT
}, // DOT
164 {&Parser::Group
, nullptr, -1}, // LEFT_PAREN
165 {nullptr, nullptr, -1}, // RIGHT_PAREN
166 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
167 {nullptr, nullptr, -1}, // RIGHT_BRACKET
168 {nullptr, nullptr, -1}, // LEFT_BRACE
169 {nullptr, nullptr, -1}, // RIGHT_BRACE
170 {nullptr, nullptr, -1}, // IF
171 {nullptr, nullptr, -1}, // ELSE
172 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
173 {nullptr, nullptr, -1}, // COMMA
174 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
175 {nullptr, nullptr, -1}, // LINE_COMMENT
176 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
177 {&Parser::BlockComment
, nullptr, -1}, // BLOCK_COMMENT
180 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
181 : err_(err
), cur_(0) {
182 for (const auto& token
: tokens
) {
183 switch (token
.type()) {
184 case Token::LINE_COMMENT
:
185 line_comment_tokens_
.push_back(token
);
187 case Token::SUFFIX_COMMENT
:
188 suffix_comment_tokens_
.push_back(token
);
191 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
192 // through the real parser.
193 tokens_
.push_back(token
);
203 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
205 Parser
p(tokens
, err
);
206 return p
.ParseFile();
210 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
212 Parser
p(tokens
, err
);
213 scoped_ptr
<ParseNode
> expr
= p
.ParseExpression();
214 if (!p
.at_end() && !err
->has_error()) {
215 *err
= Err(p
.cur_token(), "Trailing garbage");
222 scoped_ptr
<ParseNode
> Parser::ParseValue(const std::vector
<Token
>& tokens
,
224 for (const Token
& token
: tokens
) {
225 switch (token
.type()) {
228 case Token::TRUE_TOKEN
:
229 case Token::FALSE_TOKEN
:
230 case Token::LEFT_BRACKET
:
231 case Token::RIGHT_BRACKET
:
235 *err
= Err(token
, "Invalid token in literal value");
240 return ParseExpression(tokens
, err
);
243 bool Parser::IsAssignment(const ParseNode
* node
) const {
244 return node
&& node
->AsBinaryOp() &&
245 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
246 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
247 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
250 bool Parser::IsStatementBreak(Token::Type token_type
) const {
251 switch (token_type
) {
252 case Token::IDENTIFIER
:
253 case Token::LEFT_BRACE
:
254 case Token::RIGHT_BRACE
:
263 bool Parser::LookAhead(Token::Type type
) {
266 return cur_token().type() == type
;
269 bool Parser::Match(Token::Type type
) {
270 if (!LookAhead(type
))
276 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
277 Token::Type types
[1] = { type
};
278 return Consume(types
, 1, error_message
);
281 Token
Parser::Consume(Token::Type
* types
,
283 const char* error_message
) {
285 // Don't overwrite current error, but make progress through tokens so that
286 // a loop that's expecting a particular token will still terminate.
288 return Token(Location(), Token::INVALID
, base::StringPiece());
291 const char kEOFMsg
[] = "I hit EOF instead.";
293 *err_
= Err(Location(), error_message
, kEOFMsg
);
295 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
296 return Token(Location(), Token::INVALID
, base::StringPiece());
299 for (size_t i
= 0; i
< num_types
; ++i
) {
300 if (cur_token().type() == types
[i
])
303 *err_
= Err(cur_token(), error_message
);
304 return Token(Location(), Token::INVALID
, base::StringPiece());
307 Token
Parser::Consume() {
308 return tokens_
[cur_
++];
311 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
312 return ParseExpression(0);
315 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
317 return scoped_ptr
<ParseNode
>();
319 Token token
= Consume();
320 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
322 if (prefix
== nullptr) {
324 std::string("Unexpected token '") + token
.value().as_string() +
326 return scoped_ptr
<ParseNode
>();
329 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
333 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
334 precedence
<= expressions_
[cur_token().type()].precedence
) {
336 InfixFunc infix
= expressions_
[token
.type()].infix
;
337 if (infix
== nullptr) {
339 std::string("Unexpected token '") +
340 token
.value().as_string() + std::string("'"));
341 return scoped_ptr
<ParseNode
>();
343 left
= (this->*infix
)(left
.Pass(), token
);
345 return scoped_ptr
<ParseNode
>();
351 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
352 return make_scoped_ptr(new LiteralNode(token
));
355 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
356 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
359 scoped_ptr
<ParseNode
> Parser::BlockComment(Token token
) {
360 scoped_ptr
<BlockCommentNode
> comment(new BlockCommentNode());
361 comment
->set_comment(token
);
362 return comment
.Pass();
365 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
366 scoped_ptr
<ParseNode
> expr
= ParseExpression();
368 return scoped_ptr
<ParseNode
>();
369 Consume(Token::RIGHT_PAREN
, "Expected ')'");
373 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
374 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
376 return scoped_ptr
<ParseNode
>();
377 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
378 unary_op
->set_op(token
);
379 unary_op
->set_operand(expr
.Pass());
380 return unary_op
.Pass();
383 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
384 scoped_ptr
<ParseNode
> list(ParseList(node
, Token::RIGHT_BRACKET
, true));
385 if (!has_error() && !at_end())
386 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
390 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
392 scoped_ptr
<ParseNode
> right
=
393 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
396 *err_
= Err(token
, "Expected right hand side for '" +
397 token
.value().as_string() + "'");
399 return scoped_ptr
<ParseNode
>();
401 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
402 binary_op
->set_op(token
);
403 binary_op
->set_left(left
.Pass());
404 binary_op
->set_right(right
.Pass());
405 return binary_op
.Pass();
408 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
410 scoped_ptr
<ListNode
> list(new ListNode
);
411 list
->set_begin_token(token
);
412 list
->set_end(make_scoped_ptr(new EndNode(token
)));
413 scoped_ptr
<BlockNode
> block
;
414 bool has_arg
= false;
415 if (LookAhead(Token::LEFT_PAREN
)) {
416 Token start_token
= Consume();
417 // Parsing a function call.
419 if (Match(Token::RIGHT_PAREN
)) {
420 // Nothing, just an empty call.
422 list
= ParseList(start_token
, Token::RIGHT_PAREN
, false);
424 return scoped_ptr
<ParseNode
>();
425 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
427 // Optionally with a scope.
428 if (LookAhead(Token::LEFT_BRACE
)) {
429 block
= ParseBlock();
431 return scoped_ptr
<ParseNode
>();
435 if (!left
&& !has_arg
) {
436 // Not a function call, just a standalone identifier.
437 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
439 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
440 func_call
->set_function(token
);
441 func_call
->set_args(list
.Pass());
443 func_call
->set_block(block
.Pass());
444 return func_call
.Pass();
447 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
449 if (left
->AsIdentifier() == nullptr) {
450 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
451 return scoped_ptr
<ParseNode
>();
453 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
454 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
455 assign
->set_op(token
);
456 assign
->set_left(left
.Pass());
457 assign
->set_right(value
.Pass());
458 return assign
.Pass();
461 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
463 // TODO: Maybe support more complex expressions like a[0][0]. This would
464 // require work on the evaluator too.
465 if (left
->AsIdentifier() == nullptr) {
466 *err_
= Err(left
.get(), "May only subscript identifiers.",
467 "The thing on the left hand side of the [] must be an identifier\n"
468 "and not an expression. If you need this, you'll have to assign the\n"
469 "value to a temporary before subscripting. Sorry.");
470 return scoped_ptr
<ParseNode
>();
472 scoped_ptr
<ParseNode
> value
= ParseExpression();
473 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
474 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
475 accessor
->set_base(left
->AsIdentifier()->value());
476 accessor
->set_index(value
.Pass());
477 return accessor
.Pass();
480 scoped_ptr
<ParseNode
> Parser::DotOperator(scoped_ptr
<ParseNode
> left
,
482 if (left
->AsIdentifier() == nullptr) {
483 *err_
= Err(left
.get(), "May only use \".\" for identifiers.",
484 "The thing on the left hand side of the dot must be an identifier\n"
485 "and not an expression. If you need this, you'll have to assign the\n"
486 "value to a temporary first. Sorry.");
487 return scoped_ptr
<ParseNode
>();
490 scoped_ptr
<ParseNode
> right
= ParseExpression(PRECEDENCE_DOT
);
491 if (!right
|| !right
->AsIdentifier()) {
492 *err_
= Err(token
, "Expected identifier for right-hand-side of \".\"",
493 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
494 return scoped_ptr
<ParseNode
>();
497 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
498 accessor
->set_base(left
->AsIdentifier()->value());
499 accessor
->set_member(scoped_ptr
<IdentifierNode
>(
500 static_cast<IdentifierNode
*>(right
.release())));
501 return accessor
.Pass();
504 // Does not Consume the start or end token.
505 scoped_ptr
<ListNode
> Parser::ParseList(Token start_token
,
506 Token::Type stop_before
,
507 bool allow_trailing_comma
) {
508 scoped_ptr
<ListNode
> list(new ListNode
);
509 list
->set_begin_token(start_token
);
510 bool just_got_comma
= false;
511 bool first_time
= true;
512 while (!LookAhead(stop_before
)) {
514 if (!just_got_comma
) {
515 // Require commas separate things in lists.
516 *err_
= Err(cur_token(), "Expected comma between items.");
517 return scoped_ptr
<ListNode
>();
522 // Why _OR? We're parsing things that are higher precedence than the ,
523 // that separates the items of the list. , should appear lower than
524 // boolean expressions (the lowest of which is OR), but above assignments.
525 list
->append_item(ParseExpression(PRECEDENCE_OR
));
527 return scoped_ptr
<ListNode
>();
530 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
531 return scoped_ptr
<ListNode
>();
533 if (list
->contents().back()->AsBlockComment()) {
534 // If there was a comment inside the list, we don't need a comma to the
535 // next item, so pretend we got one, if we're expecting one.
536 just_got_comma
= allow_trailing_comma
;
538 just_got_comma
= Match(Token::COMMA
);
541 if (just_got_comma
&& !allow_trailing_comma
) {
542 *err_
= Err(cur_token(), "Trailing comma");
543 return scoped_ptr
<ListNode
>();
545 list
->set_end(make_scoped_ptr(new EndNode(cur_token())));
549 scoped_ptr
<ParseNode
> Parser::ParseFile() {
550 scoped_ptr
<BlockNode
> file(new BlockNode
);
554 scoped_ptr
<ParseNode
> statement
= ParseStatement();
557 file
->append_statement(statement
.Pass());
559 if (!at_end() && !has_error())
560 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
562 return scoped_ptr
<ParseNode
>();
564 // TODO(scottmg): If this is measurably expensive, it could be done only
565 // when necessary (when reformatting, or during tests). Comments are
566 // separate from the parse tree at this point, so downstream code can remain
568 AssignComments(file
.get());
573 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
574 if (LookAhead(Token::IF
)) {
575 return ParseCondition();
576 } else if (LookAhead(Token::BLOCK_COMMENT
)) {
577 return BlockComment(Consume());
579 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
580 // to allow "pointless" expressions and return ParseExpression() directly.
581 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
583 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
587 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
588 *err_
= Err(token
, "Expecting assignment or function call.");
590 return scoped_ptr
<ParseNode
>();
594 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
596 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
598 return scoped_ptr
<BlockNode
>();
599 scoped_ptr
<BlockNode
> block(new BlockNode
);
600 block
->set_begin_token(begin_token
);
603 if (LookAhead(Token::RIGHT_BRACE
)) {
604 block
->set_end(make_scoped_ptr(new EndNode(Consume())));
608 scoped_ptr
<ParseNode
> statement
= ParseStatement();
610 return scoped_ptr
<BlockNode
>();
611 block
->append_statement(statement
.Pass());
616 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
617 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
618 condition
->set_if_token(Consume(Token::IF
, "Expected 'if'"));
619 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
620 condition
->set_condition(ParseExpression());
621 if (IsAssignment(condition
->condition()))
622 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
623 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
624 condition
->set_if_true(ParseBlock().Pass());
625 if (Match(Token::ELSE
)) {
626 if (LookAhead(Token::LEFT_BRACE
)) {
627 condition
->set_if_false(ParseBlock().Pass());
628 } else if (LookAhead(Token::IF
)) {
629 condition
->set_if_false(ParseStatement().Pass());
631 *err_
= Err(cur_token(), "Expected '{' or 'if' after 'else'.");
632 return scoped_ptr
<ParseNode
>();
636 return scoped_ptr
<ParseNode
>();
637 return condition
.Pass();
640 void Parser::TraverseOrder(const ParseNode
* root
,
641 std::vector
<const ParseNode
*>* pre
,
642 std::vector
<const ParseNode
*>* post
) {
644 pre
->push_back(root
);
646 if (const AccessorNode
* accessor
= root
->AsAccessor()) {
647 TraverseOrder(accessor
->index(), pre
, post
);
648 TraverseOrder(accessor
->member(), pre
, post
);
649 } else if (const BinaryOpNode
* binop
= root
->AsBinaryOp()) {
650 TraverseOrder(binop
->left(), pre
, post
);
651 TraverseOrder(binop
->right(), pre
, post
);
652 } else if (const BlockNode
* block
= root
->AsBlock()) {
653 for (const auto& statement
: block
->statements())
654 TraverseOrder(statement
, pre
, post
);
655 TraverseOrder(block
->End(), pre
, post
);
656 } else if (const ConditionNode
* condition
= root
->AsConditionNode()) {
657 TraverseOrder(condition
->condition(), pre
, post
);
658 TraverseOrder(condition
->if_true(), pre
, post
);
659 TraverseOrder(condition
->if_false(), pre
, post
);
660 } else if (const FunctionCallNode
* func_call
= root
->AsFunctionCall()) {
661 TraverseOrder(func_call
->args(), pre
, post
);
662 TraverseOrder(func_call
->block(), pre
, post
);
663 } else if (root
->AsIdentifier()) {
665 } else if (const ListNode
* list
= root
->AsList()) {
666 for (const auto& node
: list
->contents())
667 TraverseOrder(node
, pre
, post
);
668 TraverseOrder(list
->End(), pre
, post
);
669 } else if (root
->AsLiteral()) {
671 } else if (const UnaryOpNode
* unaryop
= root
->AsUnaryOp()) {
672 TraverseOrder(unaryop
->operand(), pre
, post
);
673 } else if (root
->AsBlockComment()) {
675 } else if (root
->AsEnd()) {
678 CHECK(false) << "Unhandled case in TraverseOrder.";
681 post
->push_back(root
);
685 void Parser::AssignComments(ParseNode
* file
) {
686 // Start by generating a pre- and post- order traversal of the tree so we
687 // can determine what's before and after comments.
688 std::vector
<const ParseNode
*> pre
;
689 std::vector
<const ParseNode
*> post
;
690 TraverseOrder(file
, &pre
, &post
);
692 // Assign line comments to syntax immediately following.
694 for (const auto& node
: pre
) {
695 const Location
& start
= node
->GetRange().begin();
696 while (cur_comment
< static_cast<int>(line_comment_tokens_
.size())) {
697 if (start
.byte() >= line_comment_tokens_
[cur_comment
].location().byte()) {
698 const_cast<ParseNode
*>(node
)->comments_mutable()->append_before(
699 line_comment_tokens_
[cur_comment
]);
707 // Remaining line comments go at end of file.
708 for (; cur_comment
< static_cast<int>(line_comment_tokens_
.size());
710 file
->comments_mutable()->append_after(line_comment_tokens_
[cur_comment
]);
712 // Assign suffix to syntax immediately before.
713 cur_comment
= static_cast<int>(suffix_comment_tokens_
.size() - 1);
714 for (std::vector
<const ParseNode
*>::const_reverse_iterator i
= post
.rbegin();
717 // Don't assign suffix comments to the function, list, or block, but instead
718 // to the last thing inside.
719 if ((*i
)->AsFunctionCall() || (*i
)->AsList() || (*i
)->AsBlock())
722 const Location
& start
= (*i
)->GetRange().begin();
723 const Location
& end
= (*i
)->GetRange().end();
725 // Don't assign suffix comments to something that starts on an earlier
731 // it's attached to "b", not sources = [ ... ].
732 if (start
.line_number() != end
.line_number())
735 while (cur_comment
>= 0) {
736 if (end
.byte() <= suffix_comment_tokens_
[cur_comment
].location().byte()) {
737 const_cast<ParseNode
*>(*i
)->comments_mutable()->append_suffix(
738 suffix_comment_tokens_
[cur_comment
]);
745 // Suffix comments were assigned in reverse, so if there were multiple on
746 // the same node, they need to be reversed.
747 if ((*i
)->comments() && !(*i
)->comments()->suffix().empty())
748 const_cast<ParseNode
*>(*i
)->comments_mutable()->ReverseSuffix();