// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/parser.h"

#include "base/logging.h"
#include "tools/gn/functions.h"
#include "tools/gn/operators.h"
#include "tools/gn/token.h"

const char kGrammar_Help[] =
    "GN build language grammar\n"
    "\n"
    "  GN build files are read as sequences of tokens. While splitting the\n"
    "  file into tokens, the next token is the longest sequence of characters\n"
    "  that form a valid token.\n"
    "\n"
    "White space and comments\n"
    "\n"
    "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
    "  carriage returns (U+000D), and newlines (U+000A).\n"
    "\n"
    "  Comments start at the character \"#\" and stop at the next newline.\n"
    "\n"
    "  White space and comments are ignored except that they may separate\n"
    "  tokens that would otherwise combine into a single token.\n"
    "\n"
    "  Identifiers name variables and functions.\n"
    "\n"
    "      identifier = letter { letter | digit } .\n"
    "      letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
    "      digit      = \"0\" ... \"9\" .\n"
    "\n"
    "  The following keywords are reserved and may not be used as\n"
    "  identifiers:\n"
    "\n"
    "          else    false   if      true\n"
    "\n"
    "  An integer literal represents a decimal integer value.\n"
    "\n"
    "      integer = [ \"-\" ] digit { digit } .\n"
    "\n"
    "  Leading zeros and negative zero are disallowed.\n"
    "\n"
    "  A string literal represents a string value consisting of the quoted\n"
    "  characters with possible escape sequences and variable expansions.\n"
    "\n"
    "      string    = `\"` { char | escape | expansion } `\"` .\n"
    "      escape    = `\\` ( \"$\" | `\"` | char ) .\n"
    "      expansion = \"$\" ( identifier | \"{\" identifier \"}\" ) .\n"
    "      char      = /* any character except \"$\", `\"`, or newline */ .\n"
    "\n"
    "  After a backslash, certain sequences represent special characters:\n"
    "\n"
    "      \\\"    U+0022    quotation mark\n"
    "      \\$    U+0024    dollar sign\n"
    "      \\\\    U+005C    backslash\n"
    "\n"
    "  All other backslashes represent themselves.\n"
    "\n"
    "  The following character sequences represent punctuation:\n"
    "\n"
    "  The input tokens form a syntax tree following a context-free grammar:\n"
    "\n"
    "      File = StatementList .\n"
    "\n"
    "      Statement     = Assignment | Call | Condition .\n"
    "      Assignment    = identifier AssignOp Expr .\n"
    "      Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
    "      Condition     = \"if\" \"(\" Expr \")\" Block\n"
    "                      [ \"else\" ( Condition | Block ) ] .\n"
    "      Block         = \"{\" StatementList \"}\" .\n"
    "      StatementList = { Statement } .\n"
    "\n"
    "      Expr        = UnaryExpr | Expr BinaryOp Expr .\n"
    "      UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .\n"
    "      PrimaryExpr = identifier | integer | string | Call\n"
    "                  | identifier \"[\" Expr \"]\"\n"
    "                  | identifier \".\" identifier\n"
    "                  | \"(\" Expr \")\"\n"
    "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
    "      ExprList    = Expr { \",\" Expr } .\n"
    "\n"
    "      AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
    "      UnaryOp  = \"!\" .\n"
    "      BinaryOp = \"+\" | \"-\"                  // highest priority\n"
    "               | \"<\" | \"<=\" | \">\" | \">=\"\n"
    "               | \"==\" | \"!=\"\n"
    "               | \"&&\"\n"
    "               | \"||\" .                      // lowest priority\n"
    "\n"
    "  All binary operators are left-associative.\n";

enum Precedence {
  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  PRECEDENCE_OR = 2,
  PRECEDENCE_AND = 3,
  PRECEDENCE_EQUALITY = 4,
  PRECEDENCE_RELATION = 5,
  PRECEDENCE_SUM = 6,
  PRECEDENCE_PREFIX = 7,
  PRECEDENCE_CALL = 8,
  PRECEDENCE_DOT = 9,  // Highest precedence.
};

// The top-level for blocks/ifs is recursive descent, the expression parser is
// a Pratt parser. The basic idea there is to have the precedences (and
// associativities) encoded relative to each other and only parse up until you
// hit something of that precedence. There's a dispatch table in expressions_
// at the top of parser.cc that describes how each token dispatches if it's
// seen as either a prefix or infix operator, and if it's infix, what its
// precedence is.
//
// - http://javascript.crockford.com/tdop/tdop.html
// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
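//
// For illustration (not from the original comment): with the table below, an
// input like
//   !a || b == c + 1
// parses as (!a) || (b == (c + 1)). ParseExpression() only keeps consuming
// infix operators whose precedence is at least the minimum it was called
// with, so "+" (PRECEDENCE_SUM) binds tighter than "==" (PRECEDENCE_EQUALITY),
// which binds tighter than "||" (PRECEDENCE_OR).
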
// Indexed by Token::Type.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},  // INVALID
    {&Parser::Literal, nullptr, -1},  // INTEGER
    {&Parser::Literal, nullptr, -1},  // STRING
    {&Parser::Literal, nullptr, -1},  // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},  // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},  // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},  // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},  // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},  // DOT
    {&Parser::Group, nullptr, -1},  // LEFT_PAREN
    {nullptr, nullptr, -1},  // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},  // LEFT_BRACKET
    {nullptr, nullptr, -1},  // RIGHT_BRACKET
    {nullptr, nullptr, -1},  // LEFT_BRACE
    {nullptr, nullptr, -1},  // RIGHT_BRACE
    {nullptr, nullptr, -1},  // IF
    {nullptr, nullptr, -1},  // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},  // COMMA
    {nullptr, nullptr, -1},  // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},  // LINE_COMMENT
    {nullptr, nullptr, -1},  // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};

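// Editorial note on reading the table above: each entry is
// {prefix handler, infix handler, infix precedence} for one Token::Type. For
// example, INTEGER can only start an expression (prefix Literal, no infix
// role), while LEFT_BRACKET acts both as a prefix (a list literal, via List)
// and as an infix operator (a subscript, via Subscript) at PRECEDENCE_CALL.
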
Parser::Parser(const std::vector<Token>& tokens, Err* err)
    : err_(err), cur_(0) {
  for (const auto& token : tokens) {
    switch (token.type()) {
      case Token::LINE_COMMENT:
        line_comment_tokens_.push_back(token);
        break;
      case Token::SUFFIX_COMMENT:
        suffix_comment_tokens_.push_back(token);
        break;
      default:
        // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
        // through the real parser.
        tokens_.push_back(token);
        break;
    }
  }
}

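// Editorial note: the constructor above routes LINE_COMMENT and SUFFIX_COMMENT
// tokens into side lists instead of the main token stream; AssignComments()
// at the bottom of this file later attaches them to the finished parse tree.
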
// static
scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
                                    Err* err) {
  Parser p(tokens, err);
  return p.ParseFile();
}

// static
scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
                                              Err* err) {
  Parser p(tokens, err);
  scoped_ptr<ParseNode> expr = p.ParseExpression();
  if (!p.at_end() && !err->has_error()) {
    *err = Err(p.cur_token(), "Trailing garbage");
    return scoped_ptr<ParseNode>();
  }
  return expr.Pass();
}

// static
scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
                                         Err* err) {
  for (const Token& token : tokens) {
    switch (token.type()) {
      case Token::INTEGER:
      case Token::STRING:
      case Token::TRUE_TOKEN:
      case Token::FALSE_TOKEN:
      case Token::LEFT_BRACKET:
      case Token::RIGHT_BRACKET:
      case Token::COMMA:
        continue;
      default:
        *err = Err(token, "Invalid token in literal value");
        return scoped_ptr<ParseNode>();
    }
  }

  return ParseExpression(tokens, err);
}

bool Parser::IsAssignment(const ParseNode* node) const {
  return node && node->AsBinaryOp() &&
         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
}

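// Editorial note: IsAssignment() is used both to accept assignments as
// statements (see ParseStatement) and to reject them where they are not
// allowed, e.g. inside an 'if' condition (see ParseCondition).
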
bool Parser::IsStatementBreak(Token::Type token_type) const {
  switch (token_type) {
    case Token::IDENTIFIER:
    case Token::LEFT_BRACE:
    case Token::RIGHT_BRACE:
    case Token::IF:
    case Token::ELSE:
      return true;
    default:
      return false;
  }
}

bool Parser::LookAhead(Token::Type type) {
  if (at_end())
    return false;
  return cur_token().type() == type;
}

bool Parser::Match(Token::Type type) {
  if (!LookAhead(type))
    return false;
  Consume();
  return true;
}

Token Parser::Consume(Token::Type type, const char* error_message) {
  Token::Type types[1] = { type };
  return Consume(types, 1, error_message);
}

Token Parser::Consume(Token::Type* types,
                      size_t num_types,
                      const char* error_message) {
  if (has_error()) {
    // Don't overwrite current error, but make progress through tokens so that
    // a loop that's expecting a particular token will still terminate.
    cur_++;
    return Token(Location(), Token::INVALID, base::StringPiece());
  }
  if (at_end()) {
    const char kEOFMsg[] = "I hit EOF instead.";
    if (tokens_.size() == 0)
      *err_ = Err(Location(), error_message, kEOFMsg);
    else
      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    return Token(Location(), Token::INVALID, base::StringPiece());
  }

  for (size_t i = 0; i < num_types; ++i) {
    if (cur_token().type() == types[i])
      return Consume();
  }
  *err_ = Err(cur_token(), error_message);
  return Token(Location(), Token::INVALID, base::StringPiece());
}

Token Parser::Consume() {
  return tokens_[cur_++];
}

scoped_ptr<ParseNode> Parser::ParseExpression() {
  return ParseExpression(0);
}

scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
  if (at_end())
    return scoped_ptr<ParseNode>();

  Token token = Consume();
  PrefixFunc prefix = expressions_[token.type()].prefix;

  if (prefix == nullptr) {
    *err_ = Err(token,
                std::string("Unexpected token '") + token.value().as_string() +
                    std::string("'"));
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> left = (this->*prefix)(token);
  if (has_error())
    return left.Pass();

  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
         precedence <= expressions_[cur_token().type()].precedence) {
    token = Consume();
    InfixFunc infix = expressions_[token.type()].infix;
    if (infix == nullptr) {
      *err_ = Err(token,
                  std::string("Unexpected token '") +
                      token.value().as_string() + std::string("'"));
      return scoped_ptr<ParseNode>();
    }
    left = (this->*infix)(left.Pass(), token);
    if (has_error())
      return scoped_ptr<ParseNode>();
  }

  return left.Pass();
}

scoped_ptr<ParseNode> Parser::Literal(Token token) {
  return make_scoped_ptr(new LiteralNode(token));
}

scoped_ptr<ParseNode> Parser::Name(Token token) {
  return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
}

scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
  scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
  comment->set_comment(token);
  return comment.Pass();
}

scoped_ptr<ParseNode> Parser::Group(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression();
  if (has_error())
    return scoped_ptr<ParseNode>();
  Consume(Token::RIGHT_PAREN, "Expected ')'");
  return expr.Pass();
}

scoped_ptr<ParseNode> Parser::Not(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
  if (has_error())
    return scoped_ptr<ParseNode>();
  scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
  unary_op->set_op(token);
  unary_op->set_operand(expr.Pass());
  return unary_op.Pass();
}

scoped_ptr<ParseNode> Parser::List(Token node) {
  scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
  if (!has_error() && !at_end())
    Consume(Token::RIGHT_BRACKET, "Expected ']'");
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
                                             Token token) {
  scoped_ptr<ParseNode> right =
      ParseExpression(expressions_[token.type()].precedence + 1);
  if (!right) {
    if (!has_error()) {
      *err_ = Err(token, "Expected right hand side for '" +
                             token.value().as_string() + "'");
    }
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
  binary_op->set_op(token);
  binary_op->set_left(left.Pass());
  binary_op->set_right(right.Pass());
  return binary_op.Pass();
}

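// Editorial note: passing precedence + 1 when parsing the right-hand side is
// what makes binary operators left-associative, matching the grammar text
// above: "a - b - c" parses as "(a - b) - c" because the recursive call will
// not consume another "-" at the same precedence level.
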
scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
                                               Token token) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(token);
  list->set_end(make_scoped_ptr(new EndNode(token)));
  scoped_ptr<BlockNode> block;
  bool has_arg = false;
  if (LookAhead(Token::LEFT_PAREN)) {
    Token start_token = Consume();
    // Parsing a function call.
    has_arg = true;
    if (Match(Token::RIGHT_PAREN)) {
      // Nothing, just an empty call.
    } else {
      list = ParseList(start_token, Token::RIGHT_PAREN, false);
      if (has_error())
        return scoped_ptr<ParseNode>();
      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    }
    // Optionally with a scope.
    if (LookAhead(Token::LEFT_BRACE)) {
      block = ParseBlock();
      if (has_error())
        return scoped_ptr<ParseNode>();
    }
  }

  if (!left && !has_arg) {
    // Not a function call, just a standalone identifier.
    return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
  }
  scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
  func_call->set_function(token);
  func_call->set_args(list.Pass());
  if (block)
    func_call->set_block(block.Pass());
  return func_call.Pass();
}

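// Editorial note: IdentifierOrCall() covers the four shapes an identifier can
// take in the grammar: a bare name ("foo"), an empty call ("foo()"), a call
// with arguments ("foo(1, 2)"), and a call followed by a block
// ("foo("bar") { ... }").
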
scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
                                         Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
  scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
  assign->set_op(token);
  assign->set_left(left.Pass());
  assign->set_right(value.Pass());
  return assign.Pass();
}

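// Editorial note: Assignment() is reached through the infix table rows for
// "=", "+=", and "-=" (see expressions_ above), so an input like
// "x = [ 1, 2 ]" becomes a BinaryOpNode with an IdentifierNode on the left
// and the list expression on the right.
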
scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
                                        Token token) {
  // TODO: Maybe support more complex expressions like a[0][0]. This would
  // require work on the evaluator too.
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only subscript identifiers.",
                "The thing on the left hand side of the [] must be an identifier\n"
                "and not an expression. If you need this, you'll have to assign the\n"
                "value to a temporary before subscripting. Sorry.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression();
  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_index(value.Pass());
  return accessor.Pass();
}

scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
                                          Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only use \".\" for identifiers.",
                "The thing on the left hand side of the dot must be an identifier\n"
                "and not an expression. If you need this, you'll have to assign the\n"
                "value to a temporary first. Sorry.");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
  if (!right || !right->AsIdentifier()) {
    *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
                "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_member(scoped_ptr<IdentifierNode>(
      static_cast<IdentifierNode*>(right.release())));
  return accessor.Pass();
}

// Does not Consume the start or end token.
scoped_ptr<ListNode> Parser::ParseList(Token start_token,
                                       Token::Type stop_before,
                                       bool allow_trailing_comma) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(start_token);
  bool just_got_comma = false;
  bool first_time = true;
  while (!LookAhead(stop_before)) {
    if (!first_time) {
      if (!just_got_comma) {
        // Require commas separate things in lists.
        *err_ = Err(cur_token(), "Expected comma between items.");
        return scoped_ptr<ListNode>();
      }
    }
    first_time = false;

    // Why _OR? We're parsing things that are higher precedence than the ,
    // that separates the items of the list. , should appear lower than
    // boolean expressions (the lowest of which is OR), but above assignments.
    list->append_item(ParseExpression(PRECEDENCE_OR));
    if (has_error())
      return scoped_ptr<ListNode>();
    if (at_end()) {
      *err_ =
          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
      return scoped_ptr<ListNode>();
    }
    if (list->contents().back()->AsBlockComment()) {
      // If there was a comment inside the list, we don't need a comma to the
      // next item, so pretend we got one, if we're expecting one.
      just_got_comma = allow_trailing_comma;
    } else {
      just_got_comma = Match(Token::COMMA);
    }
  }
  if (just_got_comma && !allow_trailing_comma) {
    *err_ = Err(cur_token(), "Trailing comma");
    return scoped_ptr<ListNode>();
  }
  list->set_end(make_scoped_ptr(new EndNode(cur_token())));
  return list.Pass();
}

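// Editorial note: allow_trailing_comma is why a list literal like
// [ "a", "b", ] is accepted (List() passes true) while a call like
// foo(1, 2,) is rejected with "Trailing comma" (IdentifierOrCall() passes
// false).
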
scoped_ptr<ParseNode> Parser::ParseFile() {
  scoped_ptr<BlockNode> file(new BlockNode);
  for (;;) {
    if (at_end())
      break;
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      break;
    file->append_statement(statement.Pass());
  }
  if (!at_end() && !has_error())
    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
  if (has_error())
    return scoped_ptr<ParseNode>();

  // TODO(scottmg): If this is measurably expensive, it could be done only
  // when necessary (when reformatting, or during tests). Comments are
  // separate from the parse tree at this point, so downstream code can remain
  // ignorant of them.
  AssignComments(file.get());

  return file.Pass();
}

scoped_ptr<ParseNode> Parser::ParseStatement() {
  if (LookAhead(Token::IF)) {
    return ParseCondition();
  } else if (LookAhead(Token::BLOCK_COMMENT)) {
    return BlockComment(Consume());
  } else {
    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    // to allow "pointless" expressions and return ParseExpression() directly.
    scoped_ptr<ParseNode> stmt = ParseExpression();
    if (stmt) {
      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
        return stmt.Pass();
    }
    if (!has_error()) {
      Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
      *err_ = Err(token, "Expecting assignment or function call.");
    }
    return scoped_ptr<ParseNode>();
  }
}

scoped_ptr<BlockNode> Parser::ParseBlock() {
  Token begin_token =
      Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
  if (has_error())
    return scoped_ptr<BlockNode>();
  scoped_ptr<BlockNode> block(new BlockNode);
  block->set_begin_token(begin_token);

  for (;;) {
    if (LookAhead(Token::RIGHT_BRACE)) {
      block->set_end(make_scoped_ptr(new EndNode(Consume())));
      break;
    }
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      return scoped_ptr<BlockNode>();
    block->append_statement(statement.Pass());
  }
  return block.Pass();
}

scoped_ptr<ParseNode> Parser::ParseCondition() {
  scoped_ptr<ConditionNode> condition(new ConditionNode);
  condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
  condition->set_condition(ParseExpression());
  if (IsAssignment(condition->condition()))
    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
  condition->set_if_true(ParseBlock().Pass());
  if (Match(Token::ELSE)) {
    if (LookAhead(Token::LEFT_BRACE)) {
      condition->set_if_false(ParseBlock().Pass());
    } else if (LookAhead(Token::IF)) {
      condition->set_if_false(ParseStatement().Pass());
    } else {
      *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
      return scoped_ptr<ParseNode>();
    }
  }
  if (has_error())
    return scoped_ptr<ParseNode>();
  return condition.Pass();
}

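// Example of the shapes ParseCondition() accepts (illustrative GN source,
// not from the original file):
//
//   if (is_win) {
//     sources += [ "win.cc" ]
//   } else if (is_linux) {
//     sources += [ "linux.cc" ]
//   } else {
//     sources += [ "generic.cc" ]
//   }
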
void Parser::TraverseOrder(const ParseNode* root,
                           std::vector<const ParseNode*>* pre,
                           std::vector<const ParseNode*>* post) {
  if (root) {
    pre->push_back(root);

    if (const AccessorNode* accessor = root->AsAccessor()) {
      TraverseOrder(accessor->index(), pre, post);
      TraverseOrder(accessor->member(), pre, post);
    } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
      TraverseOrder(binop->left(), pre, post);
      TraverseOrder(binop->right(), pre, post);
    } else if (const BlockNode* block = root->AsBlock()) {
      for (const auto& statement : block->statements())
        TraverseOrder(statement, pre, post);
      TraverseOrder(block->End(), pre, post);
    } else if (const ConditionNode* condition = root->AsConditionNode()) {
      TraverseOrder(condition->condition(), pre, post);
      TraverseOrder(condition->if_true(), pre, post);
      TraverseOrder(condition->if_false(), pre, post);
    } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
      TraverseOrder(func_call->args(), pre, post);
      TraverseOrder(func_call->block(), pre, post);
    } else if (root->AsIdentifier()) {
      // Nothing.
    } else if (const ListNode* list = root->AsList()) {
      for (const auto& node : list->contents())
        TraverseOrder(node, pre, post);
      TraverseOrder(list->End(), pre, post);
    } else if (root->AsLiteral()) {
      // Nothing.
    } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
      TraverseOrder(unaryop->operand(), pre, post);
    } else if (root->AsBlockComment()) {
      // Nothing.
    } else if (root->AsEnd()) {
      // Nothing.
    } else {
      CHECK(false) << "Unhandled case in TraverseOrder.";
    }

    post->push_back(root);
  }
}

void Parser::AssignComments(ParseNode* file) {
  // Start by generating a pre- and post- order traversal of the tree so we
  // can determine what's before and after comments.
  std::vector<const ParseNode*> pre;
  std::vector<const ParseNode*> post;
  TraverseOrder(file, &pre, &post);

  // Assign line comments to syntax immediately following.
  int cur_comment = 0;
  for (const auto& node : pre) {
    const Location& start = node->GetRange().begin();
    while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
      if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(node)->comments_mutable()->append_before(
            line_comment_tokens_[cur_comment]);
        ++cur_comment;
      } else {
        break;
      }
    }
  }

  // Remaining line comments go at end of file.
  for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
       ++cur_comment)
    file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);

  // Assign suffix to syntax immediately before.
  cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
  for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
       i != post.rend();
       ++i) {
    // Don't assign suffix comments to the function, list, or block, but
    // instead to the last thing inside.
    if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
      continue;

    const Location& start = (*i)->GetRange().begin();
    const Location& end = (*i)->GetRange().end();

    // Don't assign suffix comments to something that starts on an earlier
    // line, so that in:
    //
    //   sources = [ "a",
    //       "b" ] # comment
    //
    // it's attached to "b", not sources = [ ... ].
    if (start.line_number() != end.line_number())
      continue;

    while (cur_comment >= 0) {
      if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
            suffix_comment_tokens_[cur_comment]);
        --cur_comment;
      } else {
        break;
      }
    }

    // Suffix comments were assigned in reverse, so if there were multiple on
    // the same node, they need to be reversed.
    if ((*i)->comments() && !(*i)->comments()->suffix().empty())
      const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
  }
}
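
// Editorial note: the net effect of AssignComments() is that, for input like
//
//   # A standalone line comment.
//   foo = 1  # A suffix comment.
//
// the line comment becomes a "before" comment of the "foo = 1" assignment,
// and the suffix comment is attached as a suffix to the node that ends on
// that line.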