1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
12 const char kGrammar_Help
[] =
13 "GN build language grammar\n"
17 " GN build files are read as sequences of tokens. While splitting the\n"
18 " file into tokens, the next token is the longest sequence of characters\n"
19 " that form a valid token.\n"
21 "White space and comments\n"
23 " White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
24 " carriage returns (U+000D), and newlines (U+000A).\n"
26 " Comments start at the character \"#\" and stop at the next newline.\n"
28 " White space and comments are ignored except that they may separate\n"
29 " tokens that would otherwise combine into a single token.\n"
33 " Identifiers name variables and functions.\n"
35 " identifier = letter { letter | digit } .\n"
36 " letter = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
37 " digit = \"0\" ... \"9\" .\n"
41 " The following keywords are reserved and may not be used as\n"
44 " else false if true\n"
48 " An integer literal represents a decimal integer value.\n"
50 " integer = [ \"-\" ] digit { digit } .\n"
54 " A string literal represents a string value consisting of the quoted\n"
55 " characters with possible escape sequences and variable expansions.\n"
57 " string = `\"` { char | escape | expansion } `\"` .\n"
58 " escape = `\\` ( \"$\" | `\"` | char ) .\n"
59 " expansion = \"$\" ( identifier | \"{\" identifier \"}\" ) .\n"
60 " char = /* any character except \"$\", `\"`, or newline */ .\n"
62 " After a backslash, certain sequences represent special characters:\n"
64 " \\\" U+0022 quotation mark\n"
65 " \\$ U+0024 dollar sign\n"
66 " \\\\ U+005C backslash\n"
68 " All other backslashes represent themselves.\n"
72 " The following character sequences represent punctuation:\n"
81 " The input tokens form a syntax tree following a context-free grammar:\n"
83 " File = StatementList .\n"
85 " Statement = Assignment | Call | Condition .\n"
86 " Assignment = identifier AssignOp Expr .\n"
87 " Call = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
88 " Condition = \"if\" \"(\" Expr \")\" Block\n"
89 " [ \"else\" ( Condition | Block ) ] .\n"
90 " Block = \"{\" StatementList \"}\" .\n"
91 " StatementList = { Statement } .\n"
93 " Expr = UnaryExpr | Expr BinaryOp Expr .\n"
94 " UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .\n"
95 " PrimaryExpr = identifier | integer | string | Call\n"
96 " | identifier \"[\" Expr \"]\"\n"
97 " | identifier \".\" identifier\n"
98 " | \"(\" Expr \")\"\n"
99 " | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
100 " ExprList = Expr { \",\" Expr } .\n"
102 " AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
103 " UnaryOp = \"!\" .\n"
104 " BinaryOp = \"+\" | \"-\" // highest priority\n"
105 " | \"<\" | \"<=\" | \">\" | \">=\"\n"
106 " | \"==\" | \"!=\"\n"
108 " | \"||\" . // lowest priority\n"
110 " All binary operators are left-associative.\n";
113 PRECEDENCE_ASSIGNMENT
= 1, // Lowest precedence.
116 PRECEDENCE_EQUALITY
= 4,
117 PRECEDENCE_RELATION
= 5,
119 PRECEDENCE_PREFIX
= 7,
121 PRECEDENCE_DOT
= 9, // Highest precedence.
124 // The top-level for blocks/ifs is recursive descent, the expression parser is
125 // a Pratt parser. The basic idea there is to have the precedences (and
126 // associativities) encoded relative to each other and only parse up until you
127 // hit something of that precedence. There's a dispatch table in expressions_
128 // at the top of parser.cc that describes how each token dispatches if it's
129 // seen as either a prefix or infix operator, and if it's infix, what its
133 // - http://javascript.crockford.com/tdop/tdop.html
134 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
136 // Indexed by Token::Type.
137 ParserHelper
Parser::expressions_
[] = {
138 {nullptr, nullptr, -1}, // INVALID
139 {&Parser::Literal
, nullptr, -1}, // INTEGER
140 {&Parser::Literal
, nullptr, -1}, // STRING
141 {&Parser::Literal
, nullptr, -1}, // TRUE_TOKEN
142 {&Parser::Literal
, nullptr, -1}, // FALSE_TOKEN
143 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // EQUAL
144 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // PLUS
145 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_SUM
}, // MINUS
146 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // PLUS_EQUALS
147 {nullptr, &Parser::Assignment
, PRECEDENCE_ASSIGNMENT
}, // MINUS_EQUALS
148 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // EQUAL_EQUAL
149 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_EQUALITY
}, // NOT_EQUAL
150 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_EQUAL
151 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_EQUAL
152 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // LESS_THAN
153 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_RELATION
}, // GREATER_THAN
154 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_AND
}, // BOOLEAN_AND
155 {nullptr, &Parser::BinaryOperator
, PRECEDENCE_OR
}, // BOOLEAN_OR
156 {&Parser::Not
, nullptr, -1}, // BANG
157 {nullptr, &Parser::DotOperator
, PRECEDENCE_DOT
}, // DOT
158 {&Parser::Group
, nullptr, -1}, // LEFT_PAREN
159 {nullptr, nullptr, -1}, // RIGHT_PAREN
160 {&Parser::List
, &Parser::Subscript
, PRECEDENCE_CALL
}, // LEFT_BRACKET
161 {nullptr, nullptr, -1}, // RIGHT_BRACKET
162 {nullptr, nullptr, -1}, // LEFT_BRACE
163 {nullptr, nullptr, -1}, // RIGHT_BRACE
164 {nullptr, nullptr, -1}, // IF
165 {nullptr, nullptr, -1}, // ELSE
166 {&Parser::Name
, &Parser::IdentifierOrCall
, PRECEDENCE_CALL
}, // IDENTIFIER
167 {nullptr, nullptr, -1}, // COMMA
168 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
169 {nullptr, nullptr, -1}, // LINE_COMMENT
170 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
171 {&Parser::BlockComment
, nullptr, -1}, // BLOCK_COMMENT
174 Parser::Parser(const std::vector
<Token
>& tokens
, Err
* err
)
175 : err_(err
), cur_(0) {
176 for (const auto& token
: tokens
) {
177 switch(token
.type()) {
178 case Token::LINE_COMMENT
:
179 line_comment_tokens_
.push_back(token
);
181 case Token::SUFFIX_COMMENT
:
182 suffix_comment_tokens_
.push_back(token
);
185 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
186 // through the real parser.
187 tokens_
.push_back(token
);
197 scoped_ptr
<ParseNode
> Parser::Parse(const std::vector
<Token
>& tokens
,
199 Parser
p(tokens
, err
);
200 return p
.ParseFile();
204 scoped_ptr
<ParseNode
> Parser::ParseExpression(const std::vector
<Token
>& tokens
,
206 Parser
p(tokens
, err
);
207 return p
.ParseExpression().Pass();
210 bool Parser::IsAssignment(const ParseNode
* node
) const {
211 return node
&& node
->AsBinaryOp() &&
212 (node
->AsBinaryOp()->op().type() == Token::EQUAL
||
213 node
->AsBinaryOp()->op().type() == Token::PLUS_EQUALS
||
214 node
->AsBinaryOp()->op().type() == Token::MINUS_EQUALS
);
217 bool Parser::IsStatementBreak(Token::Type token_type
) const {
218 switch (token_type
) {
219 case Token::IDENTIFIER
:
220 case Token::LEFT_BRACE
:
221 case Token::RIGHT_BRACE
:
230 bool Parser::LookAhead(Token::Type type
) {
233 return cur_token().type() == type
;
236 bool Parser::Match(Token::Type type
) {
237 if (!LookAhead(type
))
243 Token
Parser::Consume(Token::Type type
, const char* error_message
) {
244 Token::Type types
[1] = { type
};
245 return Consume(types
, 1, error_message
);
248 Token
Parser::Consume(Token::Type
* types
,
250 const char* error_message
) {
252 // Don't overwrite current error, but make progress through tokens so that
253 // a loop that's expecting a particular token will still terminate.
255 return Token(Location(), Token::INVALID
, base::StringPiece());
258 const char kEOFMsg
[] = "I hit EOF instead.";
260 *err_
= Err(Location(), error_message
, kEOFMsg
);
262 *err_
= Err(tokens_
[tokens_
.size() - 1], error_message
, kEOFMsg
);
263 return Token(Location(), Token::INVALID
, base::StringPiece());
266 for (size_t i
= 0; i
< num_types
; ++i
) {
267 if (cur_token().type() == types
[i
])
270 *err_
= Err(cur_token(), error_message
);
271 return Token(Location(), Token::INVALID
, base::StringPiece());
274 Token
Parser::Consume() {
275 return tokens_
[cur_
++];
278 scoped_ptr
<ParseNode
> Parser::ParseExpression() {
279 return ParseExpression(0);
282 scoped_ptr
<ParseNode
> Parser::ParseExpression(int precedence
) {
284 return scoped_ptr
<ParseNode
>();
286 Token token
= Consume();
287 PrefixFunc prefix
= expressions_
[token
.type()].prefix
;
289 if (prefix
== nullptr) {
291 std::string("Unexpected token '") + token
.value().as_string() +
293 return scoped_ptr
<ParseNode
>();
296 scoped_ptr
<ParseNode
> left
= (this->*prefix
)(token
);
300 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
301 precedence
<= expressions_
[cur_token().type()].precedence
) {
303 InfixFunc infix
= expressions_
[token
.type()].infix
;
304 if (infix
== nullptr) {
306 std::string("Unexpected token '") +
307 token
.value().as_string() + std::string("'"));
308 return scoped_ptr
<ParseNode
>();
310 left
= (this->*infix
)(left
.Pass(), token
);
312 return scoped_ptr
<ParseNode
>();
318 scoped_ptr
<ParseNode
> Parser::Literal(Token token
) {
319 return make_scoped_ptr(new LiteralNode(token
));
322 scoped_ptr
<ParseNode
> Parser::Name(Token token
) {
323 return IdentifierOrCall(scoped_ptr
<ParseNode
>(), token
).Pass();
326 scoped_ptr
<ParseNode
> Parser::BlockComment(Token token
) {
327 scoped_ptr
<BlockCommentNode
> comment(new BlockCommentNode());
328 comment
->set_comment(token
);
329 return comment
.Pass();
332 scoped_ptr
<ParseNode
> Parser::Group(Token token
) {
333 scoped_ptr
<ParseNode
> expr
= ParseExpression();
335 return scoped_ptr
<ParseNode
>();
336 Consume(Token::RIGHT_PAREN
, "Expected ')'");
340 scoped_ptr
<ParseNode
> Parser::Not(Token token
) {
341 scoped_ptr
<ParseNode
> expr
= ParseExpression(PRECEDENCE_PREFIX
+ 1);
343 return scoped_ptr
<ParseNode
>();
344 scoped_ptr
<UnaryOpNode
> unary_op(new UnaryOpNode
);
345 unary_op
->set_op(token
);
346 unary_op
->set_operand(expr
.Pass());
347 return unary_op
.Pass();
350 scoped_ptr
<ParseNode
> Parser::List(Token node
) {
351 scoped_ptr
<ParseNode
> list(ParseList(node
, Token::RIGHT_BRACKET
, true));
352 if (!has_error() && !at_end())
353 Consume(Token::RIGHT_BRACKET
, "Expected ']'");
357 scoped_ptr
<ParseNode
> Parser::BinaryOperator(scoped_ptr
<ParseNode
> left
,
359 scoped_ptr
<ParseNode
> right
=
360 ParseExpression(expressions_
[token
.type()].precedence
+ 1);
364 "Expected right hand side for '" + token
.value().as_string() + "'");
365 return scoped_ptr
<ParseNode
>();
367 scoped_ptr
<BinaryOpNode
> binary_op(new BinaryOpNode
);
368 binary_op
->set_op(token
);
369 binary_op
->set_left(left
.Pass());
370 binary_op
->set_right(right
.Pass());
371 return binary_op
.Pass();
374 scoped_ptr
<ParseNode
> Parser::IdentifierOrCall(scoped_ptr
<ParseNode
> left
,
376 scoped_ptr
<ListNode
> list(new ListNode
);
377 list
->set_begin_token(token
);
378 list
->set_end(make_scoped_ptr(new EndNode(token
)));
379 scoped_ptr
<BlockNode
> block
;
380 bool has_arg
= false;
381 if (LookAhead(Token::LEFT_PAREN
)) {
382 Token start_token
= Consume();
383 // Parsing a function call.
385 if (Match(Token::RIGHT_PAREN
)) {
386 // Nothing, just an empty call.
388 list
= ParseList(start_token
, Token::RIGHT_PAREN
, false);
390 return scoped_ptr
<ParseNode
>();
391 Consume(Token::RIGHT_PAREN
, "Expected ')' after call");
393 // Optionally with a scope.
394 if (LookAhead(Token::LEFT_BRACE
)) {
395 block
= ParseBlock();
397 return scoped_ptr
<ParseNode
>();
401 if (!left
&& !has_arg
) {
402 // Not a function call, just a standalone identifier.
403 return scoped_ptr
<ParseNode
>(new IdentifierNode(token
)).Pass();
405 scoped_ptr
<FunctionCallNode
> func_call(new FunctionCallNode
);
406 func_call
->set_function(token
);
407 func_call
->set_args(list
.Pass());
409 func_call
->set_block(block
.Pass());
410 return func_call
.Pass();
413 scoped_ptr
<ParseNode
> Parser::Assignment(scoped_ptr
<ParseNode
> left
,
415 if (left
->AsIdentifier() == nullptr) {
416 *err_
= Err(left
.get(), "Left-hand side of assignment must be identifier.");
417 return scoped_ptr
<ParseNode
>();
419 scoped_ptr
<ParseNode
> value
= ParseExpression(PRECEDENCE_ASSIGNMENT
);
420 scoped_ptr
<BinaryOpNode
> assign(new BinaryOpNode
);
421 assign
->set_op(token
);
422 assign
->set_left(left
.Pass());
423 assign
->set_right(value
.Pass());
424 return assign
.Pass();
427 scoped_ptr
<ParseNode
> Parser::Subscript(scoped_ptr
<ParseNode
> left
,
429 // TODO: Maybe support more complex expressions like a[0][0]. This would
430 // require work on the evaluator too.
431 if (left
->AsIdentifier() == nullptr) {
432 *err_
= Err(left
.get(), "May only subscript identifiers.",
433 "The thing on the left hand side of the [] must be an identifier\n"
434 "and not an expression. If you need this, you'll have to assign the\n"
435 "value to a temporary before subscripting. Sorry.");
436 return scoped_ptr
<ParseNode
>();
438 scoped_ptr
<ParseNode
> value
= ParseExpression();
439 Consume(Token::RIGHT_BRACKET
, "Expecting ']' after subscript.");
440 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
441 accessor
->set_base(left
->AsIdentifier()->value());
442 accessor
->set_index(value
.Pass());
443 return accessor
.Pass();
446 scoped_ptr
<ParseNode
> Parser::DotOperator(scoped_ptr
<ParseNode
> left
,
448 if (left
->AsIdentifier() == nullptr) {
449 *err_
= Err(left
.get(), "May only use \".\" for identifiers.",
450 "The thing on the left hand side of the dot must be an identifier\n"
451 "and not an expression. If you need this, you'll have to assign the\n"
452 "value to a temporary first. Sorry.");
453 return scoped_ptr
<ParseNode
>();
456 scoped_ptr
<ParseNode
> right
= ParseExpression(PRECEDENCE_DOT
);
457 if (!right
|| !right
->AsIdentifier()) {
458 *err_
= Err(token
, "Expected identifier for right-hand-side of \".\"",
459 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
460 return scoped_ptr
<ParseNode
>();
463 scoped_ptr
<AccessorNode
> accessor(new AccessorNode
);
464 accessor
->set_base(left
->AsIdentifier()->value());
465 accessor
->set_member(scoped_ptr
<IdentifierNode
>(
466 static_cast<IdentifierNode
*>(right
.release())));
467 return accessor
.Pass();
470 // Does not Consume the start or end token.
471 scoped_ptr
<ListNode
> Parser::ParseList(Token start_token
,
472 Token::Type stop_before
,
473 bool allow_trailing_comma
) {
474 scoped_ptr
<ListNode
> list(new ListNode
);
475 list
->set_begin_token(start_token
);
476 bool just_got_comma
= false;
477 bool first_time
= true;
478 while (!LookAhead(stop_before
)) {
480 if (!just_got_comma
) {
481 // Require commas separate things in lists.
482 *err_
= Err(cur_token(), "Expected comma between items.");
483 return scoped_ptr
<ListNode
>();
488 // Why _OR? We're parsing things that are higher precedence than the ,
489 // that separates the items of the list. , should appear lower than
490 // boolean expressions (the lowest of which is OR), but above assignments.
491 list
->append_item(ParseExpression(PRECEDENCE_OR
));
493 return scoped_ptr
<ListNode
>();
496 Err(tokens_
[tokens_
.size() - 1], "Unexpected end of file in list.");
497 return scoped_ptr
<ListNode
>();
499 if (list
->contents().back()->AsBlockComment()) {
500 // If there was a comment inside the list, we don't need a comma to the
501 // next item, so pretend we got one, if we're expecting one.
502 just_got_comma
= allow_trailing_comma
;
504 just_got_comma
= Match(Token::COMMA
);
507 if (just_got_comma
&& !allow_trailing_comma
) {
508 *err_
= Err(cur_token(), "Trailing comma");
509 return scoped_ptr
<ListNode
>();
511 list
->set_end(make_scoped_ptr(new EndNode(cur_token())));
515 scoped_ptr
<ParseNode
> Parser::ParseFile() {
516 scoped_ptr
<BlockNode
> file(new BlockNode
);
520 scoped_ptr
<ParseNode
> statement
= ParseStatement();
523 file
->append_statement(statement
.Pass());
525 if (!at_end() && !has_error())
526 *err_
= Err(cur_token(), "Unexpected here, should be newline.");
528 return scoped_ptr
<ParseNode
>();
530 // TODO(scottmg): If this is measurably expensive, it could be done only
531 // when necessary (when reformatting, or during tests). Comments are
532 // separate from the parse tree at this point, so downstream code can remain
534 AssignComments(file
.get());
539 scoped_ptr
<ParseNode
> Parser::ParseStatement() {
540 if (LookAhead(Token::IF
)) {
541 return ParseCondition();
542 } else if (LookAhead(Token::BLOCK_COMMENT
)) {
543 return BlockComment(Consume());
545 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
546 // to allow "pointless" expressions and return ParseExpression() directly.
547 scoped_ptr
<ParseNode
> stmt
= ParseExpression();
549 if (stmt
->AsFunctionCall() || IsAssignment(stmt
.get()))
553 Token token
= at_end() ? tokens_
[tokens_
.size() - 1] : cur_token();
554 *err_
= Err(token
, "Expecting assignment or function call.");
556 return scoped_ptr
<ParseNode
>();
560 scoped_ptr
<BlockNode
> Parser::ParseBlock() {
562 Consume(Token::LEFT_BRACE
, "Expected '{' to start a block.");
564 return scoped_ptr
<BlockNode
>();
565 scoped_ptr
<BlockNode
> block(new BlockNode
);
566 block
->set_begin_token(begin_token
);
569 if (LookAhead(Token::RIGHT_BRACE
)) {
570 block
->set_end(make_scoped_ptr(new EndNode(Consume())));
574 scoped_ptr
<ParseNode
> statement
= ParseStatement();
576 return scoped_ptr
<BlockNode
>();
577 block
->append_statement(statement
.Pass());
582 scoped_ptr
<ParseNode
> Parser::ParseCondition() {
583 scoped_ptr
<ConditionNode
> condition(new ConditionNode
);
584 condition
->set_if_token(Consume(Token::IF
, "Expected 'if'"));
585 Consume(Token::LEFT_PAREN
, "Expected '(' after 'if'.");
586 condition
->set_condition(ParseExpression());
587 if (IsAssignment(condition
->condition()))
588 *err_
= Err(condition
->condition(), "Assignment not allowed in 'if'.");
589 Consume(Token::RIGHT_PAREN
, "Expected ')' after condition of 'if'.");
590 condition
->set_if_true(ParseBlock().Pass());
591 if (Match(Token::ELSE
)) {
592 if (LookAhead(Token::LEFT_BRACE
)) {
593 condition
->set_if_false(ParseBlock().Pass());
594 } else if (LookAhead(Token::IF
)) {
595 condition
->set_if_false(ParseStatement().Pass());
597 *err_
= Err(cur_token(), "Expected '{' or 'if' after 'else'.");
598 return scoped_ptr
<ParseNode
>();
602 return scoped_ptr
<ParseNode
>();
603 return condition
.Pass();
606 void Parser::TraverseOrder(const ParseNode
* root
,
607 std::vector
<const ParseNode
*>* pre
,
608 std::vector
<const ParseNode
*>* post
) {
610 pre
->push_back(root
);
612 if (const AccessorNode
* accessor
= root
->AsAccessor()) {
613 TraverseOrder(accessor
->index(), pre
, post
);
614 TraverseOrder(accessor
->member(), pre
, post
);
615 } else if (const BinaryOpNode
* binop
= root
->AsBinaryOp()) {
616 TraverseOrder(binop
->left(), pre
, post
);
617 TraverseOrder(binop
->right(), pre
, post
);
618 } else if (const BlockNode
* block
= root
->AsBlock()) {
619 for (const auto& statement
: block
->statements())
620 TraverseOrder(statement
, pre
, post
);
621 TraverseOrder(block
->End(), pre
, post
);
622 } else if (const ConditionNode
* condition
= root
->AsConditionNode()) {
623 TraverseOrder(condition
->condition(), pre
, post
);
624 TraverseOrder(condition
->if_true(), pre
, post
);
625 TraverseOrder(condition
->if_false(), pre
, post
);
626 } else if (const FunctionCallNode
* func_call
= root
->AsFunctionCall()) {
627 TraverseOrder(func_call
->args(), pre
, post
);
628 TraverseOrder(func_call
->block(), pre
, post
);
629 } else if (root
->AsIdentifier()) {
631 } else if (const ListNode
* list
= root
->AsList()) {
632 for (const auto& node
: list
->contents())
633 TraverseOrder(node
, pre
, post
);
634 TraverseOrder(list
->End(), pre
, post
);
635 } else if (root
->AsLiteral()) {
637 } else if (const UnaryOpNode
* unaryop
= root
->AsUnaryOp()) {
638 TraverseOrder(unaryop
->operand(), pre
, post
);
639 } else if (root
->AsBlockComment()) {
641 } else if (root
->AsEnd()) {
644 CHECK(false) << "Unhandled case in TraverseOrder.";
647 post
->push_back(root
);
651 void Parser::AssignComments(ParseNode
* file
) {
652 // Start by generating a pre- and post- order traversal of the tree so we
653 // can determine what's before and after comments.
654 std::vector
<const ParseNode
*> pre
;
655 std::vector
<const ParseNode
*> post
;
656 TraverseOrder(file
, &pre
, &post
);
658 // Assign line comments to syntax immediately following.
660 for (const auto& node
: pre
) {
661 const Location
& start
= node
->GetRange().begin();
662 while (cur_comment
< static_cast<int>(line_comment_tokens_
.size())) {
663 if (start
.byte() >= line_comment_tokens_
[cur_comment
].location().byte()) {
664 const_cast<ParseNode
*>(node
)->comments_mutable()->append_before(
665 line_comment_tokens_
[cur_comment
]);
673 // Remaining line comments go at end of file.
674 for (; cur_comment
< static_cast<int>(line_comment_tokens_
.size());
676 file
->comments_mutable()->append_after(line_comment_tokens_
[cur_comment
]);
678 // Assign suffix to syntax immediately before.
679 cur_comment
= static_cast<int>(suffix_comment_tokens_
.size() - 1);
680 for (std::vector
<const ParseNode
*>::const_reverse_iterator i
= post
.rbegin();
683 // Don't assign suffix comments to the function, list, or block, but instead
684 // to the last thing inside.
685 if ((*i
)->AsFunctionCall() || (*i
)->AsList() || (*i
)->AsBlock())
688 const Location
& start
= (*i
)->GetRange().begin();
689 const Location
& end
= (*i
)->GetRange().end();
691 // Don't assign suffix comments to something that starts on an earlier
697 // it's attached to "b", not sources = [ ... ].
698 if (start
.line_number() != end
.line_number())
701 while (cur_comment
>= 0) {
702 if (end
.byte() <= suffix_comment_tokens_
[cur_comment
].location().byte()) {
703 const_cast<ParseNode
*>(*i
)->comments_mutable()->append_suffix(
704 suffix_comment_tokens_
[cur_comment
]);
711 // Suffix comments were assigned in reverse, so if there were multiple on
712 // the same node, they need to be reversed.
713 if ((*i
)->comments() && !(*i
)->comments()->suffix().empty())
714 const_cast<ParseNode
*>(*i
)->comments_mutable()->ReverseSuffix();