Add ICU message format support
[chromium-blink-merge.git] / tools / gn / parser.cc
blob286409b5ed48a6d3b207a37c37ddfe306af313fb
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
12 const char kGrammar_Help[] =
13 "GN build language grammar\n"
14 "\n"
15 "Tokens\n"
16 "\n"
17 " GN build files are read as sequences of tokens. While splitting the\n"
18 " file into tokens, the next token is the longest sequence of characters\n"
19 " that form a valid token.\n"
20 "\n"
21 "White space and comments\n"
22 "\n"
23 " White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
24 " carriage returns (U+000D), and newlines (U+000A).\n"
25 "\n"
26 " Comments start at the character \"#\" and stop at the next newline.\n"
27 "\n"
28 " White space and comments are ignored except that they may separate\n"
29 " tokens that would otherwise combine into a single token.\n"
30 "\n"
31 "Identifiers\n"
32 "\n"
33 " Identifiers name variables and functions.\n"
34 "\n"
35 " identifier = letter { letter | digit } .\n"
36 " letter = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
37 " digit = \"0\" ... \"9\" .\n"
38 "\n"
39 "Keywords\n"
40 "\n"
41 " The following keywords are reserved and may not be used as\n"
42 " identifiers:\n"
43 "\n"
44 " else false if true\n"
45 "\n"
46 "Integer literals\n"
47 "\n"
48 " An integer literal represents a decimal integer value.\n"
49 "\n"
50 " integer = [ \"-\" ] digit { digit } .\n"
51 "\n"
52 " Leading zeros and negative zero are disallowed.\n"
53 "\n"
54 "String literals\n"
55 "\n"
56 " A string literal represents a string value consisting of the quoted\n"
57 " characters with possible escape sequences and variable expansions.\n"
58 "\n"
59 " string = `\"` { char | escape | expansion } `\"` .\n"
60 " escape = `\\` ( \"$\" | `\"` | char ) .\n"
61 " BracketExpansion = \"{\" ( identifier | ArrayAccess | ScopeAccess "
62 ") \"}\" .\n"
63 " expansion = \"$\" ( identifier | BracketExpansion ) .\n"
64 " char = /* any character except \"$\", `\"`, or newline "
65 "*/ .\n"
66 "\n"
67 " After a backslash, certain sequences represent special characters:\n"
68 "\n"
69 " \\\" U+0022 quotation mark\n"
70 " \\$ U+0024 dollar sign\n"
71 " \\\\ U+005C backslash\n"
72 "\n"
73 " All other backslashes represent themselves.\n"
74 "\n"
75 "Punctuation\n"
76 "\n"
77 " The following character sequences represent punctuation:\n"
78 "\n"
79 " + += == != ( )\n"
80 " - -= < <= [ ]\n"
81 " ! = > >= { }\n"
82 " && || . ,\n"
83 "\n"
84 "Grammar\n"
85 "\n"
86 " The input tokens form a syntax tree following a context-free grammar:\n"
87 "\n"
88 " File = StatementList .\n"
89 "\n"
90 " Statement = Assignment | Call | Condition .\n"
91 " Assignment = identifier AssignOp Expr .\n"
92 " Call = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
93 " Condition = \"if\" \"(\" Expr \")\" Block\n"
94 " [ \"else\" ( Condition | Block ) ] .\n"
95 " Block = \"{\" StatementList \"}\" .\n"
96 " StatementList = { Statement } .\n"
97 "\n"
98 " ArrayAccess = identifier \"[\" { identifier | integer } \"]\" .\n"
99 " ScopeAccess = identifier \".\" identifier .\n"
100 " Expr = UnaryExpr | Expr BinaryOp Expr .\n"
101 " UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .\n"
102 " PrimaryExpr = identifier | integer | string | Call\n"
103 " | ArrayAccess | ScopeAccess\n"
104 " | \"(\" Expr \")\"\n"
105 " | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
106 " ExprList = Expr { \",\" Expr } .\n"
107 "\n"
108 " AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
109 " UnaryOp = \"!\" .\n"
110 " BinaryOp = \"+\" | \"-\" // highest priority\n"
111 " | \"<\" | \"<=\" | \">\" | \">=\"\n"
112 " | \"==\" | \"!=\"\n"
113 " | \"&&\"\n"
114 " | \"||\" . // lowest priority\n"
115 "\n"
116 " All binary operators are left-associative.\n";
118 enum Precedence {
119 PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence.
120 PRECEDENCE_OR = 2,
121 PRECEDENCE_AND = 3,
122 PRECEDENCE_EQUALITY = 4,
123 PRECEDENCE_RELATION = 5,
124 PRECEDENCE_SUM = 6,
125 PRECEDENCE_PREFIX = 7,
126 PRECEDENCE_CALL = 8,
127 PRECEDENCE_DOT = 9, // Highest precedence.
130 // The top-level for blocks/ifs is recursive descent, the expression parser is
131 // a Pratt parser. The basic idea there is to have the precedences (and
132 // associativities) encoded relative to each other and only parse up until you
133 // hit something of that precedence. There's a dispatch table in expressions_
134 // at the top of parser.cc that describes how each token dispatches if it's
135 // seen as either a prefix or infix operator, and if it's infix, what its
136 // precedence is.
138 // Refs:
139 // - http://javascript.crockford.com/tdop/tdop.html
140 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
142 // Indexed by Token::Type.
143 ParserHelper Parser::expressions_[] = {
144 {nullptr, nullptr, -1}, // INVALID
145 {&Parser::Literal, nullptr, -1}, // INTEGER
146 {&Parser::Literal, nullptr, -1}, // STRING
147 {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN
148 {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN
149 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
150 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
151 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
152 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
153 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
154 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
155 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
156 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
157 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
158 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
159 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
160 {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
161 {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
162 {&Parser::Not, nullptr, -1}, // BANG
163 {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT
164 {&Parser::Group, nullptr, -1}, // LEFT_PAREN
165 {nullptr, nullptr, -1}, // RIGHT_PAREN
166 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
167 {nullptr, nullptr, -1}, // RIGHT_BRACKET
168 {nullptr, nullptr, -1}, // LEFT_BRACE
169 {nullptr, nullptr, -1}, // RIGHT_BRACE
170 {nullptr, nullptr, -1}, // IF
171 {nullptr, nullptr, -1}, // ELSE
172 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
173 {nullptr, nullptr, -1}, // COMMA
174 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
175 {nullptr, nullptr, -1}, // LINE_COMMENT
176 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
177 {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT
180 Parser::Parser(const std::vector<Token>& tokens, Err* err)
181 : err_(err), cur_(0) {
182 for (const auto& token : tokens) {
183 switch (token.type()) {
184 case Token::LINE_COMMENT:
185 line_comment_tokens_.push_back(token);
186 break;
187 case Token::SUFFIX_COMMENT:
188 suffix_comment_tokens_.push_back(token);
189 break;
190 default:
191 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
192 // through the real parser.
193 tokens_.push_back(token);
194 break;
199 Parser::~Parser() {
202 // static
203 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
204 Err* err) {
205 Parser p(tokens, err);
206 return p.ParseFile();
209 // static
210 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
211 Err* err) {
212 Parser p(tokens, err);
213 scoped_ptr<ParseNode> expr = p.ParseExpression();
214 if (!p.at_end() && !err->has_error()) {
215 *err = Err(p.cur_token(), "Trailing garbage");
216 return nullptr;
218 return expr.Pass();
221 // static
222 scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
223 Err* err) {
224 for (const Token& token : tokens) {
225 switch (token.type()) {
226 case Token::INTEGER:
227 case Token::STRING:
228 case Token::TRUE_TOKEN:
229 case Token::FALSE_TOKEN:
230 case Token::LEFT_BRACKET:
231 case Token::RIGHT_BRACKET:
232 case Token::COMMA:
233 continue;
234 default:
235 *err = Err(token, "Invalid token in literal value");
236 return nullptr;
240 return ParseExpression(tokens, err);
243 bool Parser::IsAssignment(const ParseNode* node) const {
244 return node && node->AsBinaryOp() &&
245 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
246 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
247 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
250 bool Parser::IsStatementBreak(Token::Type token_type) const {
251 switch (token_type) {
252 case Token::IDENTIFIER:
253 case Token::LEFT_BRACE:
254 case Token::RIGHT_BRACE:
255 case Token::IF:
256 case Token::ELSE:
257 return true;
258 default:
259 return false;
263 bool Parser::LookAhead(Token::Type type) {
264 if (at_end())
265 return false;
266 return cur_token().type() == type;
269 bool Parser::Match(Token::Type type) {
270 if (!LookAhead(type))
271 return false;
272 Consume();
273 return true;
276 Token Parser::Consume(Token::Type type, const char* error_message) {
277 Token::Type types[1] = { type };
278 return Consume(types, 1, error_message);
281 Token Parser::Consume(Token::Type* types,
282 size_t num_types,
283 const char* error_message) {
284 if (has_error()) {
285 // Don't overwrite current error, but make progress through tokens so that
286 // a loop that's expecting a particular token will still terminate.
287 cur_++;
288 return Token(Location(), Token::INVALID, base::StringPiece());
290 if (at_end()) {
291 const char kEOFMsg[] = "I hit EOF instead.";
292 if (tokens_.empty())
293 *err_ = Err(Location(), error_message, kEOFMsg);
294 else
295 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
296 return Token(Location(), Token::INVALID, base::StringPiece());
299 for (size_t i = 0; i < num_types; ++i) {
300 if (cur_token().type() == types[i])
301 return Consume();
303 *err_ = Err(cur_token(), error_message);
304 return Token(Location(), Token::INVALID, base::StringPiece());
307 Token Parser::Consume() {
308 return tokens_[cur_++];
311 scoped_ptr<ParseNode> Parser::ParseExpression() {
312 return ParseExpression(0);
315 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
316 if (at_end())
317 return scoped_ptr<ParseNode>();
319 Token token = Consume();
320 PrefixFunc prefix = expressions_[token.type()].prefix;
322 if (prefix == nullptr) {
323 *err_ = Err(token,
324 std::string("Unexpected token '") + token.value().as_string() +
325 std::string("'"));
326 return scoped_ptr<ParseNode>();
329 scoped_ptr<ParseNode> left = (this->*prefix)(token);
330 if (has_error())
331 return left.Pass();
333 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
334 precedence <= expressions_[cur_token().type()].precedence) {
335 token = Consume();
336 InfixFunc infix = expressions_[token.type()].infix;
337 if (infix == nullptr) {
338 *err_ = Err(token,
339 std::string("Unexpected token '") +
340 token.value().as_string() + std::string("'"));
341 return scoped_ptr<ParseNode>();
343 left = (this->*infix)(left.Pass(), token);
344 if (has_error())
345 return scoped_ptr<ParseNode>();
348 return left.Pass();
351 scoped_ptr<ParseNode> Parser::Literal(Token token) {
352 return make_scoped_ptr(new LiteralNode(token));
355 scoped_ptr<ParseNode> Parser::Name(Token token) {
356 return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
359 scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
360 scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
361 comment->set_comment(token);
362 return comment.Pass();
365 scoped_ptr<ParseNode> Parser::Group(Token token) {
366 scoped_ptr<ParseNode> expr = ParseExpression();
367 if (has_error())
368 return scoped_ptr<ParseNode>();
369 Consume(Token::RIGHT_PAREN, "Expected ')'");
370 return expr.Pass();
373 scoped_ptr<ParseNode> Parser::Not(Token token) {
374 scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
375 if (has_error())
376 return scoped_ptr<ParseNode>();
377 scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
378 unary_op->set_op(token);
379 unary_op->set_operand(expr.Pass());
380 return unary_op.Pass();
383 scoped_ptr<ParseNode> Parser::List(Token node) {
384 scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
385 if (!has_error() && !at_end())
386 Consume(Token::RIGHT_BRACKET, "Expected ']'");
387 return list.Pass();
390 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
391 Token token) {
392 scoped_ptr<ParseNode> right =
393 ParseExpression(expressions_[token.type()].precedence + 1);
394 if (!right) {
395 if (!has_error()) {
396 *err_ = Err(token, "Expected right hand side for '" +
397 token.value().as_string() + "'");
399 return scoped_ptr<ParseNode>();
401 scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
402 binary_op->set_op(token);
403 binary_op->set_left(left.Pass());
404 binary_op->set_right(right.Pass());
405 return binary_op.Pass();
408 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
409 Token token) {
410 scoped_ptr<ListNode> list(new ListNode);
411 list->set_begin_token(token);
412 list->set_end(make_scoped_ptr(new EndNode(token)));
413 scoped_ptr<BlockNode> block;
414 bool has_arg = false;
415 if (LookAhead(Token::LEFT_PAREN)) {
416 Token start_token = Consume();
417 // Parsing a function call.
418 has_arg = true;
419 if (Match(Token::RIGHT_PAREN)) {
420 // Nothing, just an empty call.
421 } else {
422 list = ParseList(start_token, Token::RIGHT_PAREN, false);
423 if (has_error())
424 return scoped_ptr<ParseNode>();
425 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
427 // Optionally with a scope.
428 if (LookAhead(Token::LEFT_BRACE)) {
429 block = ParseBlock();
430 if (has_error())
431 return scoped_ptr<ParseNode>();
435 if (!left && !has_arg) {
436 // Not a function call, just a standalone identifier.
437 return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
439 scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
440 func_call->set_function(token);
441 func_call->set_args(list.Pass());
442 if (block)
443 func_call->set_block(block.Pass());
444 return func_call.Pass();
447 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
448 Token token) {
449 if (left->AsIdentifier() == nullptr) {
450 *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
451 return scoped_ptr<ParseNode>();
453 scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
454 scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
455 assign->set_op(token);
456 assign->set_left(left.Pass());
457 assign->set_right(value.Pass());
458 return assign.Pass();
461 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
462 Token token) {
463 // TODO: Maybe support more complex expressions like a[0][0]. This would
464 // require work on the evaluator too.
465 if (left->AsIdentifier() == nullptr) {
466 *err_ = Err(left.get(), "May only subscript identifiers.",
467 "The thing on the left hand side of the [] must be an identifier\n"
468 "and not an expression. If you need this, you'll have to assign the\n"
469 "value to a temporary before subscripting. Sorry.");
470 return scoped_ptr<ParseNode>();
472 scoped_ptr<ParseNode> value = ParseExpression();
473 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
474 scoped_ptr<AccessorNode> accessor(new AccessorNode);
475 accessor->set_base(left->AsIdentifier()->value());
476 accessor->set_index(value.Pass());
477 return accessor.Pass();
480 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
481 Token token) {
482 if (left->AsIdentifier() == nullptr) {
483 *err_ = Err(left.get(), "May only use \".\" for identifiers.",
484 "The thing on the left hand side of the dot must be an identifier\n"
485 "and not an expression. If you need this, you'll have to assign the\n"
486 "value to a temporary first. Sorry.");
487 return scoped_ptr<ParseNode>();
490 scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
491 if (!right || !right->AsIdentifier()) {
492 *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
493 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
494 return scoped_ptr<ParseNode>();
497 scoped_ptr<AccessorNode> accessor(new AccessorNode);
498 accessor->set_base(left->AsIdentifier()->value());
499 accessor->set_member(scoped_ptr<IdentifierNode>(
500 static_cast<IdentifierNode*>(right.release())));
501 return accessor.Pass();
504 // Does not Consume the start or end token.
505 scoped_ptr<ListNode> Parser::ParseList(Token start_token,
506 Token::Type stop_before,
507 bool allow_trailing_comma) {
508 scoped_ptr<ListNode> list(new ListNode);
509 list->set_begin_token(start_token);
510 bool just_got_comma = false;
511 bool first_time = true;
512 while (!LookAhead(stop_before)) {
513 if (!first_time) {
514 if (!just_got_comma) {
515 // Require commas separate things in lists.
516 *err_ = Err(cur_token(), "Expected comma between items.");
517 return scoped_ptr<ListNode>();
520 first_time = false;
522 // Why _OR? We're parsing things that are higher precedence than the ,
523 // that separates the items of the list. , should appear lower than
524 // boolean expressions (the lowest of which is OR), but above assignments.
525 list->append_item(ParseExpression(PRECEDENCE_OR));
526 if (has_error())
527 return scoped_ptr<ListNode>();
528 if (at_end()) {
529 *err_ =
530 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
531 return scoped_ptr<ListNode>();
533 if (list->contents().back()->AsBlockComment()) {
534 // If there was a comment inside the list, we don't need a comma to the
535 // next item, so pretend we got one, if we're expecting one.
536 just_got_comma = allow_trailing_comma;
537 } else {
538 just_got_comma = Match(Token::COMMA);
541 if (just_got_comma && !allow_trailing_comma) {
542 *err_ = Err(cur_token(), "Trailing comma");
543 return scoped_ptr<ListNode>();
545 list->set_end(make_scoped_ptr(new EndNode(cur_token())));
546 return list.Pass();
549 scoped_ptr<ParseNode> Parser::ParseFile() {
550 scoped_ptr<BlockNode> file(new BlockNode);
551 for (;;) {
552 if (at_end())
553 break;
554 scoped_ptr<ParseNode> statement = ParseStatement();
555 if (!statement)
556 break;
557 file->append_statement(statement.Pass());
559 if (!at_end() && !has_error())
560 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
561 if (has_error())
562 return scoped_ptr<ParseNode>();
564 // TODO(scottmg): If this is measurably expensive, it could be done only
565 // when necessary (when reformatting, or during tests). Comments are
566 // separate from the parse tree at this point, so downstream code can remain
567 // ignorant of them.
568 AssignComments(file.get());
570 return file.Pass();
573 scoped_ptr<ParseNode> Parser::ParseStatement() {
574 if (LookAhead(Token::IF)) {
575 return ParseCondition();
576 } else if (LookAhead(Token::BLOCK_COMMENT)) {
577 return BlockComment(Consume());
578 } else {
579 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
580 // to allow "pointless" expressions and return ParseExpression() directly.
581 scoped_ptr<ParseNode> stmt = ParseExpression();
582 if (stmt) {
583 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
584 return stmt.Pass();
586 if (!has_error()) {
587 Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
588 *err_ = Err(token, "Expecting assignment or function call.");
590 return scoped_ptr<ParseNode>();
594 scoped_ptr<BlockNode> Parser::ParseBlock() {
595 Token begin_token =
596 Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
597 if (has_error())
598 return scoped_ptr<BlockNode>();
599 scoped_ptr<BlockNode> block(new BlockNode);
600 block->set_begin_token(begin_token);
602 for (;;) {
603 if (LookAhead(Token::RIGHT_BRACE)) {
604 block->set_end(make_scoped_ptr(new EndNode(Consume())));
605 break;
608 scoped_ptr<ParseNode> statement = ParseStatement();
609 if (!statement)
610 return scoped_ptr<BlockNode>();
611 block->append_statement(statement.Pass());
613 return block.Pass();
616 scoped_ptr<ParseNode> Parser::ParseCondition() {
617 scoped_ptr<ConditionNode> condition(new ConditionNode);
618 condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
619 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
620 condition->set_condition(ParseExpression());
621 if (IsAssignment(condition->condition()))
622 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
623 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
624 condition->set_if_true(ParseBlock().Pass());
625 if (Match(Token::ELSE)) {
626 if (LookAhead(Token::LEFT_BRACE)) {
627 condition->set_if_false(ParseBlock().Pass());
628 } else if (LookAhead(Token::IF)) {
629 condition->set_if_false(ParseStatement().Pass());
630 } else {
631 *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
632 return scoped_ptr<ParseNode>();
635 if (has_error())
636 return scoped_ptr<ParseNode>();
637 return condition.Pass();
640 void Parser::TraverseOrder(const ParseNode* root,
641 std::vector<const ParseNode*>* pre,
642 std::vector<const ParseNode*>* post) {
643 if (root) {
644 pre->push_back(root);
646 if (const AccessorNode* accessor = root->AsAccessor()) {
647 TraverseOrder(accessor->index(), pre, post);
648 TraverseOrder(accessor->member(), pre, post);
649 } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
650 TraverseOrder(binop->left(), pre, post);
651 TraverseOrder(binop->right(), pre, post);
652 } else if (const BlockNode* block = root->AsBlock()) {
653 for (const auto& statement : block->statements())
654 TraverseOrder(statement, pre, post);
655 TraverseOrder(block->End(), pre, post);
656 } else if (const ConditionNode* condition = root->AsConditionNode()) {
657 TraverseOrder(condition->condition(), pre, post);
658 TraverseOrder(condition->if_true(), pre, post);
659 TraverseOrder(condition->if_false(), pre, post);
660 } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
661 TraverseOrder(func_call->args(), pre, post);
662 TraverseOrder(func_call->block(), pre, post);
663 } else if (root->AsIdentifier()) {
664 // Nothing.
665 } else if (const ListNode* list = root->AsList()) {
666 for (const auto& node : list->contents())
667 TraverseOrder(node, pre, post);
668 TraverseOrder(list->End(), pre, post);
669 } else if (root->AsLiteral()) {
670 // Nothing.
671 } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
672 TraverseOrder(unaryop->operand(), pre, post);
673 } else if (root->AsBlockComment()) {
674 // Nothing.
675 } else if (root->AsEnd()) {
676 // Nothing.
677 } else {
678 CHECK(false) << "Unhandled case in TraverseOrder.";
681 post->push_back(root);
685 void Parser::AssignComments(ParseNode* file) {
686 // Start by generating a pre- and post- order traversal of the tree so we
687 // can determine what's before and after comments.
688 std::vector<const ParseNode*> pre;
689 std::vector<const ParseNode*> post;
690 TraverseOrder(file, &pre, &post);
692 // Assign line comments to syntax immediately following.
693 int cur_comment = 0;
694 for (const auto& node : pre) {
695 const Location& start = node->GetRange().begin();
696 while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
697 if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
698 const_cast<ParseNode*>(node)->comments_mutable()->append_before(
699 line_comment_tokens_[cur_comment]);
700 ++cur_comment;
701 } else {
702 break;
707 // Remaining line comments go at end of file.
708 for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
709 ++cur_comment)
710 file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
712 // Assign suffix to syntax immediately before.
713 cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
714 for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
715 i != post.rend();
716 ++i) {
717 // Don't assign suffix comments to the function, list, or block, but instead
718 // to the last thing inside.
719 if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
720 continue;
722 const Location& start = (*i)->GetRange().begin();
723 const Location& end = (*i)->GetRange().end();
725 // Don't assign suffix comments to something that starts on an earlier
726 // line, so that in:
728 // sources = [ "a",
729 // "b" ] # comment
731 // it's attached to "b", not sources = [ ... ].
732 if (start.line_number() != end.line_number())
733 continue;
735 while (cur_comment >= 0) {
736 if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
737 const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
738 suffix_comment_tokens_[cur_comment]);
739 --cur_comment;
740 } else {
741 break;
745 // Suffix comments were assigned in reverse, so if there were multiple on
746 // the same node, they need to be reversed.
747 if ((*i)->comments() && !(*i)->comments()->suffix().empty())
748 const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();