// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/parser.h"

#include "base/logging.h"
#include "tools/gn/functions.h"
#include "tools/gn/operators.h"
#include "tools/gn/token.h"

const char kGrammar_Help[] =
    "GN build language grammar\n"
    "\n"
    "Tokens\n"
    "\n"
    "  GN build files are read as sequences of tokens. While splitting the\n"
    "  file into tokens, the next token is the longest sequence of characters\n"
    "  that form a valid token.\n"
    "\n"
    "White space and comments\n"
    "\n"
    "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
    "  carriage returns (U+000D), and newlines (U+000A).\n"
    "\n"
    "  Comments start at the character \"#\" and stop at the next newline.\n"
    "\n"
    "  White space and comments are ignored except that they may separate\n"
    "  tokens that would otherwise combine into a single token.\n"
    "\n"
    "Identifiers\n"
    "\n"
    "  Identifiers name variables and functions.\n"
    "\n"
    "    identifier = letter { letter | digit } .\n"
    "    letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
    "    digit      = \"0\" ... \"9\" .\n"
    "\n"
    "Keywords\n"
    "\n"
    "  The following keywords are reserved and may not be used as\n"
    "  identifiers:\n"
    "\n"
    "    else false if true\n"
    "\n"
    "Integer literals\n"
    "\n"
    "  An integer literal represents a decimal integer value.\n"
    "\n"
    "    integer = [ \"-\" ] digit { digit } .\n"
    "\n"
    "  Leading zeros and negative zero are disallowed.\n"
    "\n"
    "String literals\n"
    "\n"
    "  A string literal represents a string value consisting of the quoted\n"
    "  characters with possible escape sequences and variable expansions.\n"
    "\n"
    "    string           = `\"` { char | escape | expansion } `\"` .\n"
    "    escape           = `\\` ( \"$\" | `\"` | char ) .\n"
    "    BracketExpansion = \"{\" ( identifier | ArrayAccess | ScopeAccess "
    ") \"}\" .\n"
    "    expansion        = \"$\" ( identifier | BracketExpansion ) .\n"
    "    char             = /* any character except \"$\", `\"`, or newline "
    "*/ .\n"
    "\n"
    "  After a backslash, certain sequences represent special characters:\n"
    "\n"
    "    \\\"   U+0022 quotation mark\n"
    "    \\$   U+0024 dollar sign\n"
    "    \\\\   U+005C backslash\n"
    "\n"
    "  All other backslashes represent themselves.\n"
    "\n"
    "Punctuation\n"
    "\n"
    "  The following character sequences represent punctuation:\n"
    "\n"
    "    +       +=      ==      !=      (       )\n"
    "    -       -=      <       <=      [       ]\n"
    "    !       =       >       >=      {       }\n"
    "    &&      ||      .       ,\n"
    "\n"
    "Grammar\n"
    "\n"
    "  The input tokens form a syntax tree following a context-free grammar:\n"
    "\n"
    "    File          = StatementList .\n"
    "\n"
    "    Statement     = Assignment | Call | Condition .\n"
    "    Assignment    = identifier AssignOp Expr .\n"
    "    Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
    "    Condition     = \"if\" \"(\" Expr \")\" Block\n"
    "                    [ \"else\" ( Condition | Block ) ] .\n"
    "    Block         = \"{\" StatementList \"}\" .\n"
    "    StatementList = { Statement } .\n"
    "\n"
    "    ArrayAccess   = identifier \"[\" { identifier | integer } \"]\" .\n"
    "    ScopeAccess   = identifier \".\" identifier .\n"
    "    Expr          = UnaryExpr | Expr BinaryOp Expr .\n"
    "    UnaryExpr     = PrimaryExpr | UnaryOp UnaryExpr .\n"
    "    PrimaryExpr   = identifier | integer | string | Call\n"
    "                  | ArrayAccess | ScopeAccess\n"
    "                  | \"(\" Expr \")\"\n"
    "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
    "    ExprList      = Expr { \",\" Expr } .\n"
    "\n"
    "    AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
    "    UnaryOp  = \"!\" .\n"
    "    BinaryOp = \"+\" | \"-\"                  // highest priority\n"
    "             | \"<\" | \"<=\" | \">\" | \">=\"\n"
    "             | \"==\" | \"!=\"\n"
    "             | \"&&\"\n"
    "             | \"||\" .                     // lowest priority\n"
    "\n"
    "  All binary operators are left-associative.\n";

enum Precedence {
  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  PRECEDENCE_OR = 2,
  PRECEDENCE_AND = 3,
  PRECEDENCE_EQUALITY = 4,
  PRECEDENCE_RELATION = 5,
  PRECEDENCE_SUM = 6,
  PRECEDENCE_PREFIX = 7,
  PRECEDENCE_CALL = 8,
  PRECEDENCE_DOT = 9,  // Highest precedence.
};

// The top-level for blocks/ifs is recursive descent, the expression parser is
// a Pratt parser. The basic idea there is to have the precedences (and
// associativities) encoded relative to each other and only parse up until you
// hit something of that precedence. There's a dispatch table in expressions_
// at the top of parser.cc that describes how each token dispatches if it's
// seen as either a prefix or infix operator, and if it's infix, what its
// precedence is.
//
// Refs:
// - http://javascript.crockford.com/tdop/tdop.html
// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
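//
// Illustrative walkthrough (not part of the original comments): with the
// table below, "a = b + c == d" parses as "a = ((b + c) == d)". PLUS binds at
// PRECEDENCE_SUM, tighter than EQUAL_EQUAL at PRECEDENCE_EQUALITY, which in
// turn binds tighter than the assignment at PRECEDENCE_ASSIGNMENT.
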
// Indexed by Token::Type.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},  // INVALID
    {&Parser::Literal, nullptr, -1},  // INTEGER
    {&Parser::Literal, nullptr, -1},  // STRING
    {&Parser::Literal, nullptr, -1},  // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},  // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},  // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},  // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},  // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},  // DOT
    {&Parser::Group, nullptr, -1},  // LEFT_PAREN
    {nullptr, nullptr, -1},  // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},  // LEFT_BRACKET
    {nullptr, nullptr, -1},  // RIGHT_BRACKET
    {nullptr, nullptr, -1},  // LEFT_BRACE
    {nullptr, nullptr, -1},  // RIGHT_BRACE
    {nullptr, nullptr, -1},  // IF
    {nullptr, nullptr, -1},  // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},  // COMMA
    {nullptr, nullptr, -1},  // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},  // LINE_COMMENT
    {nullptr, nullptr, -1},  // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};

Parser::Parser(const std::vector<Token>& tokens, Err* err)
    : err_(err), cur_(0) {
  for (const auto& token : tokens) {
    switch (token.type()) {
      case Token::LINE_COMMENT:
        line_comment_tokens_.push_back(token);
        break;
      case Token::SUFFIX_COMMENT:
        suffix_comment_tokens_.push_back(token);
        break;
      default:
        // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
        // through the real parser.
        tokens_.push_back(token);
        break;
    }
  }
}

Parser::~Parser() {
}

// static
scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
                                    Err* err) {
  Parser p(tokens, err);
  return p.ParseFile();
}

// static
scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
                                              Err* err) {
  Parser p(tokens, err);
  scoped_ptr<ParseNode> expr = p.ParseExpression();
  if (!p.at_end() && !err->has_error()) {
    *err = Err(p.cur_token(), "Trailing garbage");
    return nullptr;
  }
  return expr.Pass();
}

// static
scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
                                         Err* err) {
  for (const Token& token : tokens) {
    switch (token.type()) {
      case Token::INTEGER:
      case Token::STRING:
      case Token::TRUE_TOKEN:
      case Token::FALSE_TOKEN:
      case Token::LEFT_BRACKET:
      case Token::RIGHT_BRACKET:
      case Token::COMMA:
        continue;
      default:
        *err = Err(token, "Invalid token in literal value");
        return nullptr;
    }
  }

  return ParseExpression(tokens, err);
}

bool Parser::IsAssignment(const ParseNode* node) const {
  return node && node->AsBinaryOp() &&
         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
}

bool Parser::IsStatementBreak(Token::Type token_type) const {
  switch (token_type) {
    case Token::IDENTIFIER:
    case Token::LEFT_BRACE:
    case Token::RIGHT_BRACE:
    case Token::IF:
    case Token::ELSE:
      return true;
    default:
      return false;
  }
}

bool Parser::LookAhead(Token::Type type) {
  if (at_end())
    return false;
  return cur_token().type() == type;
}

bool Parser::Match(Token::Type type) {
  if (!LookAhead(type))
    return false;
  Consume();
  return true;
}

Token Parser::Consume(Token::Type type, const char* error_message) {
  Token::Type types[1] = { type };
  return Consume(types, 1, error_message);
}

Token Parser::Consume(Token::Type* types,
                      size_t num_types,
                      const char* error_message) {
  if (has_error()) {
    // Don't overwrite current error, but make progress through tokens so that
    // a loop that's expecting a particular token will still terminate.
    cur_++;
    return Token(Location(), Token::INVALID, base::StringPiece());
  }
  if (at_end()) {
    const char kEOFMsg[] = "I hit EOF instead.";
    if (tokens_.empty())
      *err_ = Err(Location(), error_message, kEOFMsg);
    else
      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    return Token(Location(), Token::INVALID, base::StringPiece());
  }

  for (size_t i = 0; i < num_types; ++i) {
    if (cur_token().type() == types[i])
      return Consume();
  }
  *err_ = Err(cur_token(), error_message);
  return Token(Location(), Token::INVALID, base::StringPiece());
}

Token Parser::Consume() {
  return tokens_[cur_++];
}

scoped_ptr<ParseNode> Parser::ParseExpression() {
  return ParseExpression(0);
}

scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
  if (at_end())
    return scoped_ptr<ParseNode>();

  Token token = Consume();
  PrefixFunc prefix = expressions_[token.type()].prefix;

  if (prefix == nullptr) {
    *err_ = Err(token,
                std::string("Unexpected token '") + token.value().as_string() +
                    std::string("'"));
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> left = (this->*prefix)(token);
  if (has_error())
    return left.Pass();

  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
         precedence <= expressions_[cur_token().type()].precedence) {
    token = Consume();
    InfixFunc infix = expressions_[token.type()].infix;
    if (infix == nullptr) {
      *err_ = Err(token,
                  std::string("Unexpected token '") +
                      token.value().as_string() + std::string("'"));
      return scoped_ptr<ParseNode>();
    }
    left = (this->*infix)(left.Pass(), token);
    if (has_error())
      return scoped_ptr<ParseNode>();
  }

  return left.Pass();
}

scoped_ptr<ParseNode> Parser::Literal(Token token) {
  return make_scoped_ptr(new LiteralNode(token));
}

scoped_ptr<ParseNode> Parser::Name(Token token) {
  return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
}

scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
  scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
  comment->set_comment(token);
  return comment.Pass();
}

scoped_ptr<ParseNode> Parser::Group(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression();
  if (has_error())
    return scoped_ptr<ParseNode>();
  Consume(Token::RIGHT_PAREN, "Expected ')'");
  return expr.Pass();
}

scoped_ptr<ParseNode> Parser::Not(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
  if (has_error())
    return scoped_ptr<ParseNode>();
  if (!expr) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for '!'.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
  unary_op->set_op(token);
  unary_op->set_operand(expr.Pass());
  return unary_op.Pass();
}

scoped_ptr<ParseNode> Parser::List(Token node) {
  scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
  if (!has_error() && !at_end())
    Consume(Token::RIGHT_BRACKET, "Expected ']'");
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
                                             Token token) {
  scoped_ptr<ParseNode> right =
      ParseExpression(expressions_[token.type()].precedence + 1);
  if (!right) {
    if (!has_error()) {
      *err_ = Err(token, "Expected right-hand side for '" +
                             token.value().as_string() + "'");
    }
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
  binary_op->set_op(token);
  binary_op->set_left(left.Pass());
  binary_op->set_right(right.Pass());
  return binary_op.Pass();
}

scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
                                               Token token) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(token);
  list->set_end(make_scoped_ptr(new EndNode(token)));
  scoped_ptr<BlockNode> block;
  bool has_arg = false;
  if (LookAhead(Token::LEFT_PAREN)) {
    Token start_token = Consume();
    // Parsing a function call.
    has_arg = true;
    if (Match(Token::RIGHT_PAREN)) {
      // Nothing, just an empty call.
    } else {
      list = ParseList(start_token, Token::RIGHT_PAREN, false);
      if (has_error())
        return scoped_ptr<ParseNode>();
      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    }
    // Optionally with a scope.
    if (LookAhead(Token::LEFT_BRACE)) {
      block = ParseBlock();
      if (has_error())
        return scoped_ptr<ParseNode>();
    }
  }

  if (!left && !has_arg) {
    // Not a function call, just a standalone identifier.
    return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
  }
  scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
  func_call->set_function(token);
  func_call->set_args(list.Pass());
  if (block)
    func_call->set_block(block.Pass());
  return func_call.Pass();
}

scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
                                         Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
  if (!value) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for assignment.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
  assign->set_op(token);
  assign->set_left(left.Pass());
  assign->set_right(value.Pass());
  return assign.Pass();
}

scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
                                        Token token) {
  // TODO: Maybe support more complex expressions like a[0][0]. This would
  // require work on the evaluator too.
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only subscript identifiers.",
        "The thing on the left hand side of the [] must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary before subscripting. Sorry.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression();
  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_index(value.Pass());
  return accessor.Pass();
}

scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
                                          Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only use \".\" for identifiers.",
        "The thing on the left hand side of the dot must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary first. Sorry.");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
  if (!right || !right->AsIdentifier()) {
    *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
        "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_member(scoped_ptr<IdentifierNode>(
      static_cast<IdentifierNode*>(right.release())));
  return accessor.Pass();
}

// Does not Consume the start or end token.
scoped_ptr<ListNode> Parser::ParseList(Token start_token,
                                       Token::Type stop_before,
                                       bool allow_trailing_comma) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(start_token);
  bool just_got_comma = false;
  bool first_time = true;
  while (!LookAhead(stop_before)) {
    if (!first_time) {
      if (!just_got_comma) {
        // Require commas separate things in lists.
        *err_ = Err(cur_token(), "Expected comma between items.");
        return scoped_ptr<ListNode>();
      }
    }
    first_time = false;

    // Why _OR? We're parsing things that are higher precedence than the ,
    // that separates the items of the list. , should appear lower than
    // boolean expressions (the lowest of which is OR), but above assignments.
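    // For example (illustrative): in "[ a || b, c ]" the "||" groups a and b
    // into a single list item before the "," is seen as the item separator.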
    list->append_item(ParseExpression(PRECEDENCE_OR));
    if (has_error())
      return scoped_ptr<ListNode>();
    if (at_end()) {
      *err_ =
          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
      return scoped_ptr<ListNode>();
    }
    if (list->contents().back()->AsBlockComment()) {
      // If there was a comment inside the list, we don't need a comma to the
      // next item, so pretend we got one, if we're expecting one.
      just_got_comma = allow_trailing_comma;
    } else {
      just_got_comma = Match(Token::COMMA);
    }
  }

  if (just_got_comma && !allow_trailing_comma) {
    *err_ = Err(cur_token(), "Trailing comma");
    return scoped_ptr<ListNode>();
  }
  list->set_end(make_scoped_ptr(new EndNode(cur_token())));
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::ParseFile() {
  scoped_ptr<BlockNode> file(new BlockNode);
  for (;;) {
    if (at_end())
      break;
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      break;
    file->append_statement(statement.Pass());
  }
  if (!at_end() && !has_error())
    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
  if (has_error())
    return scoped_ptr<ParseNode>();

  // TODO(scottmg): If this is measurably expensive, it could be done only
  // when necessary (when reformatting, or during tests). Comments are
  // separate from the parse tree at this point, so downstream code can remain
  // ignorant of them.
  AssignComments(file.get());

  return file.Pass();
}

scoped_ptr<ParseNode> Parser::ParseStatement() {
  if (LookAhead(Token::IF)) {
    return ParseCondition();
  } else if (LookAhead(Token::BLOCK_COMMENT)) {
    return BlockComment(Consume());
  } else {
    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    // to allow "pointless" expressions and return ParseExpression() directly.
    scoped_ptr<ParseNode> stmt = ParseExpression();
    if (stmt) {
      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
        return stmt.Pass();
    }
    if (!has_error()) {
      Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
      *err_ = Err(token, "Expecting assignment or function call.");
    }
    return scoped_ptr<ParseNode>();
  }
}

scoped_ptr<BlockNode> Parser::ParseBlock() {
  Token begin_token =
      Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
  if (has_error())
    return scoped_ptr<BlockNode>();
  scoped_ptr<BlockNode> block(new BlockNode);
  block->set_begin_token(begin_token);

  for (;;) {
    if (LookAhead(Token::RIGHT_BRACE)) {
      block->set_end(make_scoped_ptr(new EndNode(Consume())));
      break;
    }

    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      return scoped_ptr<BlockNode>();
    block->append_statement(statement.Pass());
  }
  return block.Pass();
}

scoped_ptr<ParseNode> Parser::ParseCondition() {
  scoped_ptr<ConditionNode> condition(new ConditionNode);
  condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
  condition->set_condition(ParseExpression());
  if (IsAssignment(condition->condition()))
    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
  condition->set_if_true(ParseBlock().Pass());
  if (Match(Token::ELSE)) {
    if (LookAhead(Token::LEFT_BRACE)) {
      condition->set_if_false(ParseBlock().Pass());
    } else if (LookAhead(Token::IF)) {
      condition->set_if_false(ParseStatement().Pass());
    } else {
      *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
      return scoped_ptr<ParseNode>();
    }
  }
  if (has_error())
    return scoped_ptr<ParseNode>();
  return condition.Pass();
}

void Parser::TraverseOrder(const ParseNode* root,
                           std::vector<const ParseNode*>* pre,
                           std::vector<const ParseNode*>* post) {
  if (root) {
    pre->push_back(root);

    if (const AccessorNode* accessor = root->AsAccessor()) {
      TraverseOrder(accessor->index(), pre, post);
      TraverseOrder(accessor->member(), pre, post);
    } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
      TraverseOrder(binop->left(), pre, post);
      TraverseOrder(binop->right(), pre, post);
    } else if (const BlockNode* block = root->AsBlock()) {
      for (const auto& statement : block->statements())
        TraverseOrder(statement, pre, post);
      TraverseOrder(block->End(), pre, post);
    } else if (const ConditionNode* condition = root->AsConditionNode()) {
      TraverseOrder(condition->condition(), pre, post);
      TraverseOrder(condition->if_true(), pre, post);
      TraverseOrder(condition->if_false(), pre, post);
    } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
      TraverseOrder(func_call->args(), pre, post);
      TraverseOrder(func_call->block(), pre, post);
    } else if (root->AsIdentifier()) {
      // Nothing.
    } else if (const ListNode* list = root->AsList()) {
      for (const auto& node : list->contents())
        TraverseOrder(node, pre, post);
      TraverseOrder(list->End(), pre, post);
    } else if (root->AsLiteral()) {
      // Nothing.
    } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
      TraverseOrder(unaryop->operand(), pre, post);
    } else if (root->AsBlockComment()) {
      // Nothing.
    } else if (root->AsEnd()) {
      // Nothing.
    } else {
      CHECK(false) << "Unhandled case in TraverseOrder.";
    }

    post->push_back(root);
  }
}

void Parser::AssignComments(ParseNode* file) {
  // Start by generating a pre- and post- order traversal of the tree so we
  // can determine what's before and after comments.
  std::vector<const ParseNode*> pre;
  std::vector<const ParseNode*> post;
  TraverseOrder(file, &pre, &post);

  // Assign line comments to syntax immediately following.
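  //
  // For example (illustrative):
  //
  //   # Sets the output name.
  //   output = "foo"
  //
  // attaches the "# Sets..." line comment to the "output" assignment that
  // follows it.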
  int cur_comment = 0;
  for (const auto& node : pre) {
    const Location& start = node->GetRange().begin();
    while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
      if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(node)->comments_mutable()->append_before(
            line_comment_tokens_[cur_comment]);
        ++cur_comment;
      } else {
        break;
      }
    }
  }

  // Remaining line comments go at end of file.
  for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
       ++cur_comment)
    file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);

  // Assign suffix to syntax immediately before.
  cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
  for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
       i != post.rend();
       ++i) {
    // Don't assign suffix comments to the function, list, or block, but
    // instead to the last thing inside.
    if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
      continue;

    const Location& start = (*i)->GetRange().begin();
    const Location& end = (*i)->GetRange().end();

    // Don't assign suffix comments to something that starts on an earlier
    // line, so that in:
    //
    //   sources = [ "a",
    //       "b" ]  # comment
    //
    // it's attached to "b", not sources = [ ... ].
    if (start.line_number() != end.line_number())
      continue;

    while (cur_comment >= 0) {
      if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
            suffix_comment_tokens_[cur_comment]);
        --cur_comment;
      } else {
        break;
      }
    }

    // Suffix comments were assigned in reverse, so if there were multiple on
    // the same node, they need to be reversed.
    if ((*i)->comments() && !(*i)->comments()->suffix().empty())
      const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
  }
}