tools/gn/parser.cc

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "tools/gn/parser.h"
   6
   7 #include "base/logging.h"
   8 #include "tools/gn/functions.h"
   9 #include "tools/gn/operators.h"
  10 #include "tools/gn/token.h"
  11
  12 const char kGrammar_Help[] =
  13     "GN build language grammar\n"
  14     "\n"
  15     "Tokens\n"
  16     "\n"
  17     "  GN build files are read as sequences of tokens.  While splitting the\n"
  18     "  file into tokens, the next token is the longest sequence of characters\n"
  19     "  that form a valid token.\n"
  20     "\n"
  21     "White space and comments\n"
  22     "\n"
  23     "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
  24     "  carriage returns (U+000D), and newlines (U+000A).\n"
  25     "\n"
  26     "  Comments start at the character \"#\" and stop at the next newline.\n"
  27     "\n"
  28     "  White space and comments are ignored except that they may separate\n"
  29     "  tokens that would otherwise combine into a single token.\n"
  30     "\n"
  31     "Identifiers\n"
  32     "\n"
  33     "  Identifiers name variables and functions.\n"
  34     "\n"
  35     "      identifier = letter { letter | digit } .\n"
  36     "      letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
  37     "      digit      = \"0\" ... \"9\" .\n"
  38     "\n"
  39     "Keywords\n"
  40     "\n"
  41     "  The following keywords are reserved and may not be used as\n"
  42     "  identifiers:\n"
  43     "\n"
  44     "          else    false   if      true\n"
  45     "\n"
  46     "Integer literals\n"
  47     "\n"
  48     "  An integer literal represents a decimal integer value.\n"
  49     "\n"
  50     "      integer = [ \"-\" ] digit { digit } .\n"
  51     "\n"
  52     "String literals\n"
  53     "\n"
  54     "  A string literal represents a string value consisting of the quoted\n"
  55     "  characters with possible escape sequences and variable expansions.\n"
  56     "\n"
  57     "      string    = `\"` { char | escape | expansion } `\"` .\n"
  58     "      escape    = `\\` ( \"$\" | `\"` | char ) .\n"
  59     "      expansion = \"$\" ( identifier | \"{\" identifier \"}\" ) .\n"
  60     "      char      = /* any character except \"$\", `\"`, or newline */ .\n"
  61     "\n"
  62     "  After a backslash, certain sequences represent special characters:\n"
  63     "\n"
  64     "          \\\"    U+0022    quotation mark\n"
  65     "          \\$    U+0024    dollar sign\n"
  66     "          \\\\    U+005C    backslash\n"
  67     "\n"
  68     "  All other backslashes represent themselves.\n"
  69     "\n"
  70     "Punctuation\n"
  71     "\n"
  72     "  The following character sequences represent punctuation:\n"
  73     "\n"
  74     "          +       +=      ==      !=      (       )\n"
  75     "          -       -=      <       <=      [       ]\n"
  76     "          !       =       >       >=      {       }\n"
  77     "                          &&      ||      .       ,\n"
  78     "\n"
  79     "Grammar\n"
  80     "\n"
  81     "  The input tokens form a syntax tree following a context-free grammar:\n"
  82     "\n"
  83     "      File = StatementList .\n"
  84     "\n"
  85     "      Statement     = Assignment | Call | Condition .\n"
  86     "      Assignment    = identifier AssignOp Expr .\n"
  87     "      Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
  88     "      Condition     = \"if\" \"(\" Expr \")\" Block\n"
  89     "                      [ \"else\" ( Condition | Block ) ] .\n"
  90     "      Block         = \"{\" StatementList \"}\" .\n"
  91     "      StatementList = { Statement } .\n"
  92     "\n"
  93     "      Expr        = UnaryExpr | Expr BinaryOp Expr .\n"
  94     "      UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .\n"
  95     "      PrimaryExpr = identifier | integer | string | Call\n"
  96     "                  | identifier \"[\" Expr \"]\"\n"
  97     "                  | identifier \".\" identifier\n"
  98     "                  | \"(\" Expr \")\"\n"
  99     "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
 100     "      ExprList    = Expr { \",\" Expr } .\n"
 101     "\n"
 102     "      AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
 103     "      UnaryOp  = \"!\" .\n"
 104     "      BinaryOp = \"+\" | \"-\"                  // highest priority\n"
 105     "               | \"<\" | \"<=\" | \">\" | \">=\"\n"
 106     "               | \"==\" | \"!=\"\n"
 107     "               | \"&&\"\n"
 108     "               | \"||\" .                     // lowest priority\n"
 109     "\n"
 110     "  All binary operators are left-associative.\n";
 111
 112 enum Precedence {
 113   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
 114   PRECEDENCE_OR = 2,
 115   PRECEDENCE_AND = 3,
 116   PRECEDENCE_EQUALITY = 4,
 117   PRECEDENCE_RELATION = 5,
 118   PRECEDENCE_SUM = 6,
 119   PRECEDENCE_PREFIX = 7,
 120   PRECEDENCE_CALL = 8,
 121   PRECEDENCE_DOT = 9,         // Highest precedence.
 122 };
 123
 124 // The top-level for blocks/ifs is recursive descent, the expression parser is
 125 // a Pratt parser. The basic idea there is to have the precedences (and
 126 // associativities) encoded relative to each other and only parse up until you
 127 // hit something of that precedence. There's a dispatch table in expressions_
 128 // at the top of parser.cc that describes how each token dispatches if it's
 129 // seen as either a prefix or infix operator, and if it's infix, what its
 130 // precedence is.
 131 //
 132 // Refs:
 133 // - http://javascript.crockford.com/tdop/tdop.html
 134 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
 135
// Dispatch table for the expression parser, indexed by Token::Type. The rows
// must therefore stay in the same order as the Token::Type enumerators named
// in the trailing comments.
//
// Each row is {prefix, infix, precedence}:
//   prefix     - member function invoked when the token begins an expression,
//                or nullptr if the token cannot begin one.
//   infix      - member function invoked when the token follows an already
//                parsed left-hand operand, or nullptr if it cannot.
//   precedence - precedence compared against the caller's minimum in
//                ParseExpression(int); -1 in rows that have no infix handler.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},                                   // INVALID
    {&Parser::Literal, nullptr, -1},                          // INTEGER
    {&Parser::Literal, nullptr, -1},                          // STRING
    {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},                              // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
    {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
    {nullptr, nullptr, -1},                                   // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
    {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
    {nullptr, nullptr, -1},                                   // LEFT_BRACE
    {nullptr, nullptr, -1},                                   // RIGHT_BRACE
    {nullptr, nullptr, -1},                                   // IF
    {nullptr, nullptr, -1},                                   // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},                                       // COMMA
    {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},                // LINE_COMMENT
    {nullptr, nullptr, -1},                // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};
 173
 174 Parser::Parser(const std::vector<Token>& tokens, Err* err)
 175     : err_(err), cur_(0) {
 176   for (const auto& token : tokens) {
 177     switch(token.type()) {
 178       case Token::LINE_COMMENT:
 179         line_comment_tokens_.push_back(token);
 180         break;
 181       case Token::SUFFIX_COMMENT:
 182         suffix_comment_tokens_.push_back(token);
 183         break;
 184       default:
 185         // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
 186         // through the real parser.
 187         tokens_.push_back(token);
 188         break;
 189     }
 190   }
 191 }
 192
 193 Parser::~Parser() {
 194 }
 195
 196 // static
 197 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
 198                                     Err* err) {
 199   Parser p(tokens, err);
 200   return p.ParseFile();
 201 }
 202
 203 // static
 204 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
 205                                               Err* err) {
 206   Parser p(tokens, err);
 207   return p.ParseExpression().Pass();
 208 }
 209
 210 bool Parser::IsAssignment(const ParseNode* node) const {
 211   return node && node->AsBinaryOp() &&
 212          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
 213           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
 214           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
 215 }
 216
 217 bool Parser::IsStatementBreak(Token::Type token_type) const {
 218   switch (token_type) {
 219     case Token::IDENTIFIER:
 220     case Token::LEFT_BRACE:
 221     case Token::RIGHT_BRACE:
 222     case Token::IF:
 223     case Token::ELSE:
 224       return true;
 225     default:
 226       return false;
 227   }
 228 }
 229
 230 bool Parser::LookAhead(Token::Type type) {
 231   if (at_end())
 232     return false;
 233   return cur_token().type() == type;
 234 }
 235
 236 bool Parser::Match(Token::Type type) {
 237   if (!LookAhead(type))
 238     return false;
 239   Consume();
 240   return true;
 241 }
 242
 243 Token Parser::Consume(Token::Type type, const char* error_message) {
 244   Token::Type types[1] = { type };
 245   return Consume(types, 1, error_message);
 246 }
 247
 248 Token Parser::Consume(Token::Type* types,
 249                       size_t num_types,
 250                       const char* error_message) {
 251   if (has_error()) {
 252     // Don't overwrite current error, but make progress through tokens so that
 253     // a loop that's expecting a particular token will still terminate.
 254     cur_++;
 255     return Token(Location(), Token::INVALID, base::StringPiece());
 256   }
 257   if (at_end()) {
 258     const char kEOFMsg[] = "I hit EOF instead.";
 259     if (tokens_.empty())
 260       *err_ = Err(Location(), error_message, kEOFMsg);
 261     else
 262       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
 263     return Token(Location(), Token::INVALID, base::StringPiece());
 264   }
 265
 266   for (size_t i = 0; i < num_types; ++i) {
 267     if (cur_token().type() == types[i])
 268       return Consume();
 269   }
 270   *err_ = Err(cur_token(), error_message);
 271   return Token(Location(), Token::INVALID, base::StringPiece());
 272 }
 273
 274 Token Parser::Consume() {
 275   return tokens_[cur_++];
 276 }
 277
 278 scoped_ptr<ParseNode> Parser::ParseExpression() {
 279   return ParseExpression(0);
 280 }
 281
 282 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
 283   if (at_end())
 284     return scoped_ptr<ParseNode>();
 285
 286   Token token = Consume();
 287   PrefixFunc prefix = expressions_[token.type()].prefix;
 288
 289   if (prefix == nullptr) {
 290     *err_ = Err(token,
 291                 std::string("Unexpected token '") + token.value().as_string() +
 292                     std::string("'"));
 293     return scoped_ptr<ParseNode>();
 294   }
 295
 296   scoped_ptr<ParseNode> left = (this->*prefix)(token);
 297   if (has_error())
 298     return left.Pass();
 299
 300   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
 301          precedence <= expressions_[cur_token().type()].precedence) {
 302     token = Consume();
 303     InfixFunc infix = expressions_[token.type()].infix;
 304     if (infix == nullptr) {
 305       *err_ = Err(token,
 306                   std::string("Unexpected token '") +
 307                       token.value().as_string() + std::string("'"));
 308       return scoped_ptr<ParseNode>();
 309     }
 310     left = (this->*infix)(left.Pass(), token);
 311     if (has_error())
 312       return scoped_ptr<ParseNode>();
 313   }
 314
 315   return left.Pass();
 316 }
 317
 318 scoped_ptr<ParseNode> Parser::Literal(Token token) {
 319   return make_scoped_ptr(new LiteralNode(token));
 320 }
 321
 322 scoped_ptr<ParseNode> Parser::Name(Token token) {
 323   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
 324 }
 325
 326 scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
 327   scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
 328   comment->set_comment(token);
 329   return comment.Pass();
 330 }
 331
 332 scoped_ptr<ParseNode> Parser::Group(Token token) {
 333   scoped_ptr<ParseNode> expr = ParseExpression();
 334   if (has_error())
 335     return scoped_ptr<ParseNode>();
 336   Consume(Token::RIGHT_PAREN, "Expected ')'");
 337   return expr.Pass();
 338 }
 339
 340 scoped_ptr<ParseNode> Parser::Not(Token token) {
 341   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
 342   if (has_error())
 343     return scoped_ptr<ParseNode>();
 344   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
 345   unary_op->set_op(token);
 346   unary_op->set_operand(expr.Pass());
 347   return unary_op.Pass();
 348 }
 349
 350 scoped_ptr<ParseNode> Parser::List(Token node) {
 351   scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
 352   if (!has_error() && !at_end())
 353     Consume(Token::RIGHT_BRACKET, "Expected ']'");
 354   return list.Pass();
 355 }
 356
 357 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
 358                                              Token token) {
 359   scoped_ptr<ParseNode> right =
 360       ParseExpression(expressions_[token.type()].precedence + 1);
 361   if (!right) {
 362     *err_ =
 363         Err(token,
 364             "Expected right hand side for '" + token.value().as_string() + "'");
 365     return scoped_ptr<ParseNode>();
 366   }
 367   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
 368   binary_op->set_op(token);
 369   binary_op->set_left(left.Pass());
 370   binary_op->set_right(right.Pass());
 371   return binary_op.Pass();
 372 }
 373
 374 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
 375                                                Token token) {
 376   scoped_ptr<ListNode> list(new ListNode);
 377   list->set_begin_token(token);
 378   list->set_end(make_scoped_ptr(new EndNode(token)));
 379   scoped_ptr<BlockNode> block;
 380   bool has_arg = false;
 381   if (LookAhead(Token::LEFT_PAREN)) {
 382     Token start_token = Consume();
 383     // Parsing a function call.
 384     has_arg = true;
 385     if (Match(Token::RIGHT_PAREN)) {
 386       // Nothing, just an empty call.
 387     } else {
 388       list = ParseList(start_token, Token::RIGHT_PAREN, false);
 389       if (has_error())
 390         return scoped_ptr<ParseNode>();
 391       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
 392     }
 393     // Optionally with a scope.
 394     if (LookAhead(Token::LEFT_BRACE)) {
 395       block = ParseBlock();
 396       if (has_error())
 397         return scoped_ptr<ParseNode>();
 398     }
 399   }
 400
 401   if (!left && !has_arg) {
 402     // Not a function call, just a standalone identifier.
 403     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
 404   }
 405   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
 406   func_call->set_function(token);
 407   func_call->set_args(list.Pass());
 408   if (block)
 409     func_call->set_block(block.Pass());
 410   return func_call.Pass();
 411 }
 412
 413 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
 414                                          Token token) {
 415   if (left->AsIdentifier() == nullptr) {
 416     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
 417     return scoped_ptr<ParseNode>();
 418   }
 419   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
 420   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
 421   assign->set_op(token);
 422   assign->set_left(left.Pass());
 423   assign->set_right(value.Pass());
 424   return assign.Pass();
 425 }
 426
 427 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
 428                                         Token token) {
 429   // TODO: Maybe support more complex expressions like a[0][0]. This would
 430   // require work on the evaluator too.
 431   if (left->AsIdentifier() == nullptr) {
 432     *err_ = Err(left.get(), "May only subscript identifiers.",
 433         "The thing on the left hand side of the [] must be an identifier\n"
 434         "and not an expression. If you need this, you'll have to assign the\n"
 435         "value to a temporary before subscripting. Sorry.");
 436     return scoped_ptr<ParseNode>();
 437   }
 438   scoped_ptr<ParseNode> value = ParseExpression();
 439   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
 440   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 441   accessor->set_base(left->AsIdentifier()->value());
 442   accessor->set_index(value.Pass());
 443   return accessor.Pass();
 444 }
 445
 446 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
 447                                           Token token) {
 448   if (left->AsIdentifier() == nullptr) {
 449     *err_ = Err(left.get(), "May only use \".\" for identifiers.",
 450         "The thing on the left hand side of the dot must be an identifier\n"
 451         "and not an expression. If you need this, you'll have to assign the\n"
 452         "value to a temporary first. Sorry.");
 453     return scoped_ptr<ParseNode>();
 454   }
 455
 456   scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
 457   if (!right || !right->AsIdentifier()) {
 458     *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
 459         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
 460     return scoped_ptr<ParseNode>();
 461   }
 462
 463   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 464   accessor->set_base(left->AsIdentifier()->value());
 465   accessor->set_member(scoped_ptr<IdentifierNode>(
 466       static_cast<IdentifierNode*>(right.release())));
 467   return accessor.Pass();
 468 }
 469
 470 // Does not Consume the start or end token.
 471 scoped_ptr<ListNode> Parser::ParseList(Token start_token,
 472                                        Token::Type stop_before,
 473                                        bool allow_trailing_comma) {
 474   scoped_ptr<ListNode> list(new ListNode);
 475   list->set_begin_token(start_token);
 476   bool just_got_comma = false;
 477   bool first_time = true;
 478   while (!LookAhead(stop_before)) {
 479     if (!first_time) {
 480       if (!just_got_comma) {
 481         // Require commas separate things in lists.
 482         *err_ = Err(cur_token(), "Expected comma between items.");
 483         return scoped_ptr<ListNode>();
 484       }
 485     }
 486     first_time = false;
 487
 488     // Why _OR? We're parsing things that are higher precedence than the ,
 489     // that separates the items of the list. , should appear lower than
 490     // boolean expressions (the lowest of which is OR), but above assignments.
 491     list->append_item(ParseExpression(PRECEDENCE_OR));
 492     if (has_error())
 493       return scoped_ptr<ListNode>();
 494     if (at_end()) {
 495       *err_ =
 496           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
 497       return scoped_ptr<ListNode>();
 498     }
 499     if (list->contents().back()->AsBlockComment()) {
 500       // If there was a comment inside the list, we don't need a comma to the
 501       // next item, so pretend we got one, if we're expecting one.
 502       just_got_comma = allow_trailing_comma;
 503     } else {
 504       just_got_comma = Match(Token::COMMA);
 505     }
 506   }
 507   if (just_got_comma && !allow_trailing_comma) {
 508     *err_ = Err(cur_token(), "Trailing comma");
 509     return scoped_ptr<ListNode>();
 510   }
 511   list->set_end(make_scoped_ptr(new EndNode(cur_token())));
 512   return list.Pass();
 513 }
 514
 515 scoped_ptr<ParseNode> Parser::ParseFile() {
 516   scoped_ptr<BlockNode> file(new BlockNode);
 517   for (;;) {
 518     if (at_end())
 519       break;
 520     scoped_ptr<ParseNode> statement = ParseStatement();
 521     if (!statement)
 522       break;
 523     file->append_statement(statement.Pass());
 524   }
 525   if (!at_end() && !has_error())
 526     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
 527   if (has_error())
 528     return scoped_ptr<ParseNode>();
 529
 530   // TODO(scottmg): If this is measurably expensive, it could be done only
 531   // when necessary (when reformatting, or during tests). Comments are
 532   // separate from the parse tree at this point, so downstream code can remain
 533   // ignorant of them.
 534   AssignComments(file.get());
 535
 536   return file.Pass();
 537 }
 538
 539 scoped_ptr<ParseNode> Parser::ParseStatement() {
 540   if (LookAhead(Token::IF)) {
 541     return ParseCondition();
 542   } else if (LookAhead(Token::BLOCK_COMMENT)) {
 543     return BlockComment(Consume());
 544   } else {
 545     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
 546     // to allow "pointless" expressions and return ParseExpression() directly.
 547     scoped_ptr<ParseNode> stmt = ParseExpression();
 548     if (stmt) {
 549       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
 550         return stmt.Pass();
 551     }
 552     if (!has_error()) {
 553       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
 554       *err_ = Err(token, "Expecting assignment or function call.");
 555     }
 556     return scoped_ptr<ParseNode>();
 557   }
 558 }
 559
 560 scoped_ptr<BlockNode> Parser::ParseBlock() {
 561   Token begin_token =
 562       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
 563   if (has_error())
 564     return scoped_ptr<BlockNode>();
 565   scoped_ptr<BlockNode> block(new BlockNode);
 566   block->set_begin_token(begin_token);
 567
 568   for (;;) {
 569     if (LookAhead(Token::RIGHT_BRACE)) {
 570       block->set_end(make_scoped_ptr(new EndNode(Consume())));
 571       break;
 572     }
 573
 574     scoped_ptr<ParseNode> statement = ParseStatement();
 575     if (!statement)
 576       return scoped_ptr<BlockNode>();
 577     block->append_statement(statement.Pass());
 578   }
 579   return block.Pass();
 580 }
 581
 582 scoped_ptr<ParseNode> Parser::ParseCondition() {
 583   scoped_ptr<ConditionNode> condition(new ConditionNode);
 584   condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
 585   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
 586   condition->set_condition(ParseExpression());
 587   if (IsAssignment(condition->condition()))
 588     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
 589   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
 590   condition->set_if_true(ParseBlock().Pass());
 591   if (Match(Token::ELSE)) {
 592     if (LookAhead(Token::LEFT_BRACE)) {
 593       condition->set_if_false(ParseBlock().Pass());
 594     } else if (LookAhead(Token::IF)) {
 595       condition->set_if_false(ParseStatement().Pass());
 596     } else {
 597       *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
 598       return scoped_ptr<ParseNode>();
 599     }
 600   }
 601   if (has_error())
 602     return scoped_ptr<ParseNode>();
 603   return condition.Pass();
 604 }
 605
 606 void Parser::TraverseOrder(const ParseNode* root,
 607                            std::vector<const ParseNode*>* pre,
 608                            std::vector<const ParseNode*>* post) {
 609   if (root) {
 610     pre->push_back(root);
 611
 612     if (const AccessorNode* accessor = root->AsAccessor()) {
 613       TraverseOrder(accessor->index(), pre, post);
 614       TraverseOrder(accessor->member(), pre, post);
 615     } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
 616       TraverseOrder(binop->left(), pre, post);
 617       TraverseOrder(binop->right(), pre, post);
 618     } else if (const BlockNode* block = root->AsBlock()) {
 619       for (const auto& statement : block->statements())
 620         TraverseOrder(statement, pre, post);
 621       TraverseOrder(block->End(), pre, post);
 622     } else if (const ConditionNode* condition = root->AsConditionNode()) {
 623       TraverseOrder(condition->condition(), pre, post);
 624       TraverseOrder(condition->if_true(), pre, post);
 625       TraverseOrder(condition->if_false(), pre, post);
 626     } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
 627       TraverseOrder(func_call->args(), pre, post);
 628       TraverseOrder(func_call->block(), pre, post);
 629     } else if (root->AsIdentifier()) {
 630       // Nothing.
 631     } else if (const ListNode* list = root->AsList()) {
 632       for (const auto& node : list->contents())
 633         TraverseOrder(node, pre, post);
 634       TraverseOrder(list->End(), pre, post);
 635     } else if (root->AsLiteral()) {
 636       // Nothing.
 637     } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
 638       TraverseOrder(unaryop->operand(), pre, post);
 639     } else if (root->AsBlockComment()) {
 640       // Nothing.
 641     } else if (root->AsEnd()) {
 642       // Nothing.
 643     } else {
 644       CHECK(false) << "Unhandled case in TraverseOrder.";
 645     }
 646
 647     post->push_back(root);
 648   }
 649 }
 650
 651 void Parser::AssignComments(ParseNode* file) {
 652   // Start by generating a pre- and post- order traversal of the tree so we
 653   // can determine what's before and after comments.
 654   std::vector<const ParseNode*> pre;
 655   std::vector<const ParseNode*> post;
 656   TraverseOrder(file, &pre, &post);
 657
 658   // Assign line comments to syntax immediately following.
 659   int cur_comment = 0;
 660   for (const auto& node : pre) {
 661     const Location& start = node->GetRange().begin();
 662     while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
 663       if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
 664         const_cast<ParseNode*>(node)->comments_mutable()->append_before(
 665             line_comment_tokens_[cur_comment]);
 666         ++cur_comment;
 667       } else {
 668         break;
 669       }
 670     }
 671   }
 672
 673   // Remaining line comments go at end of file.
 674   for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
 675        ++cur_comment)
 676     file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
 677
 678   // Assign suffix to syntax immediately before.
 679   cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
 680   for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
 681        i != post.rend();
 682        ++i) {
 683     // Don't assign suffix comments to the function, list, or block, but instead
 684     // to the last thing inside.
 685     if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
 686       continue;
 687
 688     const Location& start = (*i)->GetRange().begin();
 689     const Location& end = (*i)->GetRange().end();
 690
 691     // Don't assign suffix comments to something that starts on an earlier
 692     // line, so that in:
 693     //
 694     // sources = [ "a",
 695     //     "b" ] # comment
 696     //
 697     // it's attached to "b", not sources = [ ... ].
 698     if (start.line_number() != end.line_number())
 699       continue;
 700
 701     while (cur_comment >= 0) {
 702       if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
 703         const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
 704             suffix_comment_tokens_[cur_comment]);
 705         --cur_comment;
 706       } else {
 707         break;
 708       }
 709     }
 710
 711     // Suffix comments were assigned in reverse, so if there were multiple on
 712     // the same node, they need to be reversed.
 713     if ((*i)->comments() && !(*i)->comments()->suffix().empty())
 714       const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
 715   }
 716 }