// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/parser.h"

#include "base/logging.h"
#include "tools/gn/functions.h"
#include "tools/gn/operators.h"
#include "tools/gn/token.h"

const char kGrammar_Help[] =
    "GN build language grammar\n"
    "\n"
    "Tokens\n"
    "\n"
    "  GN build files are read as sequences of tokens. While splitting the\n"
    "  file into tokens, the next token is the longest sequence of characters\n"
    "  that form a valid token.\n"
    "\n"
    "White space and comments\n"
    "\n"
    "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
    "  carriage returns (U+000D), and newlines (U+000A).\n"
    "\n"
    "  Comments start at the character \"#\" and stop at the next newline.\n"
    "\n"
    "  White space and comments are ignored except that they may separate\n"
    "  tokens that would otherwise combine into a single token.\n"
    "\n"
    "Identifiers\n"
    "\n"
    "  Identifiers name variables and functions.\n"
    "\n"
    "    identifier = letter { letter | digit } .\n"
    "    letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
    "    digit      = \"0\" ... \"9\" .\n"
    "\n"
    "Keywords\n"
    "\n"
    "  The following keywords are reserved and may not be used as\n"
    "  identifiers:\n"
    "\n"
    "    else false if true\n"
    "\n"
    "Integer literals\n"
    "\n"
    "  An integer literal represents a decimal integer value.\n"
    "\n"
    "    integer = [ \"-\" ] digit { digit } .\n"
    "\n"
    "  Leading zeros and negative zero are disallowed.\n"
    "\n"
    "String literals\n"
    "\n"
    "  A string literal represents a string value consisting of the quoted\n"
    "  characters with possible escape sequences and variable expansions.\n"
    "\n"
    "    string           = `\"` { char | escape | expansion } `\"` .\n"
    "    escape           = `\\` ( \"$\" | `\"` | char ) .\n"
    "    BracketExpansion = \"{\" ( identifier | ArrayAccess | ScopeAccess "
    ") \"}\" .\n"
    "    expansion        = \"$\" ( identifier | BracketExpansion ) .\n"
    "    char             = /* any character except \"$\", `\"`, or newline "
    "*/ .\n"
    "\n"
    "  After a backslash, certain sequences represent special characters:\n"
    "\n"
    "    \\\"   U+0022 quotation mark\n"
    "    \\$   U+0024 dollar sign\n"
    "    \\\\   U+005C backslash\n"
    "\n"
    "  All other backslashes represent themselves.\n"
    "\n"
    "Punctuation\n"
    "\n"
    "  The following character sequences represent punctuation:\n"
    "\n"
    "    +       +=      ==      !=      (       )\n"
    "    -       -=      <       <=      [       ]\n"
    "    !       =       >       >=      {       }\n"
    "    &&      ||      .       ,\n"
    "\n"
    "Grammar\n"
    "\n"
    "  The input tokens form a syntax tree following a context-free grammar:\n"
    "\n"
    "    File          = StatementList .\n"
    "\n"
    "    Statement     = Assignment | Call | Condition .\n"
    "    Assignment    = identifier AssignOp Expr .\n"
    "    Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
    "    Condition     = \"if\" \"(\" Expr \")\" Block\n"
    "                    [ \"else\" ( Condition | Block ) ] .\n"
    "    Block         = \"{\" StatementList \"}\" .\n"
    "    StatementList = { Statement } .\n"
    "\n"
    "    ArrayAccess   = identifier \"[\" { identifier | integer } \"]\" .\n"
    "    ScopeAccess   = identifier \".\" identifier .\n"
    "    Expr          = UnaryExpr | Expr BinaryOp Expr .\n"
    "    UnaryExpr     = PrimaryExpr | UnaryOp UnaryExpr .\n"
    "    PrimaryExpr   = identifier | integer | string | Call\n"
    "                  | ArrayAccess | ScopeAccess\n"
    "                  | \"(\" Expr \")\"\n"
    "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
    "    ExprList      = Expr { \",\" Expr } .\n"
    "\n"
    "    AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
    "    UnaryOp  = \"!\" .\n"
    "    BinaryOp = \"+\" | \"-\"                  // highest priority\n"
    "             | \"<\" | \"<=\" | \">\" | \">=\"\n"
    "             | \"==\" | \"!=\"\n"
    "             | \"&&\"\n"
    "             | \"||\" .                     // lowest priority\n"
    "\n"
    "  All binary operators are left-associative.\n";

enum Precedence {
  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  PRECEDENCE_OR = 2,
  PRECEDENCE_AND = 3,
  PRECEDENCE_EQUALITY = 4,
  PRECEDENCE_RELATION = 5,
  PRECEDENCE_SUM = 6,
  PRECEDENCE_PREFIX = 7,
  PRECEDENCE_CALL = 8,
  PRECEDENCE_DOT = 9,  // Highest precedence.
};

// The top-level for blocks/ifs is recursive descent, the expression parser is
// a Pratt parser. The basic idea there is to have the precedences (and
// associativities) encoded relative to each other and only parse up until you
// hit something of that precedence. There's a dispatch table in expressions_
// at the top of parser.cc that describes how each token dispatches if it's
// seen as either a prefix or infix operator, and if it's infix, what its
// precedence is.
//
// Refs:
// - http://javascript.crockford.com/tdop/tdop.html
// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
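//
// Illustrative walkthrough (not part of the original comments): with the
// table below, "a = b + c == d" parses as "a = ((b + c) == d)". PLUS binds at
// PRECEDENCE_SUM, tighter than EQUAL_EQUAL at PRECEDENCE_EQUALITY, which in
// turn binds tighter than the assignment at PRECEDENCE_ASSIGNMENT.
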
// Indexed by Token::Type.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},  // INVALID
    {&Parser::Literal, nullptr, -1},  // INTEGER
    {&Parser::Literal, nullptr, -1},  // STRING
    {&Parser::Literal, nullptr, -1},  // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},  // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},  // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},  // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},  // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},  // DOT
    {&Parser::Group, nullptr, -1},  // LEFT_PAREN
    {nullptr, nullptr, -1},  // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},  // LEFT_BRACKET
    {nullptr, nullptr, -1},  // RIGHT_BRACKET
    {nullptr, nullptr, -1},  // LEFT_BRACE
    {nullptr, nullptr, -1},  // RIGHT_BRACE
    {nullptr, nullptr, -1},  // IF
    {nullptr, nullptr, -1},  // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},  // COMMA
    {nullptr, nullptr, -1},  // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},  // LINE_COMMENT
    {nullptr, nullptr, -1},  // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};

Parser::Parser(const std::vector<Token>& tokens, Err* err)
    : err_(err), cur_(0) {
  for (const auto& token : tokens) {
    switch (token.type()) {
      case Token::LINE_COMMENT:
        line_comment_tokens_.push_back(token);
        break;
      case Token::SUFFIX_COMMENT:
        suffix_comment_tokens_.push_back(token);
        break;
      default:
        // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
        // through the real parser.
        tokens_.push_back(token);
        break;
    }
  }
}

Parser::~Parser() {
}

// static
scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
                                    Err* err) {
  Parser p(tokens, err);
  return p.ParseFile();
}

// static
scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
                                              Err* err) {
  Parser p(tokens, err);
  scoped_ptr<ParseNode> expr = p.ParseExpression();
  if (!p.at_end() && !err->has_error()) {
    *err = Err(p.cur_token(), "Trailing garbage");
    return nullptr;
  }
  return expr.Pass();
}

// static
scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
                                         Err* err) {
  for (const Token& token : tokens) {
    switch (token.type()) {
      case Token::INTEGER:
      case Token::STRING:
      case Token::TRUE_TOKEN:
      case Token::FALSE_TOKEN:
      case Token::LEFT_BRACKET:
      case Token::RIGHT_BRACKET:
      case Token::COMMA:
        continue;
      default:
        *err = Err(token, "Invalid token in literal value");
        return nullptr;
    }
  }

  return ParseExpression(tokens, err);
}

bool Parser::IsAssignment(const ParseNode* node) const {
  return node && node->AsBinaryOp() &&
         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
}

bool Parser::IsStatementBreak(Token::Type token_type) const {
  switch (token_type) {
    case Token::IDENTIFIER:
    case Token::LEFT_BRACE:
    case Token::RIGHT_BRACE:
    case Token::IF:
    case Token::ELSE:
      return true;
    default:
      return false;
  }
}

bool Parser::LookAhead(Token::Type type) {
  if (at_end())
    return false;
  return cur_token().type() == type;
}

bool Parser::Match(Token::Type type) {
  if (!LookAhead(type))
    return false;
  Consume();
  return true;
}

Token Parser::Consume(Token::Type type, const char* error_message) {
  Token::Type types[1] = { type };
  return Consume(types, 1, error_message);
}

Token Parser::Consume(Token::Type* types,
                      size_t num_types,
                      const char* error_message) {
  if (has_error()) {
    // Don't overwrite current error, but make progress through tokens so that
    // a loop that's expecting a particular token will still terminate.
    cur_++;
    return Token(Location(), Token::INVALID, base::StringPiece());
  }
  if (at_end()) {
    const char kEOFMsg[] = "I hit EOF instead.";
    if (tokens_.empty())
      *err_ = Err(Location(), error_message, kEOFMsg);
    else
      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    return Token(Location(), Token::INVALID, base::StringPiece());
  }

  for (size_t i = 0; i < num_types; ++i) {
    if (cur_token().type() == types[i])
      return Consume();
  }
  *err_ = Err(cur_token(), error_message);
  return Token(Location(), Token::INVALID, base::StringPiece());
}

Token Parser::Consume() {
  return tokens_[cur_++];
}

scoped_ptr<ParseNode> Parser::ParseExpression() {
  return ParseExpression(0);
}

scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
  if (at_end())
    return scoped_ptr<ParseNode>();

  Token token = Consume();
  PrefixFunc prefix = expressions_[token.type()].prefix;

  if (prefix == nullptr) {
    *err_ = Err(token,
                std::string("Unexpected token '") + token.value().as_string() +
                    std::string("'"));
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> left = (this->*prefix)(token);
  if (has_error())
    return left.Pass();

  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
         precedence <= expressions_[cur_token().type()].precedence) {
    token = Consume();
    InfixFunc infix = expressions_[token.type()].infix;
    if (infix == nullptr) {
      *err_ = Err(token,
                  std::string("Unexpected token '") +
                      token.value().as_string() + std::string("'"));
      return scoped_ptr<ParseNode>();
    }
    left = (this->*infix)(left.Pass(), token);
    if (has_error())
      return scoped_ptr<ParseNode>();
  }

  return left.Pass();
}

scoped_ptr<ParseNode> Parser::Literal(Token token) {
  return make_scoped_ptr(new LiteralNode(token));
}

scoped_ptr<ParseNode> Parser::Name(Token token) {
  return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
}

scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
  scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
  comment->set_comment(token);
  return comment.Pass();
}

scoped_ptr<ParseNode> Parser::Group(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression();
  if (has_error())
    return scoped_ptr<ParseNode>();
  Consume(Token::RIGHT_PAREN, "Expected ')'");
  return expr.Pass();
}

scoped_ptr<ParseNode> Parser::Not(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
  if (has_error())
    return scoped_ptr<ParseNode>();
  if (!expr) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for '!'.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
  unary_op->set_op(token);
  unary_op->set_operand(expr.Pass());
  return unary_op.Pass();
}

scoped_ptr<ParseNode> Parser::List(Token node) {
  scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
  if (!has_error() && !at_end())
    Consume(Token::RIGHT_BRACKET, "Expected ']'");
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
                                             Token token) {
  scoped_ptr<ParseNode> right =
      ParseExpression(expressions_[token.type()].precedence + 1);
  if (!right) {
    if (!has_error()) {
      *err_ = Err(token, "Expected right-hand side for '" +
                             token.value().as_string() + "'");
    }
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
  binary_op->set_op(token);
  binary_op->set_left(left.Pass());
  binary_op->set_right(right.Pass());
  return binary_op.Pass();
}

scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
                                               Token token) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(token);
  list->set_end(make_scoped_ptr(new EndNode(token)));
  scoped_ptr<BlockNode> block;
  bool has_arg = false;
  if (LookAhead(Token::LEFT_PAREN)) {
    Token start_token = Consume();
    // Parsing a function call.
    has_arg = true;
    if (Match(Token::RIGHT_PAREN)) {
      // Nothing, just an empty call.
    } else {
      list = ParseList(start_token, Token::RIGHT_PAREN, false);
      if (has_error())
        return scoped_ptr<ParseNode>();
      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    }
    // Optionally with a scope.
    if (LookAhead(Token::LEFT_BRACE)) {
      block = ParseBlock();
      if (has_error())
        return scoped_ptr<ParseNode>();
    }
  }

  if (!left && !has_arg) {
    // Not a function call, just a standalone identifier.
    return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
  }
  scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
  func_call->set_function(token);
  func_call->set_args(list.Pass());
  if (block)
    func_call->set_block(block.Pass());
  return func_call.Pass();
}

scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
                                         Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
  if (!value) {
    if (!has_error())
      *err_ = Err(token, "Expected right-hand side for assignment.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
  assign->set_op(token);
  assign->set_left(left.Pass());
  assign->set_right(value.Pass());
  return assign.Pass();
}

scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
                                        Token token) {
  // TODO: Maybe support more complex expressions like a[0][0]. This would
  // require work on the evaluator too.
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only subscript identifiers.",
        "The thing on the left hand side of the [] must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary before subscripting. Sorry.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression();
  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_index(value.Pass());
  return accessor.Pass();
}

scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
                                          Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only use \".\" for identifiers.",
        "The thing on the left hand side of the dot must be an identifier\n"
        "and not an expression. If you need this, you'll have to assign the\n"
        "value to a temporary first. Sorry.");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
  if (!right || !right->AsIdentifier()) {
    *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
        "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_member(scoped_ptr<IdentifierNode>(
      static_cast<IdentifierNode*>(right.release())));
  return accessor.Pass();
}

// Does not Consume the start or end token.
scoped_ptr<ListNode> Parser::ParseList(Token start_token,
                                       Token::Type stop_before,
                                       bool allow_trailing_comma) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(start_token);
  bool just_got_comma = false;
  bool first_time = true;
  while (!LookAhead(stop_before)) {
    if (!first_time) {
      if (!just_got_comma) {
        // Require commas separate things in lists.
        *err_ = Err(cur_token(), "Expected comma between items.");
        return scoped_ptr<ListNode>();
      }
    }
    first_time = false;

    // Why _OR? We're parsing things that are higher precedence than the ,
    // that separates the items of the list. , should appear lower than
    // boolean expressions (the lowest of which is OR), but above assignments.
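    // For example (illustrative): in "[ a || b, c ]" the "||" groups a and b
    // into a single list item before the "," is seen as the item separator.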
    list->append_item(ParseExpression(PRECEDENCE_OR));
    if (has_error())
      return scoped_ptr<ListNode>();
    if (at_end()) {
      *err_ =
          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
      return scoped_ptr<ListNode>();
    }
    if (list->contents().back()->AsBlockComment()) {
      // If there was a comment inside the list, we don't need a comma to the
      // next item, so pretend we got one, if we're expecting one.
      just_got_comma = allow_trailing_comma;
    } else {
      just_got_comma = Match(Token::COMMA);
    }
  }

  if (just_got_comma && !allow_trailing_comma) {
    *err_ = Err(cur_token(), "Trailing comma");
    return scoped_ptr<ListNode>();
  }
  list->set_end(make_scoped_ptr(new EndNode(cur_token())));
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::ParseFile() {
  scoped_ptr<BlockNode> file(new BlockNode);
  for (;;) {
    if (at_end())
      break;
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      break;
    file->append_statement(statement.Pass());
  }
  if (!at_end() && !has_error())
    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
  if (has_error())
    return scoped_ptr<ParseNode>();

  // TODO(scottmg): If this is measurably expensive, it could be done only
  // when necessary (when reformatting, or during tests). Comments are
  // separate from the parse tree at this point, so downstream code can remain
  // ignorant of them.
  AssignComments(file.get());

  return file.Pass();
}

scoped_ptr<ParseNode> Parser::ParseStatement() {
  if (LookAhead(Token::IF)) {
    return ParseCondition();
  } else if (LookAhead(Token::BLOCK_COMMENT)) {
    return BlockComment(Consume());
  } else {
    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    // to allow "pointless" expressions and return ParseExpression() directly.
    scoped_ptr<ParseNode> stmt = ParseExpression();
    if (stmt) {
      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
        return stmt.Pass();
    }
    if (!has_error()) {
      Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
      *err_ = Err(token, "Expecting assignment or function call.");
    }
    return scoped_ptr<ParseNode>();
  }
}

scoped_ptr<BlockNode> Parser::ParseBlock() {
  Token begin_token =
      Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
  if (has_error())
    return scoped_ptr<BlockNode>();
  scoped_ptr<BlockNode> block(new BlockNode);
  block->set_begin_token(begin_token);

  for (;;) {
    if (LookAhead(Token::RIGHT_BRACE)) {
      block->set_end(make_scoped_ptr(new EndNode(Consume())));
      break;
    }

    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      return scoped_ptr<BlockNode>();
    block->append_statement(statement.Pass());
  }
  return block.Pass();
}

scoped_ptr<ParseNode> Parser::ParseCondition() {
  scoped_ptr<ConditionNode> condition(new ConditionNode);
  condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
  condition->set_condition(ParseExpression());
  if (IsAssignment(condition->condition()))
    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
  condition->set_if_true(ParseBlock().Pass());
  if (Match(Token::ELSE)) {
    if (LookAhead(Token::LEFT_BRACE)) {
      condition->set_if_false(ParseBlock().Pass());
    } else if (LookAhead(Token::IF)) {
      condition->set_if_false(ParseStatement().Pass());
    } else {
      *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
      return scoped_ptr<ParseNode>();
    }
  }
  if (has_error())
    return scoped_ptr<ParseNode>();
  return condition.Pass();
}

void Parser::TraverseOrder(const ParseNode* root,
                           std::vector<const ParseNode*>* pre,
                           std::vector<const ParseNode*>* post) {
  if (root) {
    pre->push_back(root);

    if (const AccessorNode* accessor = root->AsAccessor()) {
      TraverseOrder(accessor->index(), pre, post);
      TraverseOrder(accessor->member(), pre, post);
    } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
      TraverseOrder(binop->left(), pre, post);
      TraverseOrder(binop->right(), pre, post);
    } else if (const BlockNode* block = root->AsBlock()) {
      for (const auto& statement : block->statements())
        TraverseOrder(statement, pre, post);
      TraverseOrder(block->End(), pre, post);
    } else if (const ConditionNode* condition = root->AsConditionNode()) {
      TraverseOrder(condition->condition(), pre, post);
      TraverseOrder(condition->if_true(), pre, post);
      TraverseOrder(condition->if_false(), pre, post);
    } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
      TraverseOrder(func_call->args(), pre, post);
      TraverseOrder(func_call->block(), pre, post);
    } else if (root->AsIdentifier()) {
      // Nothing.
    } else if (const ListNode* list = root->AsList()) {
      for (const auto& node : list->contents())
        TraverseOrder(node, pre, post);
      TraverseOrder(list->End(), pre, post);
    } else if (root->AsLiteral()) {
      // Nothing.
    } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
      TraverseOrder(unaryop->operand(), pre, post);
    } else if (root->AsBlockComment()) {
      // Nothing.
    } else if (root->AsEnd()) {
      // Nothing.
    } else {
      CHECK(false) << "Unhandled case in TraverseOrder.";
    }

    post->push_back(root);
  }
}

void Parser::AssignComments(ParseNode* file) {
  // Start by generating a pre- and post- order traversal of the tree so we
  // can determine what's before and after comments.
  std::vector<const ParseNode*> pre;
  std::vector<const ParseNode*> post;
  TraverseOrder(file, &pre, &post);

  // Assign line comments to syntax immediately following.
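  //
  // For example (illustrative):
  //
  //   # Sets the output name.
  //   output = "foo"
  //
  // attaches the "# Sets..." line comment to the "output" assignment that
  // follows it.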
  int cur_comment = 0;
  for (const auto& node : pre) {
    const Location& start = node->GetRange().begin();
    while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
      if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(node)->comments_mutable()->append_before(
            line_comment_tokens_[cur_comment]);
        ++cur_comment;
      } else {
        break;
      }
    }
  }

  // Remaining line comments go at end of file.
  for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
       ++cur_comment)
    file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);

  // Assign suffix to syntax immediately before.
  cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
  for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
       i != post.rend();
       ++i) {
    // Don't assign suffix comments to the function, list, or block, but
    // instead to the last thing inside.
    if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
      continue;

    const Location& start = (*i)->GetRange().begin();
    const Location& end = (*i)->GetRange().end();

    // Don't assign suffix comments to something that starts on an earlier
    // line, so that in:
    //
    //   sources = [ "a",
    //       "b" ]  # comment
    //
    // it's attached to "b", not sources = [ ... ].
    if (start.line_number() != end.line_number())
      continue;

    while (cur_comment >= 0) {
      if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
            suffix_comment_tokens_[cur_comment]);
        --cur_comment;
      } else {
        break;
      }
    }

    // Suffix comments were assigned in reverse, so if there were multiple on
    // the same node, they need to be reversed.
    if ((*i)->comments() && !(*i)->comments()->suffix().empty())
      const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
  }
}