// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/parser.h"

#include "base/logging.h"
#include "tools/gn/functions.h"
#include "tools/gn/operators.h"
#include "tools/gn/token.h"

const char kGrammar_Help[] =
    "GN build language grammar\n"
    "\n"
    "  GN build files are read as sequences of tokens. While splitting the\n"
    "  file into tokens, the next token is the longest sequence of characters\n"
    "  that form a valid token.\n"
    "\n"
    "White space and comments\n"
    "\n"
    "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
    "  carriage returns (U+000D), and newlines (U+000A).\n"
    "\n"
    "  Comments start at the character \"#\" and stop at the next newline.\n"
    "\n"
    "  White space and comments are ignored except that they may separate\n"
    "  tokens that would otherwise combine into a single token.\n"
    "\n"
    "  Identifiers name variables and functions.\n"
    "\n"
    "      identifier = letter { letter | digit } .\n"
    "      letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
    "      digit      = \"0\" ... \"9\" .\n"
    "\n"
    "  The following keywords are reserved and may not be used as\n"
    "  identifiers:\n"
    "\n"
    "          else    false   if      true\n"
    "\n"
    "  An integer literal represents a decimal integer value.\n"
    "\n"
    "      integer = [ \"-\" ] digit { digit } .\n"
    "\n"
    "  Leading zeros and negative zero are disallowed.\n"
    "\n"
    "  A string literal represents a string value consisting of the quoted\n"
    "  characters with possible escape sequences and variable expansions.\n"
    "\n"
    "      string    = `\"` { char | escape | expansion } `\"` .\n"
    "      escape    = `\\` ( \"$\" | `\"` | char ) .\n"
    "      expansion = \"$\" ( identifier | \"{\" identifier \"}\" ) .\n"
    "      char      = /* any character except \"$\", `\"`, or newline */ .\n"
    "\n"
    "  After a backslash, certain sequences represent special characters:\n"
    "\n"
    "      \\\"    U+0022    quotation mark\n"
    "      \\$    U+0024    dollar sign\n"
    "      \\\\    U+005C    backslash\n"
    "\n"
    "  All other backslashes represent themselves.\n"
    "\n"
    "  The following character sequences represent punctuation:\n"
    "\n"
    "  The input tokens form a syntax tree following a context-free grammar:\n"
    "\n"
    "      File = StatementList .\n"
    "\n"
    "      Statement     = Assignment | Call | Condition .\n"
    "      Assignment    = identifier AssignOp Expr .\n"
    "      Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
    "      Condition     = \"if\" \"(\" Expr \")\" Block\n"
    "                      [ \"else\" ( Condition | Block ) ] .\n"
    "      Block         = \"{\" StatementList \"}\" .\n"
    "      StatementList = { Statement } .\n"
    "\n"
    "      Expr        = UnaryExpr | Expr BinaryOp Expr .\n"
    "      UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .\n"
    "      PrimaryExpr = identifier | integer | string | Call\n"
    "                  | identifier \"[\" Expr \"]\"\n"
    "                  | identifier \".\" identifier\n"
    "                  | \"(\" Expr \")\"\n"
    "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
    "      ExprList    = Expr { \",\" Expr } .\n"
    "\n"
    "      AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
    "      UnaryOp  = \"!\" .\n"
    "      BinaryOp = \"+\" | \"-\"                  // highest priority\n"
    "               | \"<\" | \"<=\" | \">\" | \">=\"\n"
    "               | \"==\" | \"!=\"\n"
    "               | \"&&\"\n"
    "               | \"||\" .                      // lowest priority\n"
    "\n"
    "  All binary operators are left-associative.\n";

enum Precedence {
  PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  PRECEDENCE_OR = 2,
  PRECEDENCE_AND = 3,
  PRECEDENCE_EQUALITY = 4,
  PRECEDENCE_RELATION = 5,
  PRECEDENCE_SUM = 6,
  PRECEDENCE_PREFIX = 7,
  PRECEDENCE_CALL = 8,
  PRECEDENCE_DOT = 9,  // Highest precedence.
};

// The top-level for blocks/ifs is recursive descent, the expression parser is
// a Pratt parser. The basic idea there is to have the precedences (and
// associativities) encoded relative to each other and only parse up until you
// hit something of that precedence. There's a dispatch table in expressions_
// at the top of parser.cc that describes how each token dispatches if it's
// seen as either a prefix or infix operator, and if it's infix, what its
// precedence is.
//
// - http://javascript.crockford.com/tdop/tdop.html
// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
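//
// For illustration (not from the original comment): with the table below, an
// input like
//   !a || b == c + 1
// parses as (!a) || (b == (c + 1)). ParseExpression() only keeps consuming
// infix operators whose precedence is at least the minimum it was called
// with, so "+" (PRECEDENCE_SUM) binds tighter than "==" (PRECEDENCE_EQUALITY),
// which binds tighter than "||" (PRECEDENCE_OR).
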
// Indexed by Token::Type.
ParserHelper Parser::expressions_[] = {
    {nullptr, nullptr, -1},  // INVALID
    {&Parser::Literal, nullptr, -1},  // INTEGER
    {&Parser::Literal, nullptr, -1},  // STRING
    {&Parser::Literal, nullptr, -1},  // TRUE_TOKEN
    {&Parser::Literal, nullptr, -1},  // FALSE_TOKEN
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // PLUS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},  // MINUS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // PLUS_EQUALS
    {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},  // MINUS_EQUALS
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},  // BOOLEAN_AND
    {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},  // BOOLEAN_OR
    {&Parser::Not, nullptr, -1},  // BANG
    {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},  // DOT
    {&Parser::Group, nullptr, -1},  // LEFT_PAREN
    {nullptr, nullptr, -1},  // RIGHT_PAREN
    {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},  // LEFT_BRACKET
    {nullptr, nullptr, -1},  // RIGHT_BRACKET
    {nullptr, nullptr, -1},  // LEFT_BRACE
    {nullptr, nullptr, -1},  // RIGHT_BRACE
    {nullptr, nullptr, -1},  // IF
    {nullptr, nullptr, -1},  // ELSE
    {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
    {nullptr, nullptr, -1},  // COMMA
    {nullptr, nullptr, -1},  // UNCLASSIFIED_COMMENT
    {nullptr, nullptr, -1},  // LINE_COMMENT
    {nullptr, nullptr, -1},  // SUFFIX_COMMENT
    {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
};

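// Editorial note on reading the table above: each entry is
// {prefix handler, infix handler, infix precedence} for one Token::Type. For
// example, INTEGER can only start an expression (prefix Literal, no infix
// role), while LEFT_BRACKET acts both as a prefix (a list literal, via List)
// and as an infix operator (a subscript, via Subscript) at PRECEDENCE_CALL.
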
Parser::Parser(const std::vector<Token>& tokens, Err* err)
    : err_(err), cur_(0) {
  for (const auto& token : tokens) {
    switch (token.type()) {
      case Token::LINE_COMMENT:
        line_comment_tokens_.push_back(token);
        break;
      case Token::SUFFIX_COMMENT:
        suffix_comment_tokens_.push_back(token);
        break;
      default:
        // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
        // through the real parser.
        tokens_.push_back(token);
        break;
    }
  }
}

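// Editorial note: the constructor above routes LINE_COMMENT and SUFFIX_COMMENT
// tokens into side lists instead of the main token stream; AssignComments()
// at the bottom of this file later attaches them to the finished parse tree.
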
// static
scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
                                    Err* err) {
  Parser p(tokens, err);
  return p.ParseFile();
}

// static
scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
                                              Err* err) {
  Parser p(tokens, err);
  scoped_ptr<ParseNode> expr = p.ParseExpression();
  if (!p.at_end() && !err->has_error()) {
    *err = Err(p.cur_token(), "Trailing garbage");
    return scoped_ptr<ParseNode>();
  }
  return expr.Pass();
}

// static
scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
                                         Err* err) {
  for (const Token& token : tokens) {
    switch (token.type()) {
      case Token::INTEGER:
      case Token::STRING:
      case Token::TRUE_TOKEN:
      case Token::FALSE_TOKEN:
      case Token::LEFT_BRACKET:
      case Token::RIGHT_BRACKET:
      case Token::COMMA:
        continue;
      default:
        *err = Err(token, "Invalid token in literal value");
        return scoped_ptr<ParseNode>();
    }
  }

  return ParseExpression(tokens, err);
}

bool Parser::IsAssignment(const ParseNode* node) const {
  return node && node->AsBinaryOp() &&
         (node->AsBinaryOp()->op().type() == Token::EQUAL ||
          node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
          node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
}

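// Editorial note: IsAssignment() is used both to accept assignments as
// statements (see ParseStatement) and to reject them where they are not
// allowed, e.g. inside an 'if' condition (see ParseCondition).
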
bool Parser::IsStatementBreak(Token::Type token_type) const {
  switch (token_type) {
    case Token::IDENTIFIER:
    case Token::LEFT_BRACE:
    case Token::RIGHT_BRACE:
    case Token::IF:
    case Token::ELSE:
      return true;
    default:
      return false;
  }
}

bool Parser::LookAhead(Token::Type type) {
  if (at_end())
    return false;
  return cur_token().type() == type;
}

bool Parser::Match(Token::Type type) {
  if (!LookAhead(type))
    return false;
  Consume();
  return true;
}

Token Parser::Consume(Token::Type type, const char* error_message) {
  Token::Type types[1] = { type };
  return Consume(types, 1, error_message);
}

Token Parser::Consume(Token::Type* types,
                      size_t num_types,
                      const char* error_message) {
  if (has_error()) {
    // Don't overwrite current error, but make progress through tokens so that
    // a loop that's expecting a particular token will still terminate.
    cur_++;
    return Token(Location(), Token::INVALID, base::StringPiece());
  }
  if (at_end()) {
    const char kEOFMsg[] = "I hit EOF instead.";
    if (tokens_.size() == 0)
      *err_ = Err(Location(), error_message, kEOFMsg);
    else
      *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    return Token(Location(), Token::INVALID, base::StringPiece());
  }

  for (size_t i = 0; i < num_types; ++i) {
    if (cur_token().type() == types[i])
      return Consume();
  }
  *err_ = Err(cur_token(), error_message);
  return Token(Location(), Token::INVALID, base::StringPiece());
}

Token Parser::Consume() {
  return tokens_[cur_++];
}

scoped_ptr<ParseNode> Parser::ParseExpression() {
  return ParseExpression(0);
}

scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
  if (at_end())
    return scoped_ptr<ParseNode>();

  Token token = Consume();
  PrefixFunc prefix = expressions_[token.type()].prefix;

  if (prefix == nullptr) {
    *err_ = Err(token,
                std::string("Unexpected token '") + token.value().as_string() +
                    std::string("'"));
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> left = (this->*prefix)(token);
  if (has_error())
    return left.Pass();

  while (!at_end() && !IsStatementBreak(cur_token().type()) &&
         precedence <= expressions_[cur_token().type()].precedence) {
    token = Consume();
    InfixFunc infix = expressions_[token.type()].infix;
    if (infix == nullptr) {
      *err_ = Err(token,
                  std::string("Unexpected token '") +
                      token.value().as_string() + std::string("'"));
      return scoped_ptr<ParseNode>();
    }
    left = (this->*infix)(left.Pass(), token);
    if (has_error())
      return scoped_ptr<ParseNode>();
  }

  return left.Pass();
}

scoped_ptr<ParseNode> Parser::Literal(Token token) {
  return make_scoped_ptr(new LiteralNode(token));
}

scoped_ptr<ParseNode> Parser::Name(Token token) {
  return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
}

scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
  scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
  comment->set_comment(token);
  return comment.Pass();
}

scoped_ptr<ParseNode> Parser::Group(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression();
  if (has_error())
    return scoped_ptr<ParseNode>();
  Consume(Token::RIGHT_PAREN, "Expected ')'");
  return expr.Pass();
}

scoped_ptr<ParseNode> Parser::Not(Token token) {
  scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
  if (has_error())
    return scoped_ptr<ParseNode>();
  scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
  unary_op->set_op(token);
  unary_op->set_operand(expr.Pass());
  return unary_op.Pass();
}

scoped_ptr<ParseNode> Parser::List(Token node) {
  scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
  if (!has_error() && !at_end())
    Consume(Token::RIGHT_BRACKET, "Expected ']'");
  return list.Pass();
}

scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
                                             Token token) {
  scoped_ptr<ParseNode> right =
      ParseExpression(expressions_[token.type()].precedence + 1);
  if (!right) {
    if (!has_error()) {
      *err_ = Err(token, "Expected right hand side for '" +
                             token.value().as_string() + "'");
    }
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
  binary_op->set_op(token);
  binary_op->set_left(left.Pass());
  binary_op->set_right(right.Pass());
  return binary_op.Pass();
}

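// Editorial note: passing precedence + 1 when parsing the right-hand side is
// what makes binary operators left-associative, matching the grammar text
// above: "a - b - c" parses as "(a - b) - c" because the recursive call will
// not consume another "-" at the same precedence level.
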
scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
                                               Token token) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(token);
  list->set_end(make_scoped_ptr(new EndNode(token)));
  scoped_ptr<BlockNode> block;
  bool has_arg = false;
  if (LookAhead(Token::LEFT_PAREN)) {
    Token start_token = Consume();
    // Parsing a function call.
    has_arg = true;
    if (Match(Token::RIGHT_PAREN)) {
      // Nothing, just an empty call.
    } else {
      list = ParseList(start_token, Token::RIGHT_PAREN, false);
      if (has_error())
        return scoped_ptr<ParseNode>();
      Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    }
    // Optionally with a scope.
    if (LookAhead(Token::LEFT_BRACE)) {
      block = ParseBlock();
      if (has_error())
        return scoped_ptr<ParseNode>();
    }
  }

  if (!left && !has_arg) {
    // Not a function call, just a standalone identifier.
    return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
  }
  scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
  func_call->set_function(token);
  func_call->set_args(list.Pass());
  if (block)
    func_call->set_block(block.Pass());
  return func_call.Pass();
}

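// Editorial note: IdentifierOrCall() covers the four shapes an identifier can
// take in the grammar: a bare name ("foo"), an empty call ("foo()"), a call
// with arguments ("foo(1, 2)"), and a call followed by a block
// ("foo("bar") { ... }").
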
scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
                                         Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
  scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
  assign->set_op(token);
  assign->set_left(left.Pass());
  assign->set_right(value.Pass());
  return assign.Pass();
}

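// Editorial note: Assignment() is reached through the infix table rows for
// "=", "+=", and "-=" (see expressions_ above), so an input like
// "x = [ 1, 2 ]" becomes a BinaryOpNode with an IdentifierNode on the left
// and the list expression on the right.
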
scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
                                        Token token) {
  // TODO: Maybe support more complex expressions like a[0][0]. This would
  // require work on the evaluator too.
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only subscript identifiers.",
                "The thing on the left hand side of the [] must be an identifier\n"
                "and not an expression. If you need this, you'll have to assign the\n"
                "value to a temporary before subscripting. Sorry.");
    return scoped_ptr<ParseNode>();
  }
  scoped_ptr<ParseNode> value = ParseExpression();
  Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_index(value.Pass());
  return accessor.Pass();
}

scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
                                          Token token) {
  if (left->AsIdentifier() == nullptr) {
    *err_ = Err(left.get(), "May only use \".\" for identifiers.",
                "The thing on the left hand side of the dot must be an identifier\n"
                "and not an expression. If you need this, you'll have to assign the\n"
                "value to a temporary first. Sorry.");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
  if (!right || !right->AsIdentifier()) {
    *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
                "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    return scoped_ptr<ParseNode>();
  }

  scoped_ptr<AccessorNode> accessor(new AccessorNode);
  accessor->set_base(left->AsIdentifier()->value());
  accessor->set_member(scoped_ptr<IdentifierNode>(
      static_cast<IdentifierNode*>(right.release())));
  return accessor.Pass();
}

// Does not Consume the start or end token.
scoped_ptr<ListNode> Parser::ParseList(Token start_token,
                                       Token::Type stop_before,
                                       bool allow_trailing_comma) {
  scoped_ptr<ListNode> list(new ListNode);
  list->set_begin_token(start_token);
  bool just_got_comma = false;
  bool first_time = true;
  while (!LookAhead(stop_before)) {
    if (!first_time) {
      if (!just_got_comma) {
        // Require commas separate things in lists.
        *err_ = Err(cur_token(), "Expected comma between items.");
        return scoped_ptr<ListNode>();
      }
    }
    first_time = false;

    // Why _OR? We're parsing things that are higher precedence than the ,
    // that separates the items of the list. , should appear lower than
    // boolean expressions (the lowest of which is OR), but above assignments.
    list->append_item(ParseExpression(PRECEDENCE_OR));
    if (has_error())
      return scoped_ptr<ListNode>();
    if (at_end()) {
      *err_ =
          Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
      return scoped_ptr<ListNode>();
    }
    if (list->contents().back()->AsBlockComment()) {
      // If there was a comment inside the list, we don't need a comma to the
      // next item, so pretend we got one, if we're expecting one.
      just_got_comma = allow_trailing_comma;
    } else {
      just_got_comma = Match(Token::COMMA);
    }
  }
  if (just_got_comma && !allow_trailing_comma) {
    *err_ = Err(cur_token(), "Trailing comma");
    return scoped_ptr<ListNode>();
  }
  list->set_end(make_scoped_ptr(new EndNode(cur_token())));
  return list.Pass();
}

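// Editorial note: allow_trailing_comma is why a list literal like
// [ "a", "b", ] is accepted (List() passes true) while a call like
// foo(1, 2,) is rejected with "Trailing comma" (IdentifierOrCall() passes
// false).
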
scoped_ptr<ParseNode> Parser::ParseFile() {
  scoped_ptr<BlockNode> file(new BlockNode);
  for (;;) {
    if (at_end())
      break;
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      break;
    file->append_statement(statement.Pass());
  }
  if (!at_end() && !has_error())
    *err_ = Err(cur_token(), "Unexpected here, should be newline.");
  if (has_error())
    return scoped_ptr<ParseNode>();

  // TODO(scottmg): If this is measurably expensive, it could be done only
  // when necessary (when reformatting, or during tests). Comments are
  // separate from the parse tree at this point, so downstream code can remain
  // ignorant of them.
  AssignComments(file.get());

  return file.Pass();
}

scoped_ptr<ParseNode> Parser::ParseStatement() {
  if (LookAhead(Token::IF)) {
    return ParseCondition();
  } else if (LookAhead(Token::BLOCK_COMMENT)) {
    return BlockComment(Consume());
  } else {
    // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    // to allow "pointless" expressions and return ParseExpression() directly.
    scoped_ptr<ParseNode> stmt = ParseExpression();
    if (stmt) {
      if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
        return stmt.Pass();
    }
    if (!has_error()) {
      Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
      *err_ = Err(token, "Expecting assignment or function call.");
    }
    return scoped_ptr<ParseNode>();
  }
}

scoped_ptr<BlockNode> Parser::ParseBlock() {
  Token begin_token =
      Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
  if (has_error())
    return scoped_ptr<BlockNode>();
  scoped_ptr<BlockNode> block(new BlockNode);
  block->set_begin_token(begin_token);

  for (;;) {
    if (LookAhead(Token::RIGHT_BRACE)) {
      block->set_end(make_scoped_ptr(new EndNode(Consume())));
      break;
    }
    scoped_ptr<ParseNode> statement = ParseStatement();
    if (!statement)
      return scoped_ptr<BlockNode>();
    block->append_statement(statement.Pass());
  }
  return block.Pass();
}

scoped_ptr<ParseNode> Parser::ParseCondition() {
  scoped_ptr<ConditionNode> condition(new ConditionNode);
  condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
  Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
  condition->set_condition(ParseExpression());
  if (IsAssignment(condition->condition()))
    *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
  Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
  condition->set_if_true(ParseBlock().Pass());
  if (Match(Token::ELSE)) {
    if (LookAhead(Token::LEFT_BRACE)) {
      condition->set_if_false(ParseBlock().Pass());
    } else if (LookAhead(Token::IF)) {
      condition->set_if_false(ParseStatement().Pass());
    } else {
      *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
      return scoped_ptr<ParseNode>();
    }
  }
  if (has_error())
    return scoped_ptr<ParseNode>();
  return condition.Pass();
}

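// Example of the shapes ParseCondition() accepts (illustrative GN source,
// not from the original file):
//
//   if (is_win) {
//     sources += [ "win.cc" ]
//   } else if (is_linux) {
//     sources += [ "linux.cc" ]
//   } else {
//     sources += [ "generic.cc" ]
//   }
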
void Parser::TraverseOrder(const ParseNode* root,
                           std::vector<const ParseNode*>* pre,
                           std::vector<const ParseNode*>* post) {
  if (root) {
    pre->push_back(root);

    if (const AccessorNode* accessor = root->AsAccessor()) {
      TraverseOrder(accessor->index(), pre, post);
      TraverseOrder(accessor->member(), pre, post);
    } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
      TraverseOrder(binop->left(), pre, post);
      TraverseOrder(binop->right(), pre, post);
    } else if (const BlockNode* block = root->AsBlock()) {
      for (const auto& statement : block->statements())
        TraverseOrder(statement, pre, post);
      TraverseOrder(block->End(), pre, post);
    } else if (const ConditionNode* condition = root->AsConditionNode()) {
      TraverseOrder(condition->condition(), pre, post);
      TraverseOrder(condition->if_true(), pre, post);
      TraverseOrder(condition->if_false(), pre, post);
    } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
      TraverseOrder(func_call->args(), pre, post);
      TraverseOrder(func_call->block(), pre, post);
    } else if (root->AsIdentifier()) {
      // Nothing.
    } else if (const ListNode* list = root->AsList()) {
      for (const auto& node : list->contents())
        TraverseOrder(node, pre, post);
      TraverseOrder(list->End(), pre, post);
    } else if (root->AsLiteral()) {
      // Nothing.
    } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
      TraverseOrder(unaryop->operand(), pre, post);
    } else if (root->AsBlockComment()) {
      // Nothing.
    } else if (root->AsEnd()) {
      // Nothing.
    } else {
      CHECK(false) << "Unhandled case in TraverseOrder.";
    }

    post->push_back(root);
  }
}

void Parser::AssignComments(ParseNode* file) {
  // Start by generating a pre- and post- order traversal of the tree so we
  // can determine what's before and after comments.
  std::vector<const ParseNode*> pre;
  std::vector<const ParseNode*> post;
  TraverseOrder(file, &pre, &post);

  // Assign line comments to syntax immediately following.
  int cur_comment = 0;
  for (const auto& node : pre) {
    const Location& start = node->GetRange().begin();
    while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
      if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(node)->comments_mutable()->append_before(
            line_comment_tokens_[cur_comment]);
        ++cur_comment;
      } else {
        break;
      }
    }
  }

  // Remaining line comments go at end of file.
  for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
       ++cur_comment)
    file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);

  // Assign suffix to syntax immediately before.
  cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
  for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
       i != post.rend();
       ++i) {
    // Don't assign suffix comments to the function, list, or block, but
    // instead to the last thing inside.
    if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
      continue;

    const Location& start = (*i)->GetRange().begin();
    const Location& end = (*i)->GetRange().end();

    // Don't assign suffix comments to something that starts on an earlier
    // line, so that in:
    //
    //   sources = [ "a",
    //       "b" ] # comment
    //
    // it's attached to "b", not sources = [ ... ].
    if (start.line_number() != end.line_number())
      continue;

    while (cur_comment >= 0) {
      if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
        const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
            suffix_comment_tokens_[cur_comment]);
        --cur_comment;
      } else {
        break;
      }
    }

    // Suffix comments were assigned in reverse, so if there were multiple on
    // the same node, they need to be reversed.
    if ((*i)->comments() && !(*i)->comments()->suffix().empty())
      const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
  }
}
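
// Editorial note: the net effect of AssignComments() is that, for input like
//
//   # A standalone line comment.
//   foo = 1  # A suffix comment.
//
// the line comment becomes a "before" comment of the "foo = 1" assignment,
// and the suffix comment is attached as a suffix to the node that ends on
// that line.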