tools/gn/parser.cc

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "tools/gn/parser.h"
   6
   7 #include "base/logging.h"
   8 #include "tools/gn/functions.h"
   9 #include "tools/gn/operators.h"
  10 #include "tools/gn/token.h"
  11
  12 const char kGrammar_Help[] =
  13     "GN build language grammar\n"
  14     "\n"
  15     "Tokens\n"
  16     "\n"
  17     "  GN build files are read as sequences of tokens.  While splitting the\n"
  18     "  file into tokens, the next token is the longest sequence of characters\n"
  19     "  that form a valid token.\n"
  20     "\n"
  21     "White space and comments\n"
  22     "\n"
  23     "  White space is comprised of spaces (U+0020), horizontal tabs (U+0009),\n"
  24     "  carriage returns (U+000D), and newlines (U+000A).\n"
  25     "\n"
  26     "  Comments start at the character \"#\" and stop at the next newline.\n"
  27     "\n"
  28     "  White space and comments are ignored except that they may separate\n"
  29     "  tokens that would otherwise combine into a single token.\n"
  30     "\n"
  31     "Identifiers\n"
  32     "\n"
  33     "  Identifiers name variables and functions.\n"
  34     "\n"
  35     "      identifier = letter { letter | digit } .\n"
  36     "      letter     = \"A\" ... \"Z\" | \"a\" ... \"z\" | \"_\" .\n"
  37     "      digit      = \"0\" ... \"9\" .\n"
  38     "\n"
  39     "Keywords\n"
  40     "\n"
  41     "  The following keywords are reserved and may not be used as\n"
  42     "  identifiers:\n"
  43     "\n"
  44     "          else    false   if      true\n"
  45     "\n"
  46     "Integer literals\n"
  47     "\n"
  48     "  An integer literal represents a decimal integer value.\n"
  49     "\n"
  50     "      integer = [ \"-\" ] digit { digit } .\n"
  51     "\n"
  52     "  Leading zeros and negative zero are disallowed.\n"
  53     "\n"
  54     "String literals\n"
  55     "\n"
  56     "  A string literal represents a string value consisting of the quoted\n"
  57     "  characters with possible escape sequences and variable expansions.\n"
  58     "\n"
  59     "      string           = `\"` { char | escape | expansion } `\"` .\n"
  60     "      escape           = `\\` ( \"$\" | `\"` | char ) .\n"
  61     "      BracketExpansion = \"{\" ( identifier | ArrayAccess | ScopeAccess "
  62                               ") \"}\" .\n"
  63     "      expansion        = \"$\" ( identifier | BracketExpansion ) .\n"
  64     "      char             = /* any character except \"$\", `\"`, or newline "
  65                              "*/ .\n"
  66     "\n"
  67     "  After a backslash, certain sequences represent special characters:\n"
  68     "\n"
  69     "          \\\"    U+0022    quotation mark\n"
  70     "          \\$    U+0024    dollar sign\n"
  71     "          \\\\    U+005C    backslash\n"
  72     "\n"
  73     "  All other backslashes represent themselves.\n"
  74     "\n"
  75     "Punctuation\n"
  76     "\n"
  77     "  The following character sequences represent punctuation:\n"
  78     "\n"
  79     "          +       +=      ==      !=      (       )\n"
  80     "          -       -=      <       <=      [       ]\n"
  81     "          !       =       >       >=      {       }\n"
  82     "                          &&      ||      .       ,\n"
  83     "\n"
  84     "Grammar\n"
  85     "\n"
  86     "  The input tokens form a syntax tree following a context-free grammar:\n"
  87     "\n"
  88     "      File = StatementList .\n"
  89     "\n"
  90     "      Statement     = Assignment | Call | Condition .\n"
  91     "      Assignment    = identifier AssignOp Expr .\n"
  92     "      Call          = identifier \"(\" [ ExprList ] \")\" [ Block ] .\n"
  93     "      Condition     = \"if\" \"(\" Expr \")\" Block\n"
  94     "                      [ \"else\" ( Condition | Block ) ] .\n"
  95     "      Block         = \"{\" StatementList \"}\" .\n"
  96     "      StatementList = { Statement } .\n"
  97     "\n"
  98     "      ArrayAccess = identifier \"[\" { identifier | integer } \"]\" .\n"
  99     "      ScopeAccess = identifier \".\" identifier .\n"
 100     "      Expr        = UnaryExpr | Expr BinaryOp Expr .\n"
 101     "      UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .\n"
 102     "      PrimaryExpr = identifier | integer | string | Call\n"
 103     "                  | ArrayAccess | ScopeAccess\n"
 104     "                  | \"(\" Expr \")\"\n"
 105     "                  | \"[\" [ ExprList [ \",\" ] ] \"]\" .\n"
 106     "      ExprList    = Expr { \",\" Expr } .\n"
 107     "\n"
 108     "      AssignOp = \"=\" | \"+=\" | \"-=\" .\n"
 109     "      UnaryOp  = \"!\" .\n"
 110     "      BinaryOp = \"+\" | \"-\"                  // highest priority\n"
 111     "               | \"<\" | \"<=\" | \">\" | \">=\"\n"
 112     "               | \"==\" | \"!=\"\n"
 113     "               | \"&&\"\n"
 114     "               | \"||\" .                     // lowest priority\n"
 115     "\n"
 116     "  All binary operators are left-associative.\n";
 117
 118 enum Precedence {
 119   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
 120   PRECEDENCE_OR = 2,
 121   PRECEDENCE_AND = 3,
 122   PRECEDENCE_EQUALITY = 4,
 123   PRECEDENCE_RELATION = 5,
 124   PRECEDENCE_SUM = 6,
 125   PRECEDENCE_PREFIX = 7,
 126   PRECEDENCE_CALL = 8,
 127   PRECEDENCE_DOT = 9,         // Highest precedence.
 128 };
 129
 130 // The top-level for blocks/ifs is recursive descent, the expression parser is
 131 // a Pratt parser. The basic idea there is to have the precedences (and
 132 // associativities) encoded relative to each other and only parse up until you
 133 // hit something of that precedence. There's a dispatch table in expressions_
 134 // at the top of parser.cc that describes how each token dispatches if it's
 135 // seen as either a prefix or infix operator, and if it's infix, what its
 136 // precedence is.
 137 //
 138 // Refs:
 139 // - http://javascript.crockford.com/tdop/tdop.html
 140 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
 141
 142 // Indexed by Token::Type.
 143 ParserHelper Parser::expressions_[] = {
 144     {nullptr, nullptr, -1},                                   // INVALID
 145     {&Parser::Literal, nullptr, -1},                          // INTEGER
 146     {&Parser::Literal, nullptr, -1},                          // STRING
 147     {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
 148     {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
 149     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
 150     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
 151     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
 152     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
 153     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
 154     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
 155     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
 156     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
 157     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
 158     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
 159     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
 160     {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
 161     {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
 162     {&Parser::Not, nullptr, -1},                              // BANG
 163     {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
 164     {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
 165     {nullptr, nullptr, -1},                                   // RIGHT_PAREN
 166     {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
 167     {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
 168     {nullptr, nullptr, -1},                                   // LEFT_BRACE
 169     {nullptr, nullptr, -1},                                   // RIGHT_BRACE
 170     {nullptr, nullptr, -1},                                   // IF
 171     {nullptr, nullptr, -1},                                   // ELSE
 172     {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
 173     {nullptr, nullptr, -1},                                       // COMMA
 174     {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
 175     {nullptr, nullptr, -1},                // LINE_COMMENT
 176     {nullptr, nullptr, -1},                // SUFFIX_COMMENT
 177     {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
 178 };
 179
 180 Parser::Parser(const std::vector<Token>& tokens, Err* err)
 181     : err_(err), cur_(0) {
 182   for (const auto& token : tokens) {
 183     switch (token.type()) {
 184       case Token::LINE_COMMENT:
 185         line_comment_tokens_.push_back(token);
 186         break;
 187       case Token::SUFFIX_COMMENT:
 188         suffix_comment_tokens_.push_back(token);
 189         break;
 190       default:
 191         // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
 192         // through the real parser.
 193         tokens_.push_back(token);
 194         break;
 195     }
 196   }
 197 }
 198
 199 Parser::~Parser() {
 200 }
 201
 202 // static
 203 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
 204                                     Err* err) {
 205   Parser p(tokens, err);
 206   return p.ParseFile();
 207 }
 208
 209 // static
 210 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
 211                                               Err* err) {
 212   Parser p(tokens, err);
 213   scoped_ptr<ParseNode> expr = p.ParseExpression();
 214   if (!p.at_end() && !err->has_error()) {
 215     *err = Err(p.cur_token(), "Trailing garbage");
 216     return nullptr;
 217   }
 218   return expr.Pass();
 219 }
 220
 221 // static
 222 scoped_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
 223                                          Err* err) {
 224   for (const Token& token : tokens) {
 225     switch (token.type()) {
 226       case Token::INTEGER:
 227       case Token::STRING:
 228       case Token::TRUE_TOKEN:
 229       case Token::FALSE_TOKEN:
 230       case Token::LEFT_BRACKET:
 231       case Token::RIGHT_BRACKET:
 232       case Token::COMMA:
 233         continue;
 234       default:
 235         *err = Err(token, "Invalid token in literal value");
 236         return nullptr;
 237     }
 238   }
 239
 240   return ParseExpression(tokens, err);
 241 }
 242
 243 bool Parser::IsAssignment(const ParseNode* node) const {
 244   return node && node->AsBinaryOp() &&
 245          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
 246           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
 247           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
 248 }
 249
 250 bool Parser::IsStatementBreak(Token::Type token_type) const {
 251   switch (token_type) {
 252     case Token::IDENTIFIER:
 253     case Token::LEFT_BRACE:
 254     case Token::RIGHT_BRACE:
 255     case Token::IF:
 256     case Token::ELSE:
 257       return true;
 258     default:
 259       return false;
 260   }
 261 }
 262
 263 bool Parser::LookAhead(Token::Type type) {
 264   if (at_end())
 265     return false;
 266   return cur_token().type() == type;
 267 }
 268
 269 bool Parser::Match(Token::Type type) {
 270   if (!LookAhead(type))
 271     return false;
 272   Consume();
 273   return true;
 274 }
 275
 276 Token Parser::Consume(Token::Type type, const char* error_message) {
 277   Token::Type types[1] = { type };
 278   return Consume(types, 1, error_message);
 279 }
 280
 281 Token Parser::Consume(Token::Type* types,
 282                       size_t num_types,
 283                       const char* error_message) {
 284   if (has_error()) {
 285     // Don't overwrite current error, but make progress through tokens so that
 286     // a loop that's expecting a particular token will still terminate.
 287     cur_++;
 288     return Token(Location(), Token::INVALID, base::StringPiece());
 289   }
 290   if (at_end()) {
 291     const char kEOFMsg[] = "I hit EOF instead.";
 292     if (tokens_.empty())
 293       *err_ = Err(Location(), error_message, kEOFMsg);
 294     else
 295       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
 296     return Token(Location(), Token::INVALID, base::StringPiece());
 297   }
 298
 299   for (size_t i = 0; i < num_types; ++i) {
 300     if (cur_token().type() == types[i])
 301       return Consume();
 302   }
 303   *err_ = Err(cur_token(), error_message);
 304   return Token(Location(), Token::INVALID, base::StringPiece());
 305 }
 306
 307 Token Parser::Consume() {
 308   return tokens_[cur_++];
 309 }
 310
 311 scoped_ptr<ParseNode> Parser::ParseExpression() {
 312   return ParseExpression(0);
 313 }
 314
 315 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
 316   if (at_end())
 317     return scoped_ptr<ParseNode>();
 318
 319   Token token = Consume();
 320   PrefixFunc prefix = expressions_[token.type()].prefix;
 321
 322   if (prefix == nullptr) {
 323     *err_ = Err(token,
 324                 std::string("Unexpected token '") + token.value().as_string() +
 325                     std::string("'"));
 326     return scoped_ptr<ParseNode>();
 327   }
 328
 329   scoped_ptr<ParseNode> left = (this->*prefix)(token);
 330   if (has_error())
 331     return left.Pass();
 332
 333   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
 334          precedence <= expressions_[cur_token().type()].precedence) {
 335     token = Consume();
 336     InfixFunc infix = expressions_[token.type()].infix;
 337     if (infix == nullptr) {
 338       *err_ = Err(token,
 339                   std::string("Unexpected token '") +
 340                       token.value().as_string() + std::string("'"));
 341       return scoped_ptr<ParseNode>();
 342     }
 343     left = (this->*infix)(left.Pass(), token);
 344     if (has_error())
 345       return scoped_ptr<ParseNode>();
 346   }
 347
 348   return left.Pass();
 349 }
 350
 351 scoped_ptr<ParseNode> Parser::Literal(Token token) {
 352   return make_scoped_ptr(new LiteralNode(token));
 353 }
 354
 355 scoped_ptr<ParseNode> Parser::Name(Token token) {
 356   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
 357 }
 358
 359 scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
 360   scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
 361   comment->set_comment(token);
 362   return comment.Pass();
 363 }
 364
 365 scoped_ptr<ParseNode> Parser::Group(Token token) {
 366   scoped_ptr<ParseNode> expr = ParseExpression();
 367   if (has_error())
 368     return scoped_ptr<ParseNode>();
 369   Consume(Token::RIGHT_PAREN, "Expected ')'");
 370   return expr.Pass();
 371 }
 372
 373 scoped_ptr<ParseNode> Parser::Not(Token token) {
 374   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
 375   if (has_error())
 376     return scoped_ptr<ParseNode>();
 377   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
 378   unary_op->set_op(token);
 379   unary_op->set_operand(expr.Pass());
 380   return unary_op.Pass();
 381 }
 382
 383 scoped_ptr<ParseNode> Parser::List(Token node) {
 384   scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
 385   if (!has_error() && !at_end())
 386     Consume(Token::RIGHT_BRACKET, "Expected ']'");
 387   return list.Pass();
 388 }
 389
 390 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
 391                                              Token token) {
 392   scoped_ptr<ParseNode> right =
 393       ParseExpression(expressions_[token.type()].precedence + 1);
 394   if (!right) {
 395     if (!has_error()) {
 396       *err_ = Err(token, "Expected right hand side for '" +
 397                              token.value().as_string() + "'");
 398     }
 399     return scoped_ptr<ParseNode>();
 400   }
 401   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
 402   binary_op->set_op(token);
 403   binary_op->set_left(left.Pass());
 404   binary_op->set_right(right.Pass());
 405   return binary_op.Pass();
 406 }
 407
 408 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
 409                                                Token token) {
 410   scoped_ptr<ListNode> list(new ListNode);
 411   list->set_begin_token(token);
 412   list->set_end(make_scoped_ptr(new EndNode(token)));
 413   scoped_ptr<BlockNode> block;
 414   bool has_arg = false;
 415   if (LookAhead(Token::LEFT_PAREN)) {
 416     Token start_token = Consume();
 417     // Parsing a function call.
 418     has_arg = true;
 419     if (Match(Token::RIGHT_PAREN)) {
 420       // Nothing, just an empty call.
 421     } else {
 422       list = ParseList(start_token, Token::RIGHT_PAREN, false);
 423       if (has_error())
 424         return scoped_ptr<ParseNode>();
 425       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
 426     }
 427     // Optionally with a scope.
 428     if (LookAhead(Token::LEFT_BRACE)) {
 429       block = ParseBlock();
 430       if (has_error())
 431         return scoped_ptr<ParseNode>();
 432     }
 433   }
 434
 435   if (!left && !has_arg) {
 436     // Not a function call, just a standalone identifier.
 437     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
 438   }
 439   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
 440   func_call->set_function(token);
 441   func_call->set_args(list.Pass());
 442   if (block)
 443     func_call->set_block(block.Pass());
 444   return func_call.Pass();
 445 }
 446
 447 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
 448                                          Token token) {
 449   if (left->AsIdentifier() == nullptr) {
 450     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
 451     return scoped_ptr<ParseNode>();
 452   }
 453   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
 454   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
 455   assign->set_op(token);
 456   assign->set_left(left.Pass());
 457   assign->set_right(value.Pass());
 458   return assign.Pass();
 459 }
 460
 461 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
 462                                         Token token) {
 463   // TODO: Maybe support more complex expressions like a[0][0]. This would
 464   // require work on the evaluator too.
 465   if (left->AsIdentifier() == nullptr) {
 466     *err_ = Err(left.get(), "May only subscript identifiers.",
 467         "The thing on the left hand side of the [] must be an identifier\n"
 468         "and not an expression. If you need this, you'll have to assign the\n"
 469         "value to a temporary before subscripting. Sorry.");
 470     return scoped_ptr<ParseNode>();
 471   }
 472   scoped_ptr<ParseNode> value = ParseExpression();
 473   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
 474   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 475   accessor->set_base(left->AsIdentifier()->value());
 476   accessor->set_index(value.Pass());
 477   return accessor.Pass();
 478 }
 479
 480 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
 481                                           Token token) {
 482   if (left->AsIdentifier() == nullptr) {
 483     *err_ = Err(left.get(), "May only use \".\" for identifiers.",
 484         "The thing on the left hand side of the dot must be an identifier\n"
 485         "and not an expression. If you need this, you'll have to assign the\n"
 486         "value to a temporary first. Sorry.");
 487     return scoped_ptr<ParseNode>();
 488   }
 489
 490   scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
 491   if (!right || !right->AsIdentifier()) {
 492     *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
 493         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
 494     return scoped_ptr<ParseNode>();
 495   }
 496
 497   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 498   accessor->set_base(left->AsIdentifier()->value());
 499   accessor->set_member(scoped_ptr<IdentifierNode>(
 500       static_cast<IdentifierNode*>(right.release())));
 501   return accessor.Pass();
 502 }
 503
 504 // Does not Consume the start or end token.
 505 scoped_ptr<ListNode> Parser::ParseList(Token start_token,
 506                                        Token::Type stop_before,
 507                                        bool allow_trailing_comma) {
 508   scoped_ptr<ListNode> list(new ListNode);
 509   list->set_begin_token(start_token);
 510   bool just_got_comma = false;
 511   bool first_time = true;
 512   while (!LookAhead(stop_before)) {
 513     if (!first_time) {
 514       if (!just_got_comma) {
 515         // Require commas separate things in lists.
 516         *err_ = Err(cur_token(), "Expected comma between items.");
 517         return scoped_ptr<ListNode>();
 518       }
 519     }
 520     first_time = false;
 521
 522     // Why _OR? We're parsing things that are higher precedence than the ,
 523     // that separates the items of the list. , should appear lower than
 524     // boolean expressions (the lowest of which is OR), but above assignments.
 525     list->append_item(ParseExpression(PRECEDENCE_OR));
 526     if (has_error())
 527       return scoped_ptr<ListNode>();
 528     if (at_end()) {
 529       *err_ =
 530           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
 531       return scoped_ptr<ListNode>();
 532     }
 533     if (list->contents().back()->AsBlockComment()) {
 534       // If there was a comment inside the list, we don't need a comma to the
 535       // next item, so pretend we got one, if we're expecting one.
 536       just_got_comma = allow_trailing_comma;
 537     } else {
 538       just_got_comma = Match(Token::COMMA);
 539     }
 540   }
 541   if (just_got_comma && !allow_trailing_comma) {
 542     *err_ = Err(cur_token(), "Trailing comma");
 543     return scoped_ptr<ListNode>();
 544   }
 545   list->set_end(make_scoped_ptr(new EndNode(cur_token())));
 546   return list.Pass();
 547 }
 548
 549 scoped_ptr<ParseNode> Parser::ParseFile() {
 550   scoped_ptr<BlockNode> file(new BlockNode);
 551   for (;;) {
 552     if (at_end())
 553       break;
 554     scoped_ptr<ParseNode> statement = ParseStatement();
 555     if (!statement)
 556       break;
 557     file->append_statement(statement.Pass());
 558   }
 559   if (!at_end() && !has_error())
 560     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
 561   if (has_error())
 562     return scoped_ptr<ParseNode>();
 563
 564   // TODO(scottmg): If this is measurably expensive, it could be done only
 565   // when necessary (when reformatting, or during tests). Comments are
 566   // separate from the parse tree at this point, so downstream code can remain
 567   // ignorant of them.
 568   AssignComments(file.get());
 569
 570   return file.Pass();
 571 }
 572
 573 scoped_ptr<ParseNode> Parser::ParseStatement() {
 574   if (LookAhead(Token::IF)) {
 575     return ParseCondition();
 576   } else if (LookAhead(Token::BLOCK_COMMENT)) {
 577     return BlockComment(Consume());
 578   } else {
 579     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
 580     // to allow "pointless" expressions and return ParseExpression() directly.
 581     scoped_ptr<ParseNode> stmt = ParseExpression();
 582     if (stmt) {
 583       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
 584         return stmt.Pass();
 585     }
 586     if (!has_error()) {
 587       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
 588       *err_ = Err(token, "Expecting assignment or function call.");
 589     }
 590     return scoped_ptr<ParseNode>();
 591   }
 592 }
 593
 594 scoped_ptr<BlockNode> Parser::ParseBlock() {
 595   Token begin_token =
 596       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
 597   if (has_error())
 598     return scoped_ptr<BlockNode>();
 599   scoped_ptr<BlockNode> block(new BlockNode);
 600   block->set_begin_token(begin_token);
 601
 602   for (;;) {
 603     if (LookAhead(Token::RIGHT_BRACE)) {
 604       block->set_end(make_scoped_ptr(new EndNode(Consume())));
 605       break;
 606     }
 607
 608     scoped_ptr<ParseNode> statement = ParseStatement();
 609     if (!statement)
 610       return scoped_ptr<BlockNode>();
 611     block->append_statement(statement.Pass());
 612   }
 613   return block.Pass();
 614 }
 615
 616 scoped_ptr<ParseNode> Parser::ParseCondition() {
 617   scoped_ptr<ConditionNode> condition(new ConditionNode);
 618   condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
 619   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
 620   condition->set_condition(ParseExpression());
 621   if (IsAssignment(condition->condition()))
 622     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
 623   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
 624   condition->set_if_true(ParseBlock().Pass());
 625   if (Match(Token::ELSE)) {
 626     if (LookAhead(Token::LEFT_BRACE)) {
 627       condition->set_if_false(ParseBlock().Pass());
 628     } else if (LookAhead(Token::IF)) {
 629       condition->set_if_false(ParseStatement().Pass());
 630     } else {
 631       *err_ = Err(cur_token(), "Expected '{' or 'if' after 'else'.");
 632       return scoped_ptr<ParseNode>();
 633     }
 634   }
 635   if (has_error())
 636     return scoped_ptr<ParseNode>();
 637   return condition.Pass();
 638 }
 639
 640 void Parser::TraverseOrder(const ParseNode* root,
 641                            std::vector<const ParseNode*>* pre,
 642                            std::vector<const ParseNode*>* post) {
 643   if (root) {
 644     pre->push_back(root);
 645
 646     if (const AccessorNode* accessor = root->AsAccessor()) {
 647       TraverseOrder(accessor->index(), pre, post);
 648       TraverseOrder(accessor->member(), pre, post);
 649     } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
 650       TraverseOrder(binop->left(), pre, post);
 651       TraverseOrder(binop->right(), pre, post);
 652     } else if (const BlockNode* block = root->AsBlock()) {
 653       for (const auto& statement : block->statements())
 654         TraverseOrder(statement, pre, post);
 655       TraverseOrder(block->End(), pre, post);
 656     } else if (const ConditionNode* condition = root->AsConditionNode()) {
 657       TraverseOrder(condition->condition(), pre, post);
 658       TraverseOrder(condition->if_true(), pre, post);
 659       TraverseOrder(condition->if_false(), pre, post);
 660     } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
 661       TraverseOrder(func_call->args(), pre, post);
 662       TraverseOrder(func_call->block(), pre, post);
 663     } else if (root->AsIdentifier()) {
 664       // Nothing.
 665     } else if (const ListNode* list = root->AsList()) {
 666       for (const auto& node : list->contents())
 667         TraverseOrder(node, pre, post);
 668       TraverseOrder(list->End(), pre, post);
 669     } else if (root->AsLiteral()) {
 670       // Nothing.
 671     } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
 672       TraverseOrder(unaryop->operand(), pre, post);
 673     } else if (root->AsBlockComment()) {
 674       // Nothing.
 675     } else if (root->AsEnd()) {
 676       // Nothing.
 677     } else {
 678       CHECK(false) << "Unhandled case in TraverseOrder.";
 679     }
 680
 681     post->push_back(root);
 682   }
 683 }
 684
 685 void Parser::AssignComments(ParseNode* file) {
 686   // Start by generating a pre- and post- order traversal of the tree so we
 687   // can determine what's before and after comments.
 688   std::vector<const ParseNode*> pre;
 689   std::vector<const ParseNode*> post;
 690   TraverseOrder(file, &pre, &post);
 691
 692   // Assign line comments to syntax immediately following.
 693   int cur_comment = 0;
 694   for (const auto& node : pre) {
 695     const Location& start = node->GetRange().begin();
 696     while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
 697       if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
 698         const_cast<ParseNode*>(node)->comments_mutable()->append_before(
 699             line_comment_tokens_[cur_comment]);
 700         ++cur_comment;
 701       } else {
 702         break;
 703       }
 704     }
 705   }
 706
 707   // Remaining line comments go at end of file.
 708   for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
 709        ++cur_comment)
 710     file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
 711
 712   // Assign suffix to syntax immediately before.
 713   cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
 714   for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
 715        i != post.rend();
 716        ++i) {
 717     // Don't assign suffix comments to the function, list, or block, but instead
 718     // to the last thing inside.
 719     if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
 720       continue;
 721
 722     const Location& start = (*i)->GetRange().begin();
 723     const Location& end = (*i)->GetRange().end();
 724
 725     // Don't assign suffix comments to something that starts on an earlier
 726     // line, so that in:
 727     //
 728     // sources = [ "a",
 729     //     "b" ] # comment
 730     //
 731     // it's attached to "b", not sources = [ ... ].
 732     if (start.line_number() != end.line_number())
 733       continue;
 734
 735     while (cur_comment >= 0) {
 736       if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
 737         const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
 738             suffix_comment_tokens_[cur_comment]);
 739         --cur_comment;
 740       } else {
 741         break;
 742       }
 743     }
 744
 745     // Suffix comments were assigned in reverse, so if there were multiple on
 746     // the same node, they need to be reversed.
 747     if ((*i)->comments() && !(*i)->comments()->suffix().empty())
 748       const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
 749   }
 750 }