tools/gn/parser.cc

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "tools/gn/parser.h"
   6
   7 #include "base/logging.h"
   8 #include "tools/gn/functions.h"
   9 #include "tools/gn/operators.h"
  10 #include "tools/gn/token.h"
  11
  12 // grammar:
  13 //
  14 // file       := (statement)*
  15 // statement  := block | if | assignment
  16 // block      := '{' statement* '}'
  17 // if         := 'if' '(' expr ')' statement [ else ]
  18 // else       := 'else' (if | statement)*
  19 // assignment := ident {'=' | '+=' | '-='} expr
  20
  21 enum Precedence {
  22   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
  23   PRECEDENCE_OR = 2,
  24   PRECEDENCE_AND = 3,
  25   PRECEDENCE_EQUALITY = 4,
  26   PRECEDENCE_RELATION = 5,
  27   PRECEDENCE_SUM = 6,
  28   PRECEDENCE_PREFIX = 7,
  29   PRECEDENCE_CALL = 8,
  30   PRECEDENCE_DOT = 9,         // Highest precedence.
  31 };
  32
  33 // The top-level for blocks/ifs is recursive descent, the expression parser is
  34 // a Pratt parser. The basic idea there is to have the precedences (and
  35 // associativities) encoded relative to each other and only parse up until you
  36 // hit something of that precedence. There's a dispatch table in expressions_
  37 // at the top of parser.cc that describes how each token dispatches if it's
  38 // seen as either a prefix or infix operator, and if it's infix, what its
  39 // precedence is.
  40 //
  41 // Refs:
  42 // - http://javascript.crockford.com/tdop/tdop.html
  43 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  44
  45 // Indexed by Token::Type.
  46 ParserHelper Parser::expressions_[] = {
  47   {NULL, NULL, -1},                                             // INVALID
  48   {&Parser::Literal, NULL, -1},                                 // INTEGER
  49   {&Parser::Literal, NULL, -1},                                 // STRING
  50   {&Parser::Literal, NULL, -1},                                 // TRUE_TOKEN
  51   {&Parser::Literal, NULL, -1},                                 // FALSE_TOKEN
  52   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // EQUAL
  53   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // PLUS
  54   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // MINUS
  55   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // PLUS_EQUALS
  56   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // MINUS_EQUALS
  57   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // EQUAL_EQUAL
  58   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // NOT_EQUAL
  59   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_EQUAL
  60   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_EQUAL
  61   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_THAN
  62   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_THAN
  63   {NULL, &Parser::BinaryOperator, PRECEDENCE_AND},              // BOOLEAN_AND
  64   {NULL, &Parser::BinaryOperator, PRECEDENCE_OR},               // BOOLEAN_OR
  65   {&Parser::Not, NULL, -1},                                     // BANG
  66   {NULL, &Parser::DotOperator, PRECEDENCE_DOT},                 // DOT
  67   {&Parser::Group, NULL, -1},                                   // LEFT_PAREN
  68   {NULL, NULL, -1},                                             // RIGHT_PAREN
  69   {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},         // LEFT_BRACKET
  70   {NULL, NULL, -1},                                             // RIGHT_BRACKET
  71   {NULL, NULL, -1},                                             // LEFT_BRACE
  72   {NULL, NULL, -1},                                             // RIGHT_BRACE
  73   {NULL, NULL, -1},                                             // IF
  74   {NULL, NULL, -1},                                             // ELSE
  75   {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
  76   {NULL, NULL, -1},                                             // COMMA
  77   {NULL, NULL, -1},                                             // COMMENT
  78 };
  79
  80 Parser::Parser(const std::vector<Token>& tokens, Err* err)
  81     : tokens_(tokens), err_(err), cur_(0) {
  82 }
  83
  84 Parser::~Parser() {
  85 }
  86
  87 // static
  88 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
  89                                     Err* err) {
  90   Parser p(tokens, err);
  91   return p.ParseFile().PassAs<ParseNode>();
  92 }
  93
  94 // static
  95 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
  96                                               Err* err) {
  97   Parser p(tokens, err);
  98   return p.ParseExpression().Pass();
  99 }
 100
 101 bool Parser::IsAssignment(const ParseNode* node) const {
 102   return node && node->AsBinaryOp() &&
 103          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
 104           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
 105           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
 106 }
 107
 108 bool Parser::IsStatementBreak(Token::Type token_type) const {
 109   switch (token_type) {
 110     case Token::IDENTIFIER:
 111     case Token::LEFT_BRACE:
 112     case Token::RIGHT_BRACE:
 113     case Token::IF:
 114     case Token::ELSE:
 115       return true;
 116     default:
 117       return false;
 118   }
 119 }
 120
 121 bool Parser::LookAhead(Token::Type type) {
 122   if (at_end())
 123     return false;
 124   return cur_token().type() == type;
 125 }
 126
 127 bool Parser::Match(Token::Type type) {
 128   if (!LookAhead(type))
 129     return false;
 130   Consume();
 131   return true;
 132 }
 133
 134 Token Parser::Consume(Token::Type type, const char* error_message) {
 135   Token::Type types[1] = { type };
 136   return Consume(types, 1, error_message);
 137 }
 138
 139 Token Parser::Consume(Token::Type* types,
 140                       size_t num_types,
 141                       const char* error_message) {
 142   if (has_error()) {
 143     // Don't overwrite current error, but make progress through tokens so that
 144     // a loop that's expecting a particular token will still terminate.
 145     cur_++;
 146     return Token(Location(), Token::INVALID, base::StringPiece());
 147   }
 148   if (at_end()) {
 149     const char kEOFMsg[] = "I hit EOF instead.";
 150     if (tokens_.empty())
 151       *err_ = Err(Location(), error_message, kEOFMsg);
 152     else
 153       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
 154     return Token(Location(), Token::INVALID, base::StringPiece());
 155   }
 156
 157   for (size_t i = 0; i < num_types; ++i) {
 158     if (cur_token().type() == types[i])
 159       return tokens_[cur_++];
 160   }
 161   *err_ = Err(cur_token(), error_message);
 162   return Token(Location(), Token::INVALID, base::StringPiece());
 163 }
 164
 165 Token Parser::Consume() {
 166   return tokens_[cur_++];
 167 }
 168
 169 scoped_ptr<ParseNode> Parser::ParseExpression() {
 170   return ParseExpression(0);
 171 }
 172
 173 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
 174   if (at_end())
 175     return scoped_ptr<ParseNode>();
 176
 177   Token token = Consume();
 178   PrefixFunc prefix = expressions_[token.type()].prefix;
 179
 180   if (prefix == NULL) {
 181     *err_ = Err(token,
 182                 std::string("Unexpected token '") + token.value().as_string() +
 183                     std::string("'"));
 184     return scoped_ptr<ParseNode>();
 185   }
 186
 187   scoped_ptr<ParseNode> left = (this->*prefix)(token);
 188   if (has_error())
 189     return left.Pass();
 190
 191   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
 192          precedence <= expressions_[cur_token().type()].precedence) {
 193     token = Consume();
 194     InfixFunc infix = expressions_[token.type()].infix;
 195     if (infix == NULL) {
 196       *err_ = Err(token,
 197                   std::string("Unexpected token '") +
 198                       token.value().as_string() + std::string("'"));
 199       return scoped_ptr<ParseNode>();
 200     }
 201     left = (this->*infix)(left.Pass(), token);
 202     if (has_error())
 203       return scoped_ptr<ParseNode>();
 204   }
 205
 206   return left.Pass();
 207 }
 208
 209 scoped_ptr<ParseNode> Parser::Literal(Token token) {
 210   return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
 211 }
 212
 213 scoped_ptr<ParseNode> Parser::Name(Token token) {
 214   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
 215 }
 216
 217 scoped_ptr<ParseNode> Parser::Group(Token token) {
 218   scoped_ptr<ParseNode> expr = ParseExpression();
 219   if (has_error())
 220     return scoped_ptr<ParseNode>();
 221   Consume(Token::RIGHT_PAREN, "Expected ')'");
 222   return expr.Pass();
 223 }
 224
 225 scoped_ptr<ParseNode> Parser::Not(Token token) {
 226   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
 227   if (has_error())
 228     return scoped_ptr<ParseNode>();
 229   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
 230   unary_op->set_op(token);
 231   unary_op->set_operand(expr.Pass());
 232   return unary_op.PassAs<ParseNode>();
 233 }
 234
 235 scoped_ptr<ParseNode> Parser::List(Token node) {
 236   scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
 237   if (!has_error() && !at_end())
 238     Consume(Token::RIGHT_BRACKET, "Expected ']'");
 239   return list.Pass();
 240 }
 241
 242 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
 243                                              Token token) {
 244   scoped_ptr<ParseNode> right =
 245       ParseExpression(expressions_[token.type()].precedence + 1);
 246   if (!right) {
 247     *err_ =
 248         Err(token,
 249             "Expected right hand side for '" + token.value().as_string() + "'");
 250     return scoped_ptr<ParseNode>();
 251   }
 252   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
 253   binary_op->set_op(token);
 254   binary_op->set_left(left.Pass());
 255   binary_op->set_right(right.Pass());
 256   return binary_op.PassAs<ParseNode>();
 257 }
 258
 259 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
 260                                                Token token) {
 261   scoped_ptr<ListNode> list(new ListNode);
 262   list->set_begin_token(token);
 263   list->set_end_token(token);
 264   scoped_ptr<BlockNode> block;
 265   bool has_arg = false;
 266   if (Match(Token::LEFT_PAREN)) {
 267     // Parsing a function call.
 268     has_arg = true;
 269     if (Match(Token::RIGHT_PAREN)) {
 270       // Nothing, just an empty call.
 271     } else {
 272       list = ParseList(Token::RIGHT_PAREN, false);
 273       if (has_error())
 274         return scoped_ptr<ParseNode>();
 275       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
 276     }
 277     // Optionally with a scope.
 278     if (LookAhead(Token::LEFT_BRACE)) {
 279       block = ParseBlock();
 280       if (has_error())
 281         return scoped_ptr<ParseNode>();
 282     }
 283   }
 284
 285   if (!left && !has_arg) {
 286     // Not a function call, just a standalone identifier.
 287     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
 288   }
 289   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
 290   func_call->set_function(token);
 291   func_call->set_args(list.Pass());
 292   if (block)
 293     func_call->set_block(block.Pass());
 294   return func_call.PassAs<ParseNode>();
 295 }
 296
 297 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
 298                                          Token token) {
 299   if (left->AsIdentifier() == NULL) {
 300     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
 301     return scoped_ptr<ParseNode>();
 302   }
 303   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
 304   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
 305   assign->set_op(token);
 306   assign->set_left(left.Pass());
 307   assign->set_right(value.Pass());
 308   return assign.PassAs<ParseNode>();
 309 }
 310
 311 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
 312                                         Token token) {
 313   // TODO: Maybe support more complex expressions like a[0][0]. This would
 314   // require work on the evaluator too.
 315   if (left->AsIdentifier() == NULL) {
 316     *err_ = Err(left.get(), "May only subscript identifiers.",
 317         "The thing on the left hand side of the [] must be an identifier\n"
 318         "and not an expression. If you need this, you'll have to assign the\n"
 319         "value to a temporary before subscripting. Sorry.");
 320     return scoped_ptr<ParseNode>();
 321   }
 322   scoped_ptr<ParseNode> value = ParseExpression();
 323   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
 324   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 325   accessor->set_base(left->AsIdentifier()->value());
 326   accessor->set_index(value.Pass());
 327   return accessor.PassAs<ParseNode>();
 328 }
 329
 330 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
 331                                           Token token) {
 332   if (left->AsIdentifier() == NULL) {
 333     *err_ = Err(left.get(), "May only use \".\" for identifiers.",
 334         "The thing on the left hand side of the dot must be an identifier\n"
 335         "and not an expression. If you need this, you'll have to assign the\n"
 336         "value to a temporary first. Sorry.");
 337     return scoped_ptr<ParseNode>();
 338   }
 339
 340   scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
 341   if (!right || !right->AsIdentifier()) {
 342     *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
 343         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
 344     return scoped_ptr<ParseNode>();
 345   }
 346
 347   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 348   accessor->set_base(left->AsIdentifier()->value());
 349   accessor->set_member(scoped_ptr<IdentifierNode>(
 350       static_cast<IdentifierNode*>(right.release())));
 351   return accessor.PassAs<ParseNode>();
 352 }
 353
 354 // Does not Consume the start or end token.
 355 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
 356                                        bool allow_trailing_comma) {
 357   scoped_ptr<ListNode> list(new ListNode);
 358   list->set_begin_token(cur_token());
 359   bool just_got_comma = false;
 360   bool first_time = true;
 361   while (!LookAhead(stop_before)) {
 362     if (!first_time) {
 363       if (!just_got_comma) {
 364         // Require commas separate things in lists.
 365         *err_ = Err(cur_token(), "Expected comma between items.");
 366         return scoped_ptr<ListNode>();
 367       }
 368     }
 369     first_time = false;
 370
 371     // Why _OR? We're parsing things that are higher precedence than the ,
 372     // that separates the items of the list. , should appear lower than
 373     // boolean expressions (the lowest of which is OR), but above assignments.
 374     list->append_item(ParseExpression(PRECEDENCE_OR));
 375     if (has_error())
 376       return scoped_ptr<ListNode>();
 377     if (at_end()) {
 378       *err_ =
 379           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
 380       return scoped_ptr<ListNode>();
 381     }
 382     just_got_comma = Match(Token::COMMA);
 383   }
 384   if (just_got_comma && !allow_trailing_comma) {
 385     *err_ = Err(cur_token(), "Trailing comma");
 386     return scoped_ptr<ListNode>();
 387   }
 388   list->set_end_token(cur_token());
 389   return list.Pass();
 390 }
 391
 392 scoped_ptr<ParseNode> Parser::ParseFile() {
 393   scoped_ptr<BlockNode> file(new BlockNode(false));
 394   for (;;) {
 395     if (at_end())
 396       break;
 397     scoped_ptr<ParseNode> statement = ParseStatement();
 398     if (!statement)
 399       break;
 400     file->append_statement(statement.Pass());
 401   }
 402   if (!at_end() && !has_error())
 403     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
 404   if (has_error())
 405     return scoped_ptr<ParseNode>();
 406   return file.PassAs<ParseNode>();
 407 }
 408
 409 scoped_ptr<ParseNode> Parser::ParseStatement() {
 410   if (LookAhead(Token::LEFT_BRACE)) {
 411     return ParseBlock().PassAs<ParseNode>();
 412   } else if (LookAhead(Token::IF)) {
 413     return ParseCondition();
 414   } else {
 415     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
 416     // to allow "pointless" expressions and return ParseExpression() directly.
 417     scoped_ptr<ParseNode> stmt = ParseExpression();
 418     if (stmt) {
 419       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
 420         return stmt.Pass();
 421     }
 422     if (!has_error()) {
 423       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
 424       *err_ = Err(token, "Expecting assignment or function call.");
 425     }
 426     return scoped_ptr<ParseNode>();
 427   }
 428 }
 429
 430 scoped_ptr<BlockNode> Parser::ParseBlock() {
 431   Token begin_token =
 432       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
 433   if (has_error())
 434     return scoped_ptr<BlockNode>();
 435   scoped_ptr<BlockNode> block(new BlockNode(true));
 436   block->set_begin_token(begin_token);
 437
 438   for (;;) {
 439     if (LookAhead(Token::RIGHT_BRACE)) {
 440       block->set_end_token(Consume());
 441       break;
 442     }
 443
 444     scoped_ptr<ParseNode> statement = ParseStatement();
 445     if (!statement)
 446       return scoped_ptr<BlockNode>();
 447     block->append_statement(statement.Pass());
 448   }
 449   return block.Pass();
 450 }
 451
 452 scoped_ptr<ParseNode> Parser::ParseCondition() {
 453   scoped_ptr<ConditionNode> condition(new ConditionNode);
 454   Consume(Token::IF, "Expected 'if'");
 455   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
 456   condition->set_condition(ParseExpression());
 457   if (IsAssignment(condition->condition()))
 458     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
 459   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
 460   condition->set_if_true(ParseBlock().Pass());
 461   if (Match(Token::ELSE))
 462     condition->set_if_false(ParseStatement().Pass());
 463   if (has_error())
 464     return scoped_ptr<ParseNode>();
 465   return condition.PassAs<ParseNode>();
 466 }