tools/gn/parser.cc

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "tools/gn/parser.h"
   6
   7 #include "base/logging.h"
   8 #include "tools/gn/functions.h"
   9 #include "tools/gn/operators.h"
  10 #include "tools/gn/token.h"
  11
  12 // grammar:
  13 //
  14 // file       := (statement)*
  15 // statement  := block | if | assignment
  16 // block      := '{' statement* '}'
  17 // if         := 'if' '(' expr ')' statement [ else ]
  18 // else       := 'else' (if | statement)*
  19 // assignment := ident {'=' | '+=' | '-='} expr
  20
  21 namespace {
  22
  23 // Returns true if the two tokens are on the same line. We assume they're in
  24 // the same file.
  25 bool IsSameLine(const Token& a, const Token& b) {
  26   DCHECK(a.location().file() == b.location().file());
  27   return a.location().line_number() == b.location().line_number();
  28 }
  29
  30 }  // namespace
  31
  32 enum Precedence {
  33   PRECEDENCE_ASSIGNMENT = 1,
  34   PRECEDENCE_OR = 2,
  35   PRECEDENCE_AND = 3,
  36   PRECEDENCE_EQUALITY = 4,
  37   PRECEDENCE_RELATION = 5,
  38   PRECEDENCE_SUM = 6,
  39   PRECEDENCE_PREFIX = 7,
  40   PRECEDENCE_CALL = 8,
  41 };
  42
  43 // The top-level for blocks/ifs is still recursive descent, the expression
  44 // parser is a Pratt parser. The basic idea there is to have the precedences
  45 // (and associativities) encoded relative to each other and only parse up
  46 // until you hit something of that precedence. There's a dispatch table in
  47 // expressions_ at the top of parser.cc that describes how each token
  48 // dispatches if it's seen as either a prefix or infix operator, and if it's
  49 // infix, what its precedence is.
  50 //
  51 // Refs:
  52 // - http://javascript.crockford.com/tdop/tdop.html
  53 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  54
  55 // Indexed by Token::Type.
  56 ParserHelper Parser::expressions_[] = {
  57   {NULL, NULL, -1},                                             // INVALID
  58   {&Parser::Literal, NULL, -1},                                 // INTEGER
  59   {&Parser::Literal, NULL, -1},                                 // STRING
  60   {&Parser::Literal, NULL, -1},                                 // TRUE_TOKEN
  61   {&Parser::Literal, NULL, -1},                                 // FALSE_TOKEN
  62   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // EQUAL
  63   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // PLUS
  64   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // MINUS
  65   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // PLUS_EQUALS
  66   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // MINUS_EQUALS
  67   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // EQUAL_EQUAL
  68   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // NOT_EQUAL
  69   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_EQUAL
  70   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_EQUAL
  71   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_THAN
  72   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_THAN
  73   {NULL, &Parser::BinaryOperator, PRECEDENCE_AND},              // BOOLEAN_AND
  74   {NULL, &Parser::BinaryOperator, PRECEDENCE_OR},               // BOOLEAN_OR
  75   {&Parser::Not, NULL, -1},                                     // BANG
  76   {&Parser::Group, NULL, -1},                                   // LEFT_PAREN
  77   {NULL, NULL, -1},                                             // RIGHT_PAREN
  78   {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},         // LEFT_BRACKET
  79   {NULL, NULL, -1},                                             // RIGHT_BRACKET
  80   {NULL, NULL, -1},                                             // LEFT_BRACE
  81   {NULL, NULL, -1},                                             // RIGHT_BRACE
  82   {NULL, NULL, -1},                                             // IF
  83   {NULL, NULL, -1},                                             // ELSE
  84   {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
  85   {NULL, NULL, -1},                                             // COMMA
  86   {NULL, NULL, -1},                                             // COMMENT
  87 };
  88
  89 Parser::Parser(const std::vector<Token>& tokens, Err* err)
  90     : tokens_(tokens), err_(err), cur_(0) {
  91 }
  92
  93 Parser::~Parser() {
  94 }
  95
  96 // static
  97 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
  98                                     Err* err) {
  99   Parser p(tokens, err);
 100   return p.ParseFile().PassAs<ParseNode>();
 101 }
 102
 103 // static
 104 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
 105                                               Err* err) {
 106   Parser p(tokens, err);
 107   return p.ParseExpression().Pass();
 108 }
 109
 110 bool Parser::IsAssignment(const ParseNode* node) const {
 111   return node && node->AsBinaryOp() &&
 112          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
 113           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
 114           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
 115 }
 116
 117 bool Parser::IsStatementBreak(Token::Type token_type) const {
 118   switch (token_type) {
 119     case Token::IDENTIFIER:
 120     case Token::LEFT_BRACE:
 121     case Token::RIGHT_BRACE:
 122     case Token::IF:
 123     case Token::ELSE:
 124       return true;
 125     default:
 126       return false;
 127   }
 128 }
 129
 130 bool Parser::LookAhead(Token::Type type) {
 131   if (at_end())
 132     return false;
 133   return cur_token().type() == type;
 134 }
 135
 136 bool Parser::Match(Token::Type type) {
 137   if (!LookAhead(type))
 138     return false;
 139   Consume();
 140   return true;
 141 }
 142
 143 Token Parser::Consume(Token::Type type, const char* error_message) {
 144   Token::Type types[1] = { type };
 145   return Consume(types, 1, error_message);
 146 }
 147
 148 Token Parser::Consume(Token::Type* types,
 149                       size_t num_types,
 150                       const char* error_message) {
 151   if (has_error()) {
 152     // Don't overwrite current error, but make progress through tokens so that
 153     // a loop that's expecting a particular token will still terminate.
 154     cur_++;
 155     return Token(Location(), Token::INVALID, base::StringPiece());
 156   }
 157   if (at_end()) {
 158     const char kEOFMsg[] = "I hit EOF instead.";
 159     if (tokens_.empty())
 160       *err_ = Err(Location(), error_message, kEOFMsg);
 161     else
 162       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
 163     return Token(Location(), Token::INVALID, base::StringPiece());
 164   }
 165
 166   for (size_t i = 0; i < num_types; ++i) {
 167     if (cur_token().type() == types[i])
 168       return tokens_[cur_++];
 169   }
 170   *err_ = Err(cur_token(), error_message);
 171   return Token(Location(), Token::INVALID, base::StringPiece());
 172 }
 173
 174 Token Parser::Consume() {
 175   return tokens_[cur_++];
 176 }
 177
 178 scoped_ptr<ParseNode> Parser::ParseExpression() {
 179   return ParseExpression(0);
 180 }
 181
 182 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
 183   if (at_end())
 184     return scoped_ptr<ParseNode>();
 185
 186   Token token = Consume();
 187   PrefixFunc prefix = expressions_[token.type()].prefix;
 188
 189   if (prefix == NULL) {
 190     *err_ = Err(token,
 191                 std::string("Unexpected token '") + token.value().as_string() +
 192                     std::string("'"));
 193     return scoped_ptr<ParseNode>();
 194   }
 195
 196   scoped_ptr<ParseNode> left = (this->*prefix)(token);
 197   if (has_error())
 198     return left.Pass();
 199
 200   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
 201          precedence <= expressions_[cur_token().type()].precedence) {
 202     token = Consume();
 203     InfixFunc infix = expressions_[token.type()].infix;
 204     if (infix == NULL) {
 205       *err_ = Err(token,
 206                   std::string("Unexpected token '") +
 207                       token.value().as_string() + std::string("'"));
 208       return scoped_ptr<ParseNode>();
 209     }
 210     left = (this->*infix)(left.Pass(), token);
 211     if (has_error())
 212       return scoped_ptr<ParseNode>();
 213   }
 214
 215   return left.Pass();
 216 }
 217
 218 scoped_ptr<ParseNode> Parser::Literal(Token token) {
 219   return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
 220 }
 221
 222 scoped_ptr<ParseNode> Parser::Name(Token token) {
 223   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
 224 }
 225
 226 scoped_ptr<ParseNode> Parser::Group(Token token) {
 227   scoped_ptr<ParseNode> expr = ParseExpression();
 228   if (has_error())
 229     return scoped_ptr<ParseNode>();
 230   Consume(Token::RIGHT_PAREN, "Expected ')'");
 231   return expr.Pass();
 232 }
 233
 234 scoped_ptr<ParseNode> Parser::Not(Token token) {
 235   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
 236   if (has_error())
 237     return scoped_ptr<ParseNode>();
 238   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
 239   unary_op->set_op(token);
 240   unary_op->set_operand(expr.Pass());
 241   return unary_op.PassAs<ParseNode>();
 242 }
 243
 244 scoped_ptr<ParseNode> Parser::List(Token node) {
 245   scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
 246   if (!has_error() && !at_end())
 247     Consume(Token::RIGHT_BRACKET, "Expected ']'");
 248   return list.Pass();
 249 }
 250
 251 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
 252                                              Token token) {
 253   scoped_ptr<ParseNode> right =
 254       ParseExpression(expressions_[token.type()].precedence + 1);
 255   if (!right) {
 256     *err_ =
 257         Err(token,
 258             "Expected right hand side for '" + token.value().as_string() + "'");
 259     return scoped_ptr<ParseNode>();
 260   }
 261   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
 262   binary_op->set_op(token);
 263   binary_op->set_left(left.Pass());
 264   binary_op->set_right(right.Pass());
 265   return binary_op.PassAs<ParseNode>();
 266 }
 267
 268 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
 269                                                Token token) {
 270   scoped_ptr<ListNode> list(new ListNode);
 271   list->set_begin_token(token);
 272   list->set_end_token(token);
 273   scoped_ptr<BlockNode> block;
 274   bool has_arg = false;
 275   if (Match(Token::LEFT_PAREN)) {
 276     // Parsing a function call.
 277     has_arg = true;
 278     if (Match(Token::RIGHT_PAREN)) {
 279       // Nothing, just an empty call.
 280     } else {
 281       list = ParseList(Token::RIGHT_PAREN, false);
 282       if (has_error())
 283         return scoped_ptr<ParseNode>();
 284       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
 285     }
 286     // Optionally with a scope.
 287     if (LookAhead(Token::LEFT_BRACE)) {
 288       block = ParseBlock();
 289       if (has_error())
 290         return scoped_ptr<ParseNode>();
 291     }
 292   }
 293
 294   if (!left && !has_arg) {
 295     // Not a function call, just a standalone identifier.
 296     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
 297   }
 298   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
 299   func_call->set_function(token);
 300   func_call->set_args(list.Pass());
 301   if (block)
 302     func_call->set_block(block.Pass());
 303   return func_call.PassAs<ParseNode>();
 304 }
 305
 306 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
 307                                          Token token) {
 308   if (left->AsIdentifier() == NULL) {
 309     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
 310     return scoped_ptr<ParseNode>();
 311   }
 312   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
 313   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
 314   assign->set_op(token);
 315   assign->set_left(left.Pass());
 316   assign->set_right(value.Pass());
 317   return assign.PassAs<ParseNode>();
 318 }
 319
 320 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
 321                                         Token token) {
 322   // TODO: Maybe support more complex expressions like a[0][0]. This would
 323   // require work on the evaluator too.
 324   if (left->AsIdentifier() == NULL) {
 325     *err_ = Err(left.get(), "May only subscript simple identifiers");
 326     return scoped_ptr<ParseNode>();
 327   }
 328   scoped_ptr<ParseNode> value = ParseExpression();
 329   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
 330   scoped_ptr<AccessorNode> accessor(new AccessorNode);
 331   accessor->set_base(left->AsIdentifier()->value());
 332   accessor->set_index(value.Pass());
 333   return accessor.PassAs<ParseNode>();
 334 }
 335
 336 // Does not Consume the start or end token.
 337 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
 338                                        bool allow_trailing_comma) {
 339   scoped_ptr<ListNode> list(new ListNode);
 340   list->set_begin_token(cur_token());
 341   bool just_got_comma = false;
 342   while (!LookAhead(stop_before)) {
 343     just_got_comma = false;
 344     // Why _OR? We're parsing things that are higher precedence than the ,
 345     // that separates the items of the list. , should appear lower than
 346     // boolean expressions (the lowest of which is OR), but above assignments.
 347     list->append_item(ParseExpression(PRECEDENCE_OR));
 348     if (has_error())
 349       return scoped_ptr<ListNode>();
 350     if (at_end()) {
 351       *err_ =
 352           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
 353       return scoped_ptr<ListNode>();
 354     }
 355     just_got_comma = Match(Token::COMMA);
 356   }
 357   if (just_got_comma && !allow_trailing_comma) {
 358     *err_ = Err(cur_token(), "Trailing comma");
 359     return scoped_ptr<ListNode>();
 360   }
 361   list->set_end_token(cur_token());
 362   return list.Pass();
 363 }
 364
 365 scoped_ptr<ParseNode> Parser::ParseFile() {
 366   scoped_ptr<BlockNode> file(new BlockNode(false));
 367   for (;;) {
 368     if (at_end())
 369       break;
 370     scoped_ptr<ParseNode> statement = ParseStatement();
 371     if (!statement)
 372       break;
 373     file->append_statement(statement.Pass());
 374   }
 375   if (!at_end() && !has_error())
 376     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
 377   if (has_error())
 378     return scoped_ptr<ParseNode>();
 379   return file.PassAs<ParseNode>();
 380 }
 381
 382 scoped_ptr<ParseNode> Parser::ParseStatement() {
 383   if (LookAhead(Token::LEFT_BRACE)) {
 384     return ParseBlock().PassAs<ParseNode>();
 385   } else if (LookAhead(Token::IF)) {
 386     return ParseCondition();
 387   } else {
 388     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
 389     // to allow "pointless" expressions and return ParseExpression() directly.
 390     scoped_ptr<ParseNode> stmt = ParseExpression();
 391     if (stmt) {
 392       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
 393         return stmt.Pass();
 394     }
 395     if (!has_error()) {
 396       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
 397       *err_ = Err(token, "Expecting assignment or function call.");
 398     }
 399     return scoped_ptr<ParseNode>();
 400   }
 401 }
 402
 403 scoped_ptr<BlockNode> Parser::ParseBlock() {
 404   Token begin_token =
 405       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
 406   if (has_error())
 407     return scoped_ptr<BlockNode>();
 408   scoped_ptr<BlockNode> block(new BlockNode(true));
 409   block->set_begin_token(begin_token);
 410
 411   for (;;) {
 412     if (LookAhead(Token::RIGHT_BRACE)) {
 413       block->set_end_token(Consume());
 414       break;
 415     }
 416
 417     scoped_ptr<ParseNode> statement = ParseStatement();
 418     if (!statement)
 419       return scoped_ptr<BlockNode>();
 420     block->append_statement(statement.Pass());
 421   }
 422   return block.Pass();
 423 }
 424
 425 scoped_ptr<ParseNode> Parser::ParseCondition() {
 426   scoped_ptr<ConditionNode> condition(new ConditionNode);
 427   Consume(Token::IF, "Expected 'if'");
 428   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
 429   condition->set_condition(ParseExpression());
 430   if (IsAssignment(condition->condition()))
 431     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
 432   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
 433   condition->set_if_true(ParseBlock().Pass());
 434   if (Match(Token::ELSE))
 435     condition->set_if_false(ParseStatement().Pass());
 436   if (has_error())
 437     return scoped_ptr<ParseNode>();
 438   return condition.PassAs<ParseNode>();
 439 }