Correct blacklist entry message
[chromium-blink-merge.git] / tools / gn / parser.cc
blob649d4cfa1fa4cfac732fbb8ee28a373b554c7b12
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
12 // grammar:
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
21 namespace {
23 // Returns true if the two tokens are on the same line. We assume they're in
24 // the same file.
25 bool IsSameLine(const Token& a, const Token& b) {
26 DCHECK(a.location().file() == b.location().file());
27 return a.location().line_number() == b.location().line_number();
30 } // namespace
32 enum Precedence {
33 PRECEDENCE_ASSIGNMENT = 1,
34 PRECEDENCE_OR = 2,
35 PRECEDENCE_AND = 3,
36 PRECEDENCE_EQUALITY = 4,
37 PRECEDENCE_RELATION = 5,
38 PRECEDENCE_SUM = 6,
39 PRECEDENCE_PREFIX = 7,
40 PRECEDENCE_CALL = 8,
43 // The top-level for blocks/ifs is still recursive descent, the expression
44 // parser is a Pratt parser. The basic idea there is to have the precedences
45 // (and associativities) encoded relative to each other and only parse up
46 // until you hit something of that precedence. There's a dispatch table in
47 // expressions_ at the top of parser.cc that describes how each token
48 // dispatches if it's seen as either a prefix or infix operator, and if it's
49 // infix, what its precedence is.
51 // Refs:
52 // - http://javascript.crockford.com/tdop/tdop.html
53 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
55 // Indexed by Token::Type.
56 ParserHelper Parser::expressions_[] = {
57 {NULL, NULL, -1}, // INVALID
58 {&Parser::Literal, NULL, -1}, // INTEGER
59 {&Parser::Literal, NULL, -1}, // STRING
60 {&Parser::Literal, NULL, -1}, // TRUE_TOKEN
61 {&Parser::Literal, NULL, -1}, // FALSE_TOKEN
62 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
63 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
64 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
65 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
66 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
67 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
68 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
69 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
70 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
71 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
72 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
73 {NULL, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
74 {NULL, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
75 {&Parser::Not, NULL, -1}, // BANG
76 {&Parser::Group, NULL, -1}, // LEFT_PAREN
77 {NULL, NULL, -1}, // RIGHT_PAREN
78 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
79 {NULL, NULL, -1}, // RIGHT_BRACKET
80 {NULL, NULL, -1}, // LEFT_BRACE
81 {NULL, NULL, -1}, // RIGHT_BRACE
82 {NULL, NULL, -1}, // IF
83 {NULL, NULL, -1}, // ELSE
84 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
85 {NULL, NULL, -1}, // COMMA
86 {NULL, NULL, -1}, // COMMENT
89 Parser::Parser(const std::vector<Token>& tokens, Err* err)
90 : tokens_(tokens), err_(err), cur_(0) {
93 Parser::~Parser() {
96 // static
97 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
98 Err* err) {
99 Parser p(tokens, err);
100 return p.ParseFile().PassAs<ParseNode>();
103 // static
104 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
105 Err* err) {
106 Parser p(tokens, err);
107 return p.ParseExpression().Pass();
110 bool Parser::IsAssignment(const ParseNode* node) const {
111 return node && node->AsBinaryOp() &&
112 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
113 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
114 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
117 bool Parser::IsStatementBreak(Token::Type token_type) const {
118 switch (token_type) {
119 case Token::IDENTIFIER:
120 case Token::LEFT_BRACE:
121 case Token::RIGHT_BRACE:
122 case Token::IF:
123 case Token::ELSE:
124 return true;
125 default:
126 return false;
130 bool Parser::LookAhead(Token::Type type) {
131 if (at_end())
132 return false;
133 return cur_token().type() == type;
136 bool Parser::Match(Token::Type type) {
137 if (!LookAhead(type))
138 return false;
139 Consume();
140 return true;
143 Token Parser::Consume(Token::Type type, const char* error_message) {
144 Token::Type types[1] = { type };
145 return Consume(types, 1, error_message);
148 Token Parser::Consume(Token::Type* types,
149 size_t num_types,
150 const char* error_message) {
151 if (has_error()) {
152 // Don't overwrite current error, but make progress through tokens so that
153 // a loop that's expecting a particular token will still terminate.
154 cur_++;
155 return Token(Location(), Token::INVALID, base::StringPiece());
157 if (at_end()) {
158 const char kEOFMsg[] = "I hit EOF instead.";
159 if (tokens_.empty())
160 *err_ = Err(Location(), error_message, kEOFMsg);
161 else
162 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
163 return Token(Location(), Token::INVALID, base::StringPiece());
166 for (size_t i = 0; i < num_types; ++i) {
167 if (cur_token().type() == types[i])
168 return tokens_[cur_++];
170 *err_ = Err(cur_token(), error_message);
171 return Token(Location(), Token::INVALID, base::StringPiece());
174 Token Parser::Consume() {
175 return tokens_[cur_++];
178 scoped_ptr<ParseNode> Parser::ParseExpression() {
179 return ParseExpression(0);
182 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
183 if (at_end())
184 return scoped_ptr<ParseNode>();
186 Token token = Consume();
187 PrefixFunc prefix = expressions_[token.type()].prefix;
189 if (prefix == NULL) {
190 *err_ = Err(token,
191 std::string("Unexpected token '") + token.value().as_string() +
192 std::string("'"));
193 return scoped_ptr<ParseNode>();
196 scoped_ptr<ParseNode> left = (this->*prefix)(token);
197 if (has_error())
198 return left.Pass();
200 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
201 precedence <= expressions_[cur_token().type()].precedence) {
202 token = Consume();
203 InfixFunc infix = expressions_[token.type()].infix;
204 if (infix == NULL) {
205 *err_ = Err(token,
206 std::string("Unexpected token '") +
207 token.value().as_string() + std::string("'"));
208 return scoped_ptr<ParseNode>();
210 left = (this->*infix)(left.Pass(), token);
211 if (has_error())
212 return scoped_ptr<ParseNode>();
215 return left.Pass();
218 scoped_ptr<ParseNode> Parser::Literal(Token token) {
219 return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
222 scoped_ptr<ParseNode> Parser::Name(Token token) {
223 return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
226 scoped_ptr<ParseNode> Parser::Group(Token token) {
227 scoped_ptr<ParseNode> expr = ParseExpression();
228 if (has_error())
229 return scoped_ptr<ParseNode>();
230 Consume(Token::RIGHT_PAREN, "Expected ')'");
231 return expr.Pass();
234 scoped_ptr<ParseNode> Parser::Not(Token token) {
235 scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
236 if (has_error())
237 return scoped_ptr<ParseNode>();
238 scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
239 unary_op->set_op(token);
240 unary_op->set_operand(expr.Pass());
241 return unary_op.PassAs<ParseNode>();
244 scoped_ptr<ParseNode> Parser::List(Token node) {
245 scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
246 if (!has_error() && !at_end())
247 Consume(Token::RIGHT_BRACKET, "Expected ']'");
248 return list.Pass();
251 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
252 Token token) {
253 scoped_ptr<ParseNode> right =
254 ParseExpression(expressions_[token.type()].precedence + 1);
255 if (!right) {
256 *err_ =
257 Err(token,
258 "Expected right hand side for '" + token.value().as_string() + "'");
259 return scoped_ptr<ParseNode>();
261 scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
262 binary_op->set_op(token);
263 binary_op->set_left(left.Pass());
264 binary_op->set_right(right.Pass());
265 return binary_op.PassAs<ParseNode>();
268 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
269 Token token) {
270 scoped_ptr<ListNode> list(new ListNode);
271 list->set_begin_token(token);
272 list->set_end_token(token);
273 scoped_ptr<BlockNode> block;
274 bool has_arg = false;
275 if (Match(Token::LEFT_PAREN)) {
276 // Parsing a function call.
277 has_arg = true;
278 if (Match(Token::RIGHT_PAREN)) {
279 // Nothing, just an empty call.
280 } else {
281 list = ParseList(Token::RIGHT_PAREN, false);
282 if (has_error())
283 return scoped_ptr<ParseNode>();
284 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
286 // Optionally with a scope.
287 if (LookAhead(Token::LEFT_BRACE)) {
288 block = ParseBlock();
289 if (has_error())
290 return scoped_ptr<ParseNode>();
294 if (!left && !has_arg) {
295 // Not a function call, just a standalone identifier.
296 return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
298 scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
299 func_call->set_function(token);
300 func_call->set_args(list.Pass());
301 if (block)
302 func_call->set_block(block.Pass());
303 return func_call.PassAs<ParseNode>();
306 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
307 Token token) {
308 if (left->AsIdentifier() == NULL) {
309 *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
310 return scoped_ptr<ParseNode>();
312 scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
313 scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
314 assign->set_op(token);
315 assign->set_left(left.Pass());
316 assign->set_right(value.Pass());
317 return assign.PassAs<ParseNode>();
320 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
321 Token token) {
322 // TODO: Maybe support more complex expressions like a[0][0]. This would
323 // require work on the evaluator too.
324 if (left->AsIdentifier() == NULL) {
325 *err_ = Err(left.get(), "May only subscript simple identifiers");
326 return scoped_ptr<ParseNode>();
328 scoped_ptr<ParseNode> value = ParseExpression();
329 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
330 scoped_ptr<AccessorNode> accessor(new AccessorNode);
331 accessor->set_base(left->AsIdentifier()->value());
332 accessor->set_index(value.Pass());
333 return accessor.PassAs<ParseNode>();
336 // Does not Consume the start or end token.
337 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
338 bool allow_trailing_comma) {
339 scoped_ptr<ListNode> list(new ListNode);
340 list->set_begin_token(cur_token());
341 bool just_got_comma = false;
342 while (!LookAhead(stop_before)) {
343 just_got_comma = false;
344 // Why _OR? We're parsing things that are higher precedence than the ,
345 // that separates the items of the list. , should appear lower than
346 // boolean expressions (the lowest of which is OR), but above assignments.
347 list->append_item(ParseExpression(PRECEDENCE_OR));
348 if (has_error())
349 return scoped_ptr<ListNode>();
350 if (at_end()) {
351 *err_ =
352 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
353 return scoped_ptr<ListNode>();
355 just_got_comma = Match(Token::COMMA);
357 if (just_got_comma && !allow_trailing_comma) {
358 *err_ = Err(cur_token(), "Trailing comma");
359 return scoped_ptr<ListNode>();
361 list->set_end_token(cur_token());
362 return list.Pass();
365 scoped_ptr<ParseNode> Parser::ParseFile() {
366 scoped_ptr<BlockNode> file(new BlockNode(false));
367 for (;;) {
368 if (at_end())
369 break;
370 scoped_ptr<ParseNode> statement = ParseStatement();
371 if (!statement)
372 break;
373 file->append_statement(statement.Pass());
375 if (!at_end() && !has_error())
376 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
377 if (has_error())
378 return scoped_ptr<ParseNode>();
379 return file.PassAs<ParseNode>();
382 scoped_ptr<ParseNode> Parser::ParseStatement() {
383 if (LookAhead(Token::LEFT_BRACE)) {
384 return ParseBlock().PassAs<ParseNode>();
385 } else if (LookAhead(Token::IF)) {
386 return ParseCondition();
387 } else {
388 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
389 // to allow "pointless" expressions and return ParseExpression() directly.
390 scoped_ptr<ParseNode> stmt = ParseExpression();
391 if (stmt) {
392 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
393 return stmt.Pass();
395 if (!has_error()) {
396 Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
397 *err_ = Err(token, "Expecting assignment or function call.");
399 return scoped_ptr<ParseNode>();
403 scoped_ptr<BlockNode> Parser::ParseBlock() {
404 Token begin_token =
405 Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
406 if (has_error())
407 return scoped_ptr<BlockNode>();
408 scoped_ptr<BlockNode> block(new BlockNode(true));
409 block->set_begin_token(begin_token);
411 for (;;) {
412 if (LookAhead(Token::RIGHT_BRACE)) {
413 block->set_end_token(Consume());
414 break;
417 scoped_ptr<ParseNode> statement = ParseStatement();
418 if (!statement)
419 return scoped_ptr<BlockNode>();
420 block->append_statement(statement.Pass());
422 return block.Pass();
425 scoped_ptr<ParseNode> Parser::ParseCondition() {
426 scoped_ptr<ConditionNode> condition(new ConditionNode);
427 Consume(Token::IF, "Expected 'if'");
428 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
429 condition->set_condition(ParseExpression());
430 if (IsAssignment(condition->condition()))
431 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
432 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
433 condition->set_if_true(ParseBlock().Pass());
434 if (Match(Token::ELSE))
435 condition->set_if_false(ParseStatement().Pass());
436 if (has_error())
437 return scoped_ptr<ParseNode>();
438 return condition.PassAs<ParseNode>();