Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / tools / gn / parser.cc
blob38f7abbf2dece58d5957e4378e98e7cf08ff4cfe
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/parser.h"
7 #include "base/logging.h"
8 #include "tools/gn/functions.h"
9 #include "tools/gn/operators.h"
10 #include "tools/gn/token.h"
12 // grammar:
14 // file := (statement)*
15 // statement := block | if | assignment
16 // block := '{' statement* '}'
17 // if := 'if' '(' expr ')' statement [ else ]
18 // else := 'else' (if | statement)*
19 // assignment := ident {'=' | '+=' | '-='} expr
21 enum Precedence {
22 PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence.
23 PRECEDENCE_OR = 2,
24 PRECEDENCE_AND = 3,
25 PRECEDENCE_EQUALITY = 4,
26 PRECEDENCE_RELATION = 5,
27 PRECEDENCE_SUM = 6,
28 PRECEDENCE_PREFIX = 7,
29 PRECEDENCE_CALL = 8,
30 PRECEDENCE_DOT = 9, // Highest precedence.
33 // The top-level for blocks/ifs is recursive descent, the expression parser is
34 // a Pratt parser. The basic idea there is to have the precedences (and
35 // associativities) encoded relative to each other and only parse up until you
36 // hit something of that precedence. There's a dispatch table in expressions_
37 // at the top of parser.cc that describes how each token dispatches if it's
38 // seen as either a prefix or infix operator, and if it's infix, what its
39 // precedence is.
41 // Refs:
42 // - http://javascript.crockford.com/tdop/tdop.html
43 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
45 // Indexed by Token::Type.
46 ParserHelper Parser::expressions_[] = {
47 {NULL, NULL, -1}, // INVALID
48 {&Parser::Literal, NULL, -1}, // INTEGER
49 {&Parser::Literal, NULL, -1}, // STRING
50 {&Parser::Literal, NULL, -1}, // TRUE_TOKEN
51 {&Parser::Literal, NULL, -1}, // FALSE_TOKEN
52 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
53 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
54 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
55 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
56 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
57 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
58 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
59 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
60 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
61 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
62 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
63 {NULL, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
64 {NULL, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
65 {&Parser::Not, NULL, -1}, // BANG
66 {NULL, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT
67 {&Parser::Group, NULL, -1}, // LEFT_PAREN
68 {NULL, NULL, -1}, // RIGHT_PAREN
69 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
70 {NULL, NULL, -1}, // RIGHT_BRACKET
71 {NULL, NULL, -1}, // LEFT_BRACE
72 {NULL, NULL, -1}, // RIGHT_BRACE
73 {NULL, NULL, -1}, // IF
74 {NULL, NULL, -1}, // ELSE
75 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
76 {NULL, NULL, -1}, // COMMA
77 {NULL, NULL, -1}, // UNCLASSIFIED_COMMENT
78 {NULL, NULL, -1}, // LINE_COMMENT
79 {NULL, NULL, -1}, // SUFFIX_COMMENT
80 {&Parser::BlockComment, NULL, -1}, // BLOCK_COMMENT
83 Parser::Parser(const std::vector<Token>& tokens, Err* err)
84 : err_(err), cur_(0) {
85 for (const auto& token : tokens) {
86 switch(token.type()) {
87 case Token::LINE_COMMENT:
88 line_comment_tokens_.push_back(token);
89 break;
90 case Token::SUFFIX_COMMENT:
91 suffix_comment_tokens_.push_back(token);
92 break;
93 default:
94 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
95 // through the real parser.
96 tokens_.push_back(token);
97 break;
102 Parser::~Parser() {
105 // static
106 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
107 Err* err) {
108 Parser p(tokens, err);
109 return p.ParseFile();
112 // static
113 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
114 Err* err) {
115 Parser p(tokens, err);
116 return p.ParseExpression().Pass();
119 bool Parser::IsAssignment(const ParseNode* node) const {
120 return node && node->AsBinaryOp() &&
121 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
122 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
123 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
126 bool Parser::IsStatementBreak(Token::Type token_type) const {
127 switch (token_type) {
128 case Token::IDENTIFIER:
129 case Token::LEFT_BRACE:
130 case Token::RIGHT_BRACE:
131 case Token::IF:
132 case Token::ELSE:
133 return true;
134 default:
135 return false;
139 bool Parser::LookAhead(Token::Type type) {
140 if (at_end())
141 return false;
142 return cur_token().type() == type;
145 bool Parser::Match(Token::Type type) {
146 if (!LookAhead(type))
147 return false;
148 Consume();
149 return true;
152 Token Parser::Consume(Token::Type type, const char* error_message) {
153 Token::Type types[1] = { type };
154 return Consume(types, 1, error_message);
157 Token Parser::Consume(Token::Type* types,
158 size_t num_types,
159 const char* error_message) {
160 if (has_error()) {
161 // Don't overwrite current error, but make progress through tokens so that
162 // a loop that's expecting a particular token will still terminate.
163 cur_++;
164 return Token(Location(), Token::INVALID, base::StringPiece());
166 if (at_end()) {
167 const char kEOFMsg[] = "I hit EOF instead.";
168 if (tokens_.empty())
169 *err_ = Err(Location(), error_message, kEOFMsg);
170 else
171 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
172 return Token(Location(), Token::INVALID, base::StringPiece());
175 for (size_t i = 0; i < num_types; ++i) {
176 if (cur_token().type() == types[i])
177 return tokens_[cur_++];
179 *err_ = Err(cur_token(), error_message);
180 return Token(Location(), Token::INVALID, base::StringPiece());
183 Token Parser::Consume() {
184 return tokens_[cur_++];
187 scoped_ptr<ParseNode> Parser::ParseExpression() {
188 return ParseExpression(0);
191 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
192 if (at_end())
193 return scoped_ptr<ParseNode>();
195 Token token = Consume();
196 PrefixFunc prefix = expressions_[token.type()].prefix;
198 if (prefix == NULL) {
199 *err_ = Err(token,
200 std::string("Unexpected token '") + token.value().as_string() +
201 std::string("'"));
202 return scoped_ptr<ParseNode>();
205 scoped_ptr<ParseNode> left = (this->*prefix)(token);
206 if (has_error())
207 return left.Pass();
209 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
210 precedence <= expressions_[cur_token().type()].precedence) {
211 token = Consume();
212 InfixFunc infix = expressions_[token.type()].infix;
213 if (infix == NULL) {
214 *err_ = Err(token,
215 std::string("Unexpected token '") +
216 token.value().as_string() + std::string("'"));
217 return scoped_ptr<ParseNode>();
219 left = (this->*infix)(left.Pass(), token);
220 if (has_error())
221 return scoped_ptr<ParseNode>();
224 return left.Pass();
227 scoped_ptr<ParseNode> Parser::Literal(Token token) {
228 return make_scoped_ptr(new LiteralNode(token));
231 scoped_ptr<ParseNode> Parser::Name(Token token) {
232 return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
235 scoped_ptr<ParseNode> Parser::BlockComment(Token token) {
236 scoped_ptr<BlockCommentNode> comment(new BlockCommentNode());
237 comment->set_comment(token);
238 return comment.Pass();
241 scoped_ptr<ParseNode> Parser::Group(Token token) {
242 scoped_ptr<ParseNode> expr = ParseExpression();
243 if (has_error())
244 return scoped_ptr<ParseNode>();
245 Consume(Token::RIGHT_PAREN, "Expected ')'");
246 return expr.Pass();
249 scoped_ptr<ParseNode> Parser::Not(Token token) {
250 scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
251 if (has_error())
252 return scoped_ptr<ParseNode>();
253 scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
254 unary_op->set_op(token);
255 unary_op->set_operand(expr.Pass());
256 return unary_op.Pass();
259 scoped_ptr<ParseNode> Parser::List(Token node) {
260 scoped_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
261 if (!has_error() && !at_end())
262 Consume(Token::RIGHT_BRACKET, "Expected ']'");
263 return list.Pass();
266 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
267 Token token) {
268 scoped_ptr<ParseNode> right =
269 ParseExpression(expressions_[token.type()].precedence + 1);
270 if (!right) {
271 *err_ =
272 Err(token,
273 "Expected right hand side for '" + token.value().as_string() + "'");
274 return scoped_ptr<ParseNode>();
276 scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
277 binary_op->set_op(token);
278 binary_op->set_left(left.Pass());
279 binary_op->set_right(right.Pass());
280 return binary_op.Pass();
283 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
284 Token token) {
285 scoped_ptr<ListNode> list(new ListNode);
286 list->set_begin_token(token);
287 list->set_end(make_scoped_ptr(new EndNode(token)));
288 scoped_ptr<BlockNode> block;
289 bool has_arg = false;
290 if (LookAhead(Token::LEFT_PAREN)) {
291 Token start_token = Consume();
292 // Parsing a function call.
293 has_arg = true;
294 if (Match(Token::RIGHT_PAREN)) {
295 // Nothing, just an empty call.
296 } else {
297 list = ParseList(start_token, Token::RIGHT_PAREN, false);
298 if (has_error())
299 return scoped_ptr<ParseNode>();
300 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
302 // Optionally with a scope.
303 if (LookAhead(Token::LEFT_BRACE)) {
304 block = ParseBlock();
305 if (has_error())
306 return scoped_ptr<ParseNode>();
310 if (!left && !has_arg) {
311 // Not a function call, just a standalone identifier.
312 return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
314 scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
315 func_call->set_function(token);
316 func_call->set_args(list.Pass());
317 if (block)
318 func_call->set_block(block.Pass());
319 return func_call.Pass();
322 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
323 Token token) {
324 if (left->AsIdentifier() == NULL) {
325 *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
326 return scoped_ptr<ParseNode>();
328 scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
329 scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
330 assign->set_op(token);
331 assign->set_left(left.Pass());
332 assign->set_right(value.Pass());
333 return assign.Pass();
336 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
337 Token token) {
338 // TODO: Maybe support more complex expressions like a[0][0]. This would
339 // require work on the evaluator too.
340 if (left->AsIdentifier() == NULL) {
341 *err_ = Err(left.get(), "May only subscript identifiers.",
342 "The thing on the left hand side of the [] must be an identifier\n"
343 "and not an expression. If you need this, you'll have to assign the\n"
344 "value to a temporary before subscripting. Sorry.");
345 return scoped_ptr<ParseNode>();
347 scoped_ptr<ParseNode> value = ParseExpression();
348 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
349 scoped_ptr<AccessorNode> accessor(new AccessorNode);
350 accessor->set_base(left->AsIdentifier()->value());
351 accessor->set_index(value.Pass());
352 return accessor.Pass();
355 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
356 Token token) {
357 if (left->AsIdentifier() == NULL) {
358 *err_ = Err(left.get(), "May only use \".\" for identifiers.",
359 "The thing on the left hand side of the dot must be an identifier\n"
360 "and not an expression. If you need this, you'll have to assign the\n"
361 "value to a temporary first. Sorry.");
362 return scoped_ptr<ParseNode>();
365 scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
366 if (!right || !right->AsIdentifier()) {
367 *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
368 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
369 return scoped_ptr<ParseNode>();
372 scoped_ptr<AccessorNode> accessor(new AccessorNode);
373 accessor->set_base(left->AsIdentifier()->value());
374 accessor->set_member(scoped_ptr<IdentifierNode>(
375 static_cast<IdentifierNode*>(right.release())));
376 return accessor.Pass();
379 // Does not Consume the start or end token.
380 scoped_ptr<ListNode> Parser::ParseList(Token start_token,
381 Token::Type stop_before,
382 bool allow_trailing_comma) {
383 scoped_ptr<ListNode> list(new ListNode);
384 list->set_begin_token(start_token);
385 bool just_got_comma = false;
386 bool first_time = true;
387 while (!LookAhead(stop_before)) {
388 if (!first_time) {
389 if (!just_got_comma) {
390 // Require commas separate things in lists.
391 *err_ = Err(cur_token(), "Expected comma between items.");
392 return scoped_ptr<ListNode>();
395 first_time = false;
397 // Why _OR? We're parsing things that are higher precedence than the ,
398 // that separates the items of the list. , should appear lower than
399 // boolean expressions (the lowest of which is OR), but above assignments.
400 list->append_item(ParseExpression(PRECEDENCE_OR));
401 if (has_error())
402 return scoped_ptr<ListNode>();
403 if (at_end()) {
404 *err_ =
405 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
406 return scoped_ptr<ListNode>();
408 if (list->contents().back()->AsBlockComment()) {
409 // If there was a comment inside the list, we don't need a comma to the
410 // next item, so pretend we got one, if we're expecting one.
411 just_got_comma = allow_trailing_comma;
412 } else {
413 just_got_comma = Match(Token::COMMA);
416 if (just_got_comma && !allow_trailing_comma) {
417 *err_ = Err(cur_token(), "Trailing comma");
418 return scoped_ptr<ListNode>();
420 list->set_end(make_scoped_ptr(new EndNode(cur_token())));
421 return list.Pass();
424 scoped_ptr<ParseNode> Parser::ParseFile() {
425 scoped_ptr<BlockNode> file(new BlockNode(false));
426 for (;;) {
427 if (at_end())
428 break;
429 scoped_ptr<ParseNode> statement = ParseStatement();
430 if (!statement)
431 break;
432 file->append_statement(statement.Pass());
434 if (!at_end() && !has_error())
435 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
436 if (has_error())
437 return scoped_ptr<ParseNode>();
439 // TODO(scottmg): If this is measurably expensive, it could be done only
440 // when necessary (when reformatting, or during tests). Comments are
441 // separate from the parse tree at this point, so downstream code can remain
442 // ignorant of them.
443 AssignComments(file.get());
445 return file.Pass();
448 scoped_ptr<ParseNode> Parser::ParseStatement() {
449 if (LookAhead(Token::LEFT_BRACE)) {
450 return ParseBlock();
451 } else if (LookAhead(Token::IF)) {
452 return ParseCondition();
453 } else if (LookAhead(Token::BLOCK_COMMENT)) {
454 return BlockComment(Consume());
455 } else {
456 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
457 // to allow "pointless" expressions and return ParseExpression() directly.
458 scoped_ptr<ParseNode> stmt = ParseExpression();
459 if (stmt) {
460 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
461 return stmt.Pass();
463 if (!has_error()) {
464 Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
465 *err_ = Err(token, "Expecting assignment or function call.");
467 return scoped_ptr<ParseNode>();
471 scoped_ptr<BlockNode> Parser::ParseBlock() {
472 Token begin_token =
473 Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
474 if (has_error())
475 return scoped_ptr<BlockNode>();
476 scoped_ptr<BlockNode> block(new BlockNode(true));
477 block->set_begin_token(begin_token);
479 for (;;) {
480 if (LookAhead(Token::RIGHT_BRACE)) {
481 block->set_end(make_scoped_ptr(new EndNode(Consume())));
482 break;
485 scoped_ptr<ParseNode> statement = ParseStatement();
486 if (!statement)
487 return scoped_ptr<BlockNode>();
488 block->append_statement(statement.Pass());
490 return block.Pass();
493 scoped_ptr<ParseNode> Parser::ParseCondition() {
494 scoped_ptr<ConditionNode> condition(new ConditionNode);
495 condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
496 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
497 condition->set_condition(ParseExpression());
498 if (IsAssignment(condition->condition()))
499 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
500 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
501 condition->set_if_true(ParseBlock().Pass());
502 if (Match(Token::ELSE))
503 condition->set_if_false(ParseStatement().Pass());
504 if (has_error())
505 return scoped_ptr<ParseNode>();
506 return condition.Pass();
509 void Parser::TraverseOrder(const ParseNode* root,
510 std::vector<const ParseNode*>* pre,
511 std::vector<const ParseNode*>* post) {
512 if (root) {
513 pre->push_back(root);
515 if (const AccessorNode* accessor = root->AsAccessor()) {
516 TraverseOrder(accessor->index(), pre, post);
517 TraverseOrder(accessor->member(), pre, post);
518 } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
519 TraverseOrder(binop->left(), pre, post);
520 TraverseOrder(binop->right(), pre, post);
521 } else if (const BlockNode* block = root->AsBlock()) {
522 for (const auto& statement : block->statements())
523 TraverseOrder(statement, pre, post);
524 TraverseOrder(block->End(), pre, post);
525 } else if (const ConditionNode* condition = root->AsConditionNode()) {
526 TraverseOrder(condition->condition(), pre, post);
527 TraverseOrder(condition->if_true(), pre, post);
528 TraverseOrder(condition->if_false(), pre, post);
529 } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
530 TraverseOrder(func_call->args(), pre, post);
531 TraverseOrder(func_call->block(), pre, post);
532 } else if (root->AsIdentifier()) {
533 // Nothing.
534 } else if (const ListNode* list = root->AsList()) {
535 for (const auto& node : list->contents())
536 TraverseOrder(node, pre, post);
537 TraverseOrder(list->End(), pre, post);
538 } else if (root->AsLiteral()) {
539 // Nothing.
540 } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
541 TraverseOrder(unaryop->operand(), pre, post);
542 } else if (root->AsBlockComment()) {
543 // Nothing.
544 } else if (root->AsEnd()) {
545 // Nothing.
546 } else {
547 CHECK(false) << "Unhandled case in TraverseOrder.";
550 post->push_back(root);
554 void Parser::AssignComments(ParseNode* file) {
555 // Start by generating a pre- and post- order traversal of the tree so we
556 // can determine what's before and after comments.
557 std::vector<const ParseNode*> pre;
558 std::vector<const ParseNode*> post;
559 TraverseOrder(file, &pre, &post);
561 // Assign line comments to syntax immediately following.
562 int cur_comment = 0;
563 for (const auto& node : pre) {
564 const Location& start = node->GetRange().begin();
565 while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
566 if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
567 const_cast<ParseNode*>(node)->comments_mutable()->append_before(
568 line_comment_tokens_[cur_comment]);
569 ++cur_comment;
570 } else {
571 break;
576 // Remaining line comments go at end of file.
577 for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
578 ++cur_comment)
579 file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
581 // Assign suffix to syntax immediately before.
582 cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
583 for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
584 i != post.rend();
585 ++i) {
586 // Don't assign suffix comments to the function, list, or block, but instead
587 // to the last thing inside.
588 if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
589 continue;
591 const Location& start = (*i)->GetRange().begin();
592 const Location& end = (*i)->GetRange().end();
594 // Don't assign suffix comments to something that starts on an earlier
595 // line, so that in:
597 // sources = [ "a",
598 // "b" ] # comment
600 // it's attached to "b", not sources = [ ... ].
601 if (start.line_number() != end.line_number())
602 continue;
604 while (cur_comment >= 0) {
605 if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
606 const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
607 suffix_comment_tokens_[cur_comment]);
608 --cur_comment;
609 } else {
610 break;
614 // Suffix comments were assigned in reverse, so if there were multiple on
615 // the same node, they need to be reversed.
616 if ((*i)->comments() && !(*i)->comments()->suffix().empty())
617 const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();