1 /* vim: set sw=4 ts=8 et tw=78: */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is the Narcissus JavaScript engine.
17 * The Initial Developer of the Original Code is
18 * Brendan Eich <brendan@mozilla.org>.
19 * Portions created by the Initial Developer are Copyright (C) 2004
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
39 * Narcissus - JS implemented in JS.
41 * Lexical scanner and parser.
44 // Build a regexp that recognizes operators and punctuators (except newline).
45 var opRegExpSrc = "^";
46 for (i in opTypeNames) {
49 if (opRegExpSrc != "^")
51 opRegExpSrc += i.replace(/[?|^&(){}\[\]+\-*\/\.]/g, "\\$&");
53 var opRegExp = new RegExp(opRegExpSrc);
55 // A regexp to match floating point literals (but not integer literals).
56 var fpRegExp = /^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+|^\.\d+(?:[eE][-+]?\d+)?/;
58 // A regexp to match regexp literals.
59 var reRegExp = /^\/((?:\\.|\[(?:\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/;
61 function Tokenizer(s, f, l) {
63 this.source = String(s);
67 this.scanNewlines = false;
68 this.scanOperand = true;
69 this.filename = f || "";
73 Tokenizer.prototype = {
75 return this.source.substring(this.cursor);
79 return this.peek() == END;
83 return this.tokens[this.tokenIndex];
86 match: function (tt) {
87 return this.get() == tt || this.unget();
90 mustMatch: function (tt) {
92 throw this.newSyntaxError("Missing " + tokens[tt].toLowerCase());
99 next = this.tokens[(this.tokenIndex + this.lookahead) & 3];
100 if (this.scanNewlines && next.lineno != this.lineno)
111 peekOnSameLine: function () {
112 this.scanNewlines = true;
113 var tt = this.peek();
114 this.scanNewlines = false;
120 while (this.lookahead) {
122 this.tokenIndex = (this.tokenIndex + 1) & 3;
123 token = this.tokens[this.tokenIndex];
124 if (token.type != NEWLINE || this.scanNewlines)
129 var input = this.input;
130 var match = (this.scanNewlines ? /^[ \t]+/ : /^\s+/)(input);
132 var spaces = match[0];
133 this.cursor += spaces.length;
134 var newlines = spaces.match(/\n/g);
136 this.lineno += newlines.length;
140 if (!(match = /^\/(?:\*(?:.|\n)*?\*\/|\/.*)/(input)))
142 var comment = match[0];
143 this.cursor += comment.length;
144 newlines = comment.match(/\n/g);
146 this.lineno += newlines.length
149 this.tokenIndex = (this.tokenIndex + 1) & 3;
150 token = this.tokens[this.tokenIndex];
152 this.tokens[this.tokenIndex] = token = {};
155 return token.type = END;
157 if ((match = fpRegExp(input))) {
159 token.value = parseFloat(match[0]);
160 } else if ((match = /^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/(input))) {
162 token.value = parseInt(match[0]);
163 } else if ((match = /^[$_\w]+/(input))) { // FIXME no ES3 unicode
165 token.type = keywords[id] || IDENTIFIER;
167 } else if ((match = /^"(?:\\.|[^"])*"|^'(?:\\.|[^'])*'/(input))) { //"){
169 token.value = eval(match[0]);
170 } else if (this.scanOperand && (match = reRegExp(input))) {
172 token.value = new RegExp(match[1], match[2]);
173 } else if ((match = opRegExp(input))) {
175 if (assignOps[op] && input[op.length] == '=') {
177 token.assignOp = GLOBAL[opTypeNames[op]];
180 token.type = GLOBAL[opTypeNames[op]];
181 if (this.scanOperand &&
182 (token.type == PLUS || token.type == MINUS)) {
183 token.type += UNARY_PLUS - PLUS;
185 token.assignOp = null;
188 } else if (this.scanNewlines && (match = /^\n/(input))) {
189 token.type = NEWLINE;
191 throw this.newSyntaxError("Illegal token");
194 token.start = this.cursor;
195 this.cursor += match[0].length;
196 token.end = this.cursor;
197 token.lineno = this.lineno;
202 if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
203 this.tokenIndex = (this.tokenIndex - 1) & 3;
206 newSyntaxError: function (m) {
207 var e = new SyntaxError(m, this.filename, this.lineno);
208 e.source = this.source;
209 e.cursor = this.cursor;
214 function CompilerContext(inFunction) {
215 this.inFunction = inFunction;
221 var CCp = CompilerContext.prototype;
222 CCp.bracketLevel = CCp.curlyLevel = CCp.parenLevel = CCp.hookLevel = 0;
223 CCp.ecmaStrictMode = CCp.inForLoopInit = false;
225 function Script(t, x) {
226 var n = Statements(t, x);
228 n.funDecls = x.funDecls;
229 n.varDecls = x.varDecls;
233 // Node extends Array, which we extend slightly with a top-of-stack method.
234 Array.prototype.__defineProperty__(
237 return this.length && this[this.length-1];
242 function Node(t, type) {
245 this.type = type || token.type;
246 this.value = token.value;
247 this.lineno = token.lineno;
248 this.start = token.start;
249 this.end = token.end;
252 this.lineno = t.lineno;
256 for (var i = 2; i < arguments.length; i++)
257 this.push(arguments[i]);
260 var Np = Node.prototype = new Array;
261 Np.constructor = Node;
262 Np.toSource = Object.prototype.toSource;
264 // Always use push to add operands to an expression, to update start and end.
265 Np.push = function (kid) {
266 if (kid.start < this.start)
267 this.start = kid.start;
268 if (this.end < kid.end)
270 return Array.prototype.push.call(this, kid);
273 Node.indentLevel = 0;
275 function tokenstr(tt) {
277 return /^\W/.test(t) ? opTypeNames[t] : t.toUpperCase();
280 Np.toString = function () {
282 for (var i in this) {
283 if (this.hasOwnProperty(i) && i != 'type' && i != 'target')
284 a.push({id: i, value: this[i]});
286 a.sort(function (a,b) { return (a.id < b.id) ? -1 : 1; });
287 const INDENTATION = " ";
288 var n = ++Node.indentLevel;
289 var s = "{\n" + INDENTATION.repeat(n) + "type: " + tokenstr(this.type);
290 for (i = 0; i < a.length; i++)
291 s += ",\n" + INDENTATION.repeat(n) + a[i].id + ": " + a[i].value;
292 n = --Node.indentLevel;
293 s += "\n" + INDENTATION.repeat(n) + "}";
297 Np.getSource = function () {
298 return this.tokenizer.source.slice(this.start, this.end);
301 Np.__defineGetter__('filename',
302 function () { return this.tokenizer.filename; });
304 String.prototype.__defineProperty__(
307 var s = "", t = this + s;
315 // Statement stack and nested statement handler.
316 function nest(t, x, node, func, end) {
317 x.stmtStack.push(node);
320 end && t.mustMatch(end);
324 function Statements(t, x) {
325 var n = new Node(t, BLOCK);
327 while (!t.done && t.peek() != RIGHT_CURLY)
328 n.push(Statement(t, x));
333 function Block(t, x) {
334 t.mustMatch(LEFT_CURLY);
335 var n = Statements(t, x);
336 t.mustMatch(RIGHT_CURLY);
340 const DECLARED_FORM = 0, EXPRESSED_FORM = 1, STATEMENT_FORM = 2;
342 function Statement(t, x) {
343 var i, label, n, n2, ss, tt = t.get();
345 // Cases for statements ending in a right curly return early, avoiding the
346 // common semicolon insertion magic after this switch.
349 return FunctionDefinition(t, x, true,
350 (x.stmtStack.length > 1)
355 n = Statements(t, x);
356 t.mustMatch(RIGHT_CURLY);
361 n.condition = ParenExpression(t, x);
363 n.thenPart = Statement(t, x);
364 n.elsePart = t.match(ELSE) ? Statement(t, x) : null;
370 t.mustMatch(LEFT_PAREN);
371 n.discriminant = Expression(t, x);
372 t.mustMatch(RIGHT_PAREN);
376 t.mustMatch(LEFT_CURLY);
377 while ((tt = t.get()) != RIGHT_CURLY) {
380 if (n.defaultIndex >= 0)
381 throw t.newSyntaxError("More than one switch default");
386 n.defaultIndex = n.cases.length;
388 n2.caseLabel = Expression(t, x, COLON);
391 throw t.newSyntaxError("Invalid switch case");
394 n2.statements = new Node(t, BLOCK);
395 while ((tt=t.peek()) != CASE && tt != DEFAULT && tt != RIGHT_CURLY)
396 n2.statements.push(Statement(t, x));
405 t.mustMatch(LEFT_PAREN);
406 if ((tt = t.peek()) != SEMICOLON) {
407 x.inForLoopInit = true;
408 if (tt == VAR || tt == CONST) {
410 n2 = Variables(t, x);
412 n2 = Expression(t, x);
414 x.inForLoopInit = false;
416 if (n2 && t.match(IN)) {
418 if (n2.type == VAR) {
419 if (n2.length != 1) {
420 throw new SyntaxError("Invalid for..in left-hand side",
421 t.filename, n2.lineno);
424 // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
431 n.object = Expression(t, x);
433 n.setup = n2 || null;
434 t.mustMatch(SEMICOLON);
435 n.condition = (t.peek() == SEMICOLON) ? null : Expression(t, x);
436 t.mustMatch(SEMICOLON);
437 n.update = (t.peek() == RIGHT_PAREN) ? null : Expression(t, x);
439 t.mustMatch(RIGHT_PAREN);
440 n.body = nest(t, x, n, Statement);
446 n.condition = ParenExpression(t, x);
447 n.body = nest(t, x, n, Statement);
453 n.body = nest(t, x, n, Statement, WHILE);
454 n.condition = ParenExpression(t, x);
455 if (!x.ecmaStrictMode) {
456 // <script language="JavaScript"> (without version hints) may need
457 // automatic semicolon insertion without a newline after do-while.
458 // See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
467 if (t.peekOnSameLine() == IDENTIFIER) {
469 n.label = t.token.value;
477 throw t.newSyntaxError("Label not found");
478 } while (ss[i].label != label);
482 throw t.newSyntaxError("Invalid " + ((tt == BREAK)
486 } while (!ss[i].isLoop && (tt != BREAK || ss[i].type != SWITCH));
493 n.tryBlock = Block(t, x);
495 while (t.match(CATCH)) {
497 t.mustMatch(LEFT_PAREN);
498 n2.varName = t.mustMatch(IDENTIFIER).value;
500 if (x.ecmaStrictMode)
501 throw t.newSyntaxError("Illegal catch guard");
502 if (n.catchClauses.length && !n.catchClauses.top().guard)
503 throw t.newSyntaxError("Guarded catch after unguarded");
504 n2.guard = Expression(t, x);
508 t.mustMatch(RIGHT_PAREN);
509 n2.block = Block(t, x);
510 n.catchClauses.push(n2);
512 if (t.match(FINALLY))
513 n.finallyBlock = Block(t, x);
514 if (!n.catchClauses.length && !n.finallyBlock)
515 throw t.newSyntaxError("Invalid try statement");
520 throw t.newSyntaxError(tokens[tt] + " without preceding try");
524 n.exception = Expression(t, x);
529 throw t.newSyntaxError("Invalid return");
531 tt = t.peekOnSameLine();
532 if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
533 n.value = Expression(t, x);
538 n.object = ParenExpression(t, x);
539 n.body = nest(t, x, n, Statement);
553 n = new Node(t, SEMICOLON);
558 if (tt == IDENTIFIER) {
559 t.scanOperand = false;
561 t.scanOperand = true;
563 label = t.token.value;
565 for (i = ss.length-1; i >= 0; --i) {
566 if (ss[i].label == label)
567 throw t.newSyntaxError("Duplicate label");
570 n = new Node(t, LABEL);
572 n.statement = nest(t, x, n, Statement);
577 n = new Node(t, SEMICOLON);
579 n.expression = Expression(t, x);
580 n.end = n.expression.end;
584 if (t.lineno == t.token.lineno) {
585 tt = t.peekOnSameLine();
586 if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
587 throw t.newSyntaxError("Missing ; before statement");
593 function FunctionDefinition(t, x, requireName, functionForm) {
595 if (f.type != FUNCTION)
596 f.type = (f.value == "get") ? GETTER : SETTER;
597 if (t.match(IDENTIFIER))
598 f.name = t.token.value;
599 else if (requireName)
600 throw t.newSyntaxError("Missing function identifier");
602 t.mustMatch(LEFT_PAREN);
605 while ((tt = t.get()) != RIGHT_PAREN) {
606 if (tt != IDENTIFIER)
607 throw t.newSyntaxError("Missing formal parameter");
608 f.params.push(t.token.value);
609 if (t.peek() != RIGHT_PAREN)
613 t.mustMatch(LEFT_CURLY);
614 var x2 = new CompilerContext(true);
615 f.body = Script(t, x2);
616 t.mustMatch(RIGHT_CURLY);
619 f.functionForm = functionForm;
620 if (functionForm == DECLARED_FORM)
625 function Variables(t, x) {
628 t.mustMatch(IDENTIFIER);
629 var n2 = new Node(t);
631 if (t.match(ASSIGN)) {
632 if (t.token.assignOp)
633 throw t.newSyntaxError("Invalid variable initialization");
634 n2.initializer = Expression(t, x, COMMA);
636 n2.readOnly = (n.type == CONST);
639 } while (t.match(COMMA));
643 function ParenExpression(t, x) {
644 t.mustMatch(LEFT_PAREN);
645 var n = Expression(t, x);
646 t.mustMatch(RIGHT_PAREN);
653 ASSIGN: 2, HOOK: 2, COLON: 2,
654 // The above all have to have the same precedence, see bug 330975.
660 EQ: 9, NE: 9, STRICT_EQ: 9, STRICT_NE: 9,
661 LT: 10, LE: 10, GE: 10, GT: 10, IN: 10, INSTANCEOF: 10,
662 LSH: 11, RSH: 11, URSH: 11,
664 MUL: 13, DIV: 13, MOD: 13,
665 DELETE: 14, VOID: 14, TYPEOF: 14, // PRE_INCREMENT: 14, PRE_DECREMENT: 14,
666 NOT: 14, BITWISE_NOT: 14, UNARY_PLUS: 14, UNARY_MINUS: 14,
667 INCREMENT: 15, DECREMENT: 15, // postfix
672 // Map operator type code to precedence.
673 for (i in opPrecedence)
674 opPrecedence[GLOBAL[i]] = opPrecedence[i];
685 EQ: 2, NE: 2, STRICT_EQ: 2, STRICT_NE: 2,
686 LT: 2, LE: 2, GE: 2, GT: 2, IN: 2, INSTANCEOF: 2,
687 LSH: 2, RSH: 2, URSH: 2,
689 MUL: 2, DIV: 2, MOD: 2,
690 DELETE: 1, VOID: 1, TYPEOF: 1, // PRE_INCREMENT: 1, PRE_DECREMENT: 1,
691 NOT: 1, BITWISE_NOT: 1, UNARY_PLUS: 1, UNARY_MINUS: 1,
692 INCREMENT: 1, DECREMENT: 1, // postfix
693 NEW: 1, NEW_WITH_ARGS: 2, DOT: 2, INDEX: 2, CALL: 2,
694 ARRAY_INIT: 1, OBJECT_INIT: 1, GROUP: 1
697 // Map operator type code to arity.
699 opArity[GLOBAL[i]] = opArity[i];
701 function Expression(t, x, stop) {
702 var n, id, tt, operators = [], operands = [];
703 var bl = x.bracketLevel, cl = x.curlyLevel, pl = x.parenLevel,
707 var n = operators.pop();
709 var arity = opArity[op];
711 // Flatten left-associative trees.
712 var left = operands.length >= 2 && operands[operands.length-2];
713 if (left.type == op) {
714 var right = operands.pop();
721 // Always use push to add operands to n, to update start and end.
722 var a = operands.splice(operands.length - arity);
723 for (var i = 0; i < arity; i++)
726 // Include closing bracket or postfix operator in [start,end).
727 if (n.end < t.token.end)
735 while ((tt = t.get()) != END) {
737 x.bracketLevel == bl && x.curlyLevel == cl && x.parenLevel == pl &&
739 // Stop only if tt matches the optional stop parameter, and that
740 // token is not quoted by some kind of bracket.
745 // NB: cannot be empty, Statement handled that.
753 // Use >, not >=, for right-associative ASSIGN and HOOK/COLON.
754 while (opPrecedence[operators.top().type] > opPrecedence[tt] ||
755 (tt == COLON && operators.top().type == ASSIGN)) {
761 throw t.newSyntaxError("Invalid label");
764 operators.push(new Node(t));
766 operands.top().assignOp = t.token.assignOp;
768 ++x.hookLevel; // tt == HOOK
770 t.scanOperand = true;
774 // An in operator should not be parsed if we're parsing the head of
775 // a for (...) loop, unless it is in the then part of a conditional
776 // expression, or parenthesized somehow.
777 if (x.inForLoopInit && !x.hookLevel &&
778 !x.bracketLevel && !x.curlyLevel && !x.parenLevel) {
783 // Treat comma as left-associative so reduce can fold left-heavy
784 // COMMA trees into a single array.
791 case EQ: case NE: case STRICT_EQ: case STRICT_NE:
792 case LT: case LE: case GE: case GT:
794 case LSH: case RSH: case URSH:
795 case PLUS: case MINUS:
796 case MUL: case DIV: case MOD:
800 while (opPrecedence[operators.top().type] >= opPrecedence[tt])
803 t.mustMatch(IDENTIFIER);
804 operands.push(new Node(t, DOT, operands.pop(), new Node(t)));
806 operators.push(new Node(t));
807 t.scanOperand = true;
811 case DELETE: case VOID: case TYPEOF:
812 case NOT: case BITWISE_NOT: case UNARY_PLUS: case UNARY_MINUS:
816 operators.push(new Node(t));
819 case INCREMENT: case DECREMENT:
821 operators.push(new Node(t)); // prefix increment or decrement
823 // Don't cross a line boundary for postfix {in,de}crement.
824 if (t.tokens[(t.tokenIndex + t.lookahead - 1) & 3].lineno !=
829 // Use >, not >=, so postfix has higher precedence than prefix.
830 while (opPrecedence[operators.top().type] > opPrecedence[tt])
832 n = new Node(t, tt, operands.pop());
841 operands.push(FunctionDefinition(t, x, false, EXPRESSED_FORM));
842 t.scanOperand = false;
845 case NULL: case THIS: case TRUE: case FALSE:
846 case IDENTIFIER: case NUMBER: case STRING: case REGEXP:
849 operands.push(new Node(t));
850 t.scanOperand = false;
855 // Array initialiser. Parse using recursive descent, as the
856 // sub-grammar here is not an operator grammar.
857 n = new Node(t, ARRAY_INIT);
858 while ((tt = t.peek()) != RIGHT_BRACKET) {
864 n.push(Expression(t, x, COMMA));
868 t.mustMatch(RIGHT_BRACKET);
870 t.scanOperand = false;
872 // Property indexing operator.
873 operators.push(new Node(t, INDEX));
874 t.scanOperand = true;
880 if (t.scanOperand || x.bracketLevel == bl)
882 while (reduce().type != INDEX)
890 // Object initialiser. As for array initialisers (see above),
891 // parse using recursive descent.
893 n = new Node(t, OBJECT_INIT);
895 if (!t.match(RIGHT_CURLY)) {
898 if ((t.token.value == "get" || t.token.value == "set") &&
899 t.peek() == IDENTIFIER) {
900 if (x.ecmaStrictMode)
901 throw t.newSyntaxError("Illegal property accessor");
902 n.push(FunctionDefinition(t, x, true, EXPRESSED_FORM));
911 if (x.ecmaStrictMode)
912 throw t.newSyntaxError("Illegal trailing ,");
915 throw t.newSyntaxError("Invalid property name");
918 n.push(new Node(t, PROPERTY_INIT, id,
919 Expression(t, x, COMMA)));
921 } while (t.match(COMMA));
922 t.mustMatch(RIGHT_CURLY);
925 t.scanOperand = false;
930 if (!t.scanOperand && x.curlyLevel != cl)
931 throw "PANIC: right curly botch";
936 operators.push(new Node(t, GROUP));
938 while (opPrecedence[operators.top().type] > opPrecedence[NEW])
941 // Handle () now, to regularize the n-ary case for n > 0.
942 // We must set scanOperand in case there are arguments and
943 // the first one is a regexp or unary+/-.
945 t.scanOperand = true;
946 if (t.match(RIGHT_PAREN)) {
949 n.push(operands.pop());
951 n = new Node(t, CALL, operands.pop(),
955 t.scanOperand = false;
959 n.type = NEW_WITH_ARGS;
961 operators.push(new Node(t, CALL));
967 if (t.scanOperand || x.parenLevel == pl)
969 while ((tt = reduce().type) != GROUP && tt != CALL &&
970 tt != NEW_WITH_ARGS) {
975 if (n[1].type != COMMA)
976 n[1] = new Node(t, LIST, n[1]);
983 // Automatic semicolon insertion means we may scan across a newline
984 // and into the beginning of another statement. If so, break out of
985 // the while loop and let the t.scanOperand logic handle errors.
991 if (x.hookLevel != hl)
992 throw t.newSyntaxError("Missing : after ?");
993 if (x.parenLevel != pl)
994 throw t.newSyntaxError("Missing ) in parenthetical");
995 if (x.bracketLevel != bl)
996 throw t.newSyntaxError("Missing ] in index expression");
998 throw t.newSyntaxError("Missing operand");
1000 // Resume default mode, scanning for operands, not operators.
1001 t.scanOperand = true;
1003 while (operators.length)
1005 return operands.pop();
1008 function parse(s, f, l) {
1009 var t = new Tokenizer(s, f, l);
1010 var x = new CompilerContext(false);
1011 var n = Script(t, x);
1013 throw t.newSyntaxError("Syntax error");