Non-word characters don't terminate tag names.
[mediawiki.git] / includes / libs / jsminplus.php
blobf250217f008a87725faa6e921aba6572029bfd9a
1 <?php
2 /**
3 * JSMinPlus version 1.4
5 * Minifies a javascript file using a javascript parser
7 * This implements a PHP port of Brendan Eich's Narcissus open source javascript engine (in javascript)
8 * References: http://en.wikipedia.org/wiki/Narcissus_(JavaScript_engine)
9 * Narcissus sourcecode: http://mxr.mozilla.org/mozilla/source/js/narcissus/
10 * JSMinPlus weblog: http://crisp.tweakblogs.net/blog/cat/716
12 * Tino Zijdel <crisp@tweakers.net>
14 * Usage: $minified = JSMinPlus::minify($script [, $filename])
16 * Versionlog (see also changelog.txt):
17 * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
18 * reduce memory footprint by minifying by block-scope
19 * some small byte-saving and performance improvements
20 * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
21 * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
22 * 12-04-2009 - some small bugfixes and performance improvements
23 * 09-04-2009 - initial open sourced version 1.0
25 * Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
27 * @file
30 /* ***** BEGIN LICENSE BLOCK *****
31 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
33 * The contents of this file are subject to the Mozilla Public License Version
34 * 1.1 (the "License"); you may not use this file except in compliance with
35 * the License. You may obtain a copy of the License at
36 * http://www.mozilla.org/MPL/
38 * Software distributed under the License is distributed on an "AS IS" basis,
39 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
40 * for the specific language governing rights and limitations under the
41 * License.
43 * The Original Code is the Narcissus JavaScript engine.
45 * The Initial Developer of the Original Code is
46 * Brendan Eich <brendan@mozilla.org>.
47 * Portions created by the Initial Developer are Copyright (C) 2004
48 * the Initial Developer. All Rights Reserved.
50 * Contributor(s): Tino Zijdel <crisp@tweakers.net>
51 * PHP port, modifications and minifier routine are (C) 2009-2011
53 * Alternatively, the contents of this file may be used under the terms of
54 * either the GNU General Public License Version 2 or later (the "GPL"), or
55 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
56 * in which case the provisions of the GPL or the LGPL are applicable instead
57 * of those above. If you wish to allow use of your version of this file only
58 * under the terms of either the GPL or the LGPL, and not to allow others to
59 * use your version of this file under the terms of the MPL, indicate your
60 * decision by deleting the provisions above and replace them with the notice
61 * and other provisions required by the GPL or the LGPL. If you do not delete
62 * the provisions above, a recipient may use your version of this file under
63 * the terms of any one of the MPL, the GPL or the LGPL.
65 * ***** END LICENSE BLOCK ***** */
/*
 * Token type codes. The JS_* values that follow are node types the parser
 * assigns itself (for example JS_BLOCK and JS_SCRIPT), as opposed to types
 * that are spelled like the operator or keyword they stand for.
 */
define('TOKEN_END', 1);
define('TOKEN_NUMBER', 2);
define('TOKEN_IDENTIFIER', 3);
define('TOKEN_STRING', 4);
define('TOKEN_REGEXP', 5);
define('TOKEN_NEWLINE', 6);
define('TOKEN_CONDCOMMENT_START', 7);
define('TOKEN_CONDCOMMENT_END', 8);

define('JS_SCRIPT', 100);
define('JS_BLOCK', 101);
define('JS_LABEL', 102);
define('JS_FOR_IN', 103);
define('JS_CALL', 104);
define('JS_NEW_WITH_ARGS', 105);
define('JS_INDEX', 106);
define('JS_ARRAY_INIT', 107);
define('JS_OBJECT_INIT', 108);
define('JS_PROPERTY_INIT', 109);
define('JS_GETTER', 110);
define('JS_SETTER', 111);
define('JS_GROUP', 112);
define('JS_LIST', 113);

/* Type given to a script node once its subtree has been replaced by minified text */
define('JS_MINIFIED', 999);

/* Function forms, stored in the functionForm property of function nodes */
define('DECLARED_FORM', 0);
define('EXPRESSED_FORM', 1);
define('STATEMENT_FORM', 2);

/* Operators */
define('OP_SEMICOLON', ';');
define('OP_COMMA', ',');
define('OP_HOOK', '?');
define('OP_COLON', ':');
define('OP_OR', '||');
define('OP_AND', '&&');
define('OP_BITWISE_OR', '|');
define('OP_BITWISE_XOR', '^');
define('OP_BITWISE_AND', '&');
define('OP_STRICT_EQ', '===');
define('OP_EQ', '==');
define('OP_ASSIGN', '=');
define('OP_STRICT_NE', '!==');
define('OP_NE', '!=');
define('OP_LSH', '<<');
define('OP_LE', '<=');
define('OP_LT', '<');
define('OP_URSH', '>>>');
define('OP_RSH', '>>');
define('OP_GE', '>=');
define('OP_GT', '>');
define('OP_INCREMENT', '++');
define('OP_DECREMENT', '--');
define('OP_PLUS', '+');
define('OP_MINUS', '-');
define('OP_MUL', '*');
define('OP_DIV', '/');
define('OP_MOD', '%');
define('OP_NOT', '!');
define('OP_BITWISE_NOT', '~');
define('OP_DOT', '.');
define('OP_LEFT_BRACKET', '[');
define('OP_RIGHT_BRACKET', ']');
define('OP_LEFT_CURLY', '{');
define('OP_RIGHT_CURLY', '}');
define('OP_LEFT_PAREN', '(');
define('OP_RIGHT_PAREN', ')');
define('OP_CONDCOMMENT_END', '@*/');

/* Unary plus/minus get their own codes so they can be told apart from the binary forms */
define('OP_UNARY_PLUS', 'U+');
define('OP_UNARY_MINUS', 'U-');

/* Keywords */
define('KEYWORD_BREAK', 'break');
define('KEYWORD_CASE', 'case');
define('KEYWORD_CATCH', 'catch');
define('KEYWORD_CONST', 'const');
define('KEYWORD_CONTINUE', 'continue');
define('KEYWORD_DEBUGGER', 'debugger');
define('KEYWORD_DEFAULT', 'default');
define('KEYWORD_DELETE', 'delete');
define('KEYWORD_DO', 'do');
define('KEYWORD_ELSE', 'else');
define('KEYWORD_ENUM', 'enum');
define('KEYWORD_FALSE', 'false');
define('KEYWORD_FINALLY', 'finally');
define('KEYWORD_FOR', 'for');
define('KEYWORD_FUNCTION', 'function');
define('KEYWORD_IF', 'if');
define('KEYWORD_IN', 'in');
define('KEYWORD_INSTANCEOF', 'instanceof');
define('KEYWORD_NEW', 'new');
define('KEYWORD_NULL', 'null');
define('KEYWORD_RETURN', 'return');
define('KEYWORD_SWITCH', 'switch');
define('KEYWORD_THIS', 'this');
define('KEYWORD_THROW', 'throw');
define('KEYWORD_TRUE', 'true');
define('KEYWORD_TRY', 'try');
define('KEYWORD_TYPEOF', 'typeof');
define('KEYWORD_VAR', 'var');
define('KEYWORD_VOID', 'void');
define('KEYWORD_WHILE', 'while');
define('KEYWORD_WITH', 'with');
/**
 * Minifier front-end: has JSParser build a parse tree for a script and
 * serialises that tree back into compact JavaScript source.
 *
 * Usage: $minified = JSMinPlus::minify($script [, $filename])
 */
class JSMinPlus
{
	/** Parser instance; it is handed $this so it can call parseTree() itself. */
	private $parser;

	/** Words that isValidIdentifier() refuses to emit as a bare property name. */
	private $reserved = array(
		'break', 'case', 'catch', 'continue', 'default', 'delete', 'do',
		'else', 'finally', 'for', 'function', 'if', 'in', 'instanceof',
		'new', 'return', 'switch', 'this', 'throw', 'try', 'typeof', 'var',
		'void', 'while', 'with',
		// Words reserved for future use
		'abstract', 'boolean', 'byte', 'char', 'class', 'const', 'debugger',
		'double', 'enum', 'export', 'extends', 'final', 'float', 'goto',
		'implements', 'import', 'int', 'interface', 'long', 'native',
		'package', 'private', 'protected', 'public', 'short', 'static',
		'super', 'synchronized', 'throws', 'transient', 'volatile',
		// These are not reserved, but should be taken into account
		// in isValidIdentifier (See jslint source code)
		'arguments', 'eval', 'true', 'false', 'Infinity', 'NaN', 'null', 'undefined'
	);

	/**
	 * Private: instances are only created through minify().
	 */
	private function __construct()
	{
		$this->parser = new JSParser($this);
	}

	/**
	 * Minify a piece of JavaScript.
	 *
	 * @param string $js JavaScript source
	 * @param string $filename name passed on to the parser
	 * @return string|false minified source, or false when parsing or
	 *  serialising threw (the exception message is echoed in that case)
	 */
	public static function minify($js, $filename='')
	{
		static $instance;

		// this is a singleton
		if(!$instance)
			$instance = new JSMinPlus();

		return $instance->min($js, $filename);
	}

	/**
	 * Parse and serialise $js; exceptions are echoed and turned into false.
	 *
	 * @param string $js
	 * @param string $filename
	 * @return string|false
	 */
	private function min($js, $filename)
	{
		try
		{
			$n = $this->parser->parse($js, $filename, 1);
			return $this->parseTree($n);
		}
		catch(Exception $e)
		{
			echo $e->getMessage() . "\n";
		}

		return false;
	}

	/**
	 * Serialise a parse-tree node (and, recursively, its children) to
	 * minified JavaScript.
	 *
	 * @param object $n node to serialise; dispatch is on $n->type
	 * @param bool $noBlockGrouping when true, a block of several statements
	 *  is not wrapped in curly braces (the caller supplies them)
	 * @return string
	 * @throws Exception for debugger statements and unknown node types
	 */
	public function parseTree($n, $noBlockGrouping = false)
	{
		$s = '';

		switch ($n->type)
		{
			case JS_MINIFIED:
				$s = $n->value;
			break;

			case JS_SCRIPT:
				// we do nothing yet with funDecls or varDecls
				$noBlockGrouping = true;
			// FALL THROUGH

			case JS_BLOCK:
				$childs = $n->treeNodes;
				$lastType = 0;
				// $c counts the children that actually produced output
				for ($c = 0, $i = 0, $j = count($childs); $i < $j; $i++)
				{
					$type = $childs[$i]->type;
					$t = $this->parseTree($childs[$i]);
					if (strlen($t))
					{
						if ($c)
						{
							$s = rtrim($s, ';');

							if ($type == KEYWORD_FUNCTION && $childs[$i]->functionForm == DECLARED_FORM)
							{
								// put declared functions on a new line
								$s .= "\n";
							}
							elseif ($type == KEYWORD_VAR && $type == $lastType)
							{
								// multiple var-statements can go into one
								// (substr drops the leading 'var ' of the second statement)
								$t = ',' . substr($t, 4);
							}
							else
							{
								// add terminator
								$s .= ';';
							}
						}

						$s .= $t;

						$c++;
						$lastType = $type;
					}
				}

				if ($c > 1 && !$noBlockGrouping)
				{
					$s = '{' . $s . '}';
				}
			break;

			case KEYWORD_FUNCTION:
				$s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
				$params = $n->params;
				for ($i = 0, $j = count($params); $i < $j; $i++)
					$s .= ($i ? ',' : '') . $params[$i];
				$s .= '){' . $this->parseTree($n->body, true) . '}';
			break;

			case KEYWORD_IF:
				$s = 'if(' . $this->parseTree($n->condition) . ')';
				$thenPart = $this->parseTree($n->thenPart);
				$elsePart = $n->elsePart ? $this->parseTree($n->elsePart) : null;

				// empty if-statement
				if ($thenPart == '')
					$thenPart = ';';

				if ($elsePart)
				{
					// be careful and always make a block out of the thenPart; could be more optimized but is a lot of trouble
					if ($thenPart != ';' && $thenPart[0] != '{')
						$thenPart = '{' . $thenPart . '}';

					$s .= $thenPart . 'else';

					// we could check for more, but that hardly ever applies so go for performance
					if ($elsePart[0] != '{')
						$s .= ' ';

					$s .= $elsePart;
				}
				else
				{
					$s .= $thenPart;
				}
			break;

			case KEYWORD_SWITCH:
				$s = 'switch(' . $this->parseTree($n->discriminant) . '){';
				$cases = $n->cases;
				for ($i = 0, $j = count($cases); $i < $j; $i++)
				{
					$case = $cases[$i];
					if ($case->type == KEYWORD_CASE)
						$s .= 'case' . ($case->caseLabel->type != TOKEN_STRING ? ' ' : '') . $this->parseTree($case->caseLabel) . ':';
					else
						$s .= 'default:';

					$statement = $this->parseTree($case->statements, true);
					if ($statement)
					{
						$s .= $statement;
						// no terminator for last statement
						if ($i + 1 < $j)
							$s .= ';';
					}
				}
				$s .= '}';
			break;

			case KEYWORD_FOR:
				$s = 'for(' . ($n->setup ? $this->parseTree($n->setup) : '')
					. ';' . ($n->condition ? $this->parseTree($n->condition) : '')
					. ';' . ($n->update ? $this->parseTree($n->update) : '') . ')';

				$body = $this->parseTree($n->body);
				if ($body == '')
					$body = ';';

				$s .= $body;
			break;

			case KEYWORD_WHILE:
				$s = 'while(' . $this->parseTree($n->condition) . ')';

				$body = $this->parseTree($n->body);
				if ($body == '')
					$body = ';';

				$s .= $body;
			break;

			case JS_FOR_IN:
				$s = 'for(' . ($n->varDecl ? $this->parseTree($n->varDecl) : $this->parseTree($n->iterator)) . ' in ' . $this->parseTree($n->object) . ')';

				$body = $this->parseTree($n->body);
				if ($body == '')
					$body = ';';

				$s .= $body;
			break;

			case KEYWORD_DO:
				$s = 'do{' . $this->parseTree($n->body, true) . '}while(' . $this->parseTree($n->condition) . ')';
			break;

			case KEYWORD_BREAK:
			case KEYWORD_CONTINUE:
				$s = $n->value . ($n->label ? ' ' . $n->label : '');
			break;

			case KEYWORD_TRY:
				$s = 'try{' . $this->parseTree($n->tryBlock, true) . '}';
				$catchClauses = $n->catchClauses;
				for ($i = 0, $j = count($catchClauses); $i < $j; $i++)
				{
					$t = $catchClauses[$i];
					$s .= 'catch(' . $t->varName . ($t->guard ? ' if ' . $this->parseTree($t->guard) : '') . '){' . $this->parseTree($t->block, true) . '}';
				}
				if ($n->finallyBlock)
					$s .= 'finally{' . $this->parseTree($n->finallyBlock, true) . '}';
			break;

			case KEYWORD_THROW:
			case KEYWORD_RETURN:
				$s = $n->type;
				if ($n->value)
				{
					$t = $this->parseTree($n->value);
					if (strlen($t))
					{
						// a separating space is only needed when the operand
						// would otherwise run into the keyword
						if ($this->isWordChar($t[0]) || $t[0] == '\\')
							$s .= ' ';

						$s .= $t;
					}
				}
			break;

			case KEYWORD_WITH:
				$s = 'with(' . $this->parseTree($n->object) . ')' . $this->parseTree($n->body);
			break;

			case KEYWORD_VAR:
			case KEYWORD_CONST:
				$s = $n->value . ' ';
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
				{
					$t = $childs[$i];
					$s .= ($i ? ',' : '') . $t->name;
					$u = $t->initializer;
					if ($u)
						$s .= '=' . $this->parseTree($u);
				}
			break;

			case KEYWORD_IN:
			case KEYWORD_INSTANCEOF:
				$left = $this->parseTree($n->treeNodes[0]);
				$right = $this->parseTree($n->treeNodes[1]);

				$s = $left;

				if ($this->isWordChar(substr($left, -1)))
					$s .= ' ';

				$s .= $n->type;

				if ($this->isWordChar($right[0]) || $right[0] == '\\')
					$s .= ' ';

				$s .= $right;
			break;

			case KEYWORD_DELETE:
			case KEYWORD_TYPEOF:
				$right = $this->parseTree($n->treeNodes[0]);

				$s = $n->type;

				if ($this->isWordChar($right[0]) || $right[0] == '\\')
					$s .= ' ';

				$s .= $right;
			break;

			case KEYWORD_VOID:
				$s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
			break;

			case KEYWORD_DEBUGGER:
				throw new Exception('NOT IMPLEMENTED: DEBUGGER');
			break;

			case TOKEN_CONDCOMMENT_START:
			case TOKEN_CONDCOMMENT_END:
				$s = $n->value . ($n->type == TOKEN_CONDCOMMENT_START ? ' ' : '');
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
					$s .= $this->parseTree($childs[$i]);
			break;

			case OP_SEMICOLON:
				if ($expression = $n->expression)
					$s = $this->parseTree($expression);
			break;

			case JS_LABEL:
				$s = $n->label . ':' . $this->parseTree($n->statement);
			break;

			case OP_COMMA:
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
					$s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
			break;

			case OP_ASSIGN:
				$s = $this->parseTree($n->treeNodes[0]) . $n->value . $this->parseTree($n->treeNodes[1]);
			break;

			case OP_HOOK:
				$s = $this->parseTree($n->treeNodes[0]) . '?' . $this->parseTree($n->treeNodes[1]) . ':' . $this->parseTree($n->treeNodes[2]);
			break;

			case OP_OR: case OP_AND:
			case OP_BITWISE_OR: case OP_BITWISE_XOR: case OP_BITWISE_AND:
			case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
			case OP_LT: case OP_LE: case OP_GE: case OP_GT:
			case OP_LSH: case OP_RSH: case OP_URSH:
			case OP_MUL: case OP_DIV: case OP_MOD:
				$s = $this->parseTree($n->treeNodes[0]) . $n->type . $this->parseTree($n->treeNodes[1]);
			break;

			case OP_PLUS:
			case OP_MINUS:
				$left = $this->parseTree($n->treeNodes[0]);
				$right = $this->parseTree($n->treeNodes[1]);

				switch ($n->treeNodes[1]->type)
				{
					case OP_PLUS:
					case OP_MINUS:
					case OP_INCREMENT:
					case OP_DECREMENT:
					case OP_UNARY_PLUS:
					case OP_UNARY_MINUS:
						// keep a space so that e.g. 'a + +b' does not become 'a++b'
						$s = $left . $n->type . ' ' . $right;
					break;

					case TOKEN_STRING:
						// combine concatenated strings with same quote style.
						// Looking at the rendered text alone is not enough: in
						// 5-'1'+'2' the left side also ends in a quote, but
						// folding would yield 5-'12'. Only fold when the left
						// operand itself ends in a string literal that is
						// being concatenated.
						if ($n->type == OP_PLUS &&
							substr($left, -1) == $right[0] &&
							$this->endsWithStringLiteral($n->treeNodes[0])
						)
						{
							$s = substr($left, 0, -1) . substr($right, 1);
							break;
						}
					// FALL THROUGH

					default:
						$s = $left . $n->type . $right;
				}
			break;

			case OP_NOT:
			case OP_BITWISE_NOT:
			case OP_UNARY_PLUS:
			case OP_UNARY_MINUS:
				$s = $n->value . $this->parseTree($n->treeNodes[0]);
			break;

			case OP_INCREMENT:
			case OP_DECREMENT:
				if ($n->postfix)
					$s = $this->parseTree($n->treeNodes[0]) . $n->value;
				else
					$s = $n->value . $this->parseTree($n->treeNodes[0]);
			break;

			case OP_DOT:
				$s = $this->parseTree($n->treeNodes[0]) . '.' . $this->parseTree($n->treeNodes[1]);
			break;

			case JS_INDEX:
				$s = $this->parseTree($n->treeNodes[0]);
				// See if we can replace named index with a dot saving 3 bytes
				if (	$n->treeNodes[0]->type == TOKEN_IDENTIFIER &&
					$n->treeNodes[1]->type == TOKEN_STRING &&
					$this->isValidIdentifier(substr($n->treeNodes[1]->value, 1, -1))
				)
					$s .= '.' . substr($n->treeNodes[1]->value, 1, -1);
				else
					$s .= '[' . $this->parseTree($n->treeNodes[1]) . ']';
			break;

			case JS_LIST:
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
					$s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
			break;

			case JS_CALL:
				$s = $this->parseTree($n->treeNodes[0]) . '(' . $this->parseTree($n->treeNodes[1]) . ')';
			break;

			case KEYWORD_NEW:
			case JS_NEW_WITH_ARGS:
				$s = 'new ' . $this->parseTree($n->treeNodes[0]) . '(' . ($n->type == JS_NEW_WITH_ARGS ? $this->parseTree($n->treeNodes[1]) : '') . ')';
			break;

			case JS_ARRAY_INIT:
				$s = '[';
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
				{
					$s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
				}
				$s .= ']';
			break;

			case JS_OBJECT_INIT:
				$s = '{';
				$childs = $n->treeNodes;
				for ($i = 0, $j = count($childs); $i < $j; $i++)
				{
					$t = $childs[$i];
					if ($i)
						$s .= ',';
					if ($t->type == JS_PROPERTY_INIT)
					{
						// Ditch the quotes when the index is a valid identifier
						if (	$t->treeNodes[0]->type == TOKEN_STRING &&
							$this->isValidIdentifier(substr($t->treeNodes[0]->value, 1, -1))
						)
							$s .= substr($t->treeNodes[0]->value, 1, -1);
						else
							$s .= $t->treeNodes[0]->value;

						$s .= ':' . $this->parseTree($t->treeNodes[1]);
					}
					else
					{
						$s .= $t->type == JS_GETTER ? 'get' : 'set';
						$s .= ' ' . $t->name . '(';
						$params = $t->params;
						// Separate counters: reusing $i/$j here would clobber
						// the enclosing member loop and drop later members.
						for ($p = 0, $q = count($params); $p < $q; $p++)
							$s .= ($p ? ',' : '') . $params[$p];
						$s .= '){' . $this->parseTree($t->body, true) . '}';
					}
				}
				$s .= '}';
			break;

			case TOKEN_NUMBER:
				$s = $n->value;
				// e.g. 1000 -> 1e3
				if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
					$s = $m[1] . 'e' . strlen($m[2]);
			break;

			case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
			case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
				$s = $n->value;
			break;

			case JS_GROUP:
				// parentheses around these node types are dropped
				if (in_array(
					$n->treeNodes[0]->type,
					array(
						JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
						TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
						KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
					)
				))
				{
					$s = $this->parseTree($n->treeNodes[0]);
				}
				else
				{
					$s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
				}
			break;

			default:
				throw new Exception('UNKNOWN TOKEN TYPE: ' . $n->type);
		}

		return $s;
	}

	/**
	 * Tell whether the last operand of $node is a string literal reached
	 * only through '+' (or through a group around a string literal), so that
	 * appending another string literal with '+' may be folded into it.
	 *
	 * @param object $node
	 * @return bool
	 */
	private function endsWithStringLiteral($node)
	{
		switch ($node->type)
		{
			case TOKEN_STRING:
				return true;

			case OP_PLUS:
				return $this->endsWithStringLiteral($node->treeNodes[1]);

			case JS_GROUP:
				return $this->endsWithStringLiteral($node->treeNodes[0]);
		}

		return false;
	}

	/**
	 * @param string $string
	 * @return bool true when $string matches [a-zA-Z_][a-zA-Z0-9_]* and is
	 *  not listed in $this->reserved
	 */
	private function isValidIdentifier($string)
	{
		return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
	}

	/**
	 * @param string $char
	 * @return bool true for '_', '$' and alphanumeric characters
	 */
	private function isWordChar($char)
	{
		return $char == '_' || $char == '$' || ctype_alnum($char);
	}
}
673 class JSParser
/** Tokenizer; set to a JSTokenizer instance by the constructor. */
private $t;

/** Optional minifier whose parseTree() is invoked by Script(). */
private $minifier;

/**
 * Binding strength per operator / node type. Expression() reduces the
 * operator stack while the stacked operator's value is greater than (or,
 * for most binary operators, greater than or equal to) the incoming one.
 */
private $opPrecedence = array(
	';' => 0,
	',' => 1,
	'=' => 2,
	'?' => 2,
	':' => 2,
	// The above all have to have the same precedence, see bug 330975
	'||' => 4,
	'&&' => 5,
	'|' => 6,
	'^' => 7,
	'&' => 8,
	'==' => 9,
	'!=' => 9,
	'===' => 9,
	'!==' => 9,
	'<' => 10,
	'<=' => 10,
	'>=' => 10,
	'>' => 10,
	'in' => 10,
	'instanceof' => 10,
	'<<' => 11,
	'>>' => 11,
	'>>>' => 11,
	'+' => 12,
	'-' => 12,
	'*' => 13,
	'/' => 13,
	'%' => 13,
	'delete' => 14,
	'void' => 14,
	'typeof' => 14,
	'!' => 14,
	'~' => 14,
	'U+' => 14,
	'U-' => 14,
	'++' => 15,
	'--' => 15,
	'new' => 16,
	'.' => 17,
	JS_NEW_WITH_ARGS => 0,
	JS_INDEX => 0,
	JS_CALL => 0,
	JS_ARRAY_INIT => 0,
	JS_OBJECT_INIT => 0,
	JS_GROUP => 0
);

/**
 * Operand count per operator / node type (the comma entry is negative).
 */
private $opArity = array(
	',' => -2,
	'=' => 2,
	'?' => 3,
	'||' => 2,
	'&&' => 2,
	'|' => 2,
	'^' => 2,
	'&' => 2,
	'==' => 2,
	'!=' => 2,
	'===' => 2,
	'!==' => 2,
	'<' => 2,
	'<=' => 2,
	'>=' => 2,
	'>' => 2,
	'in' => 2,
	'instanceof' => 2,
	'<<' => 2,
	'>>' => 2,
	'>>>' => 2,
	'+' => 2,
	'-' => 2,
	'*' => 2,
	'/' => 2,
	'%' => 2,
	'delete' => 1,
	'void' => 1,
	'typeof' => 1,
	'!' => 1,
	'~' => 1,
	'U+' => 1,
	'U-' => 1,
	'++' => 1,
	'--' => 1,
	'new' => 1,
	'.' => 2,
	JS_NEW_WITH_ARGS => 2,
	JS_INDEX => 2,
	JS_CALL => 2,
	JS_ARRAY_INIT => 1,
	JS_OBJECT_INIT => 1,
	JS_GROUP => 1,
	TOKEN_CONDCOMMENT_START => 1,
	TOKEN_CONDCOMMENT_END => 1
);
/**
 * @param object|null $minifier when given, Script() hands every finished
 *  script node to its parseTree() method
 */
public function __construct($minifier=null)
{
	$this->t = new JSTokenizer();
	$this->minifier = $minifier;
}
/**
 * Parse a complete script.
 *
 * @param mixed $s forwarded unchanged to the tokenizer's init()
 * @param mixed $f forwarded unchanged to the tokenizer's init()
 * @param mixed $l forwarded unchanged to the tokenizer's init()
 * @return object the node returned by Script()
 * @throws Exception 'Syntax error' when input is left over after parsing
 */
public function parse($s, $f, $l)
{
	$tokenizer = $this->t;

	// initialize tokenizer
	$tokenizer->init($s, $f, $l);

	$context = new JSCompilerContext(false);
	$tree = $this->Script($context);

	if ($tokenizer->isDone())
		return $tree;

	throw $tokenizer->newSyntaxError('Syntax error');
}
/**
 * Parse a statement list as a script scope and, when a minifier is
 * attached, immediately collapse it to its minified text.
 *
 * @param object $x compiler context of the scope
 * @return object a JS_SCRIPT node, or a JS_MINIFIED node whose value holds
 *  the minified source when a minifier is attached
 */
private function Script($x)
{
	$script = $this->Statements($x);
	$script->type = JS_SCRIPT;
	$script->funDecls = $x->funDecls;
	$script->varDecls = $x->varDecls;

	if (!$this->minifier)
		return $script;

	// minify by scope
	$script->value = $this->minifier->parseTree($script);

	// clear tree from node to save memory
	$script->treeNodes = null;
	$script->funDecls = null;
	$script->varDecls = null;

	$script->type = JS_MINIFIED;

	return $script;
}
/**
 * Collect statements into a JS_BLOCK node until the input is done or a
 * closing curly brace is next. The block sits on the context's statement
 * stack while its statements are being parsed.
 *
 * @param object $x compiler context
 * @return object the JS_BLOCK node
 */
private function Statements($x)
{
	$block = new JSNode($this->t, JS_BLOCK);
	array_push($x->stmtStack, $block);

	while (!($this->t->isDone() || $this->t->peek() == OP_RIGHT_CURLY))
	{
		$block->addNode($this->Statement($x));
	}

	array_pop($x->stmtStack);

	return $block;
}
/**
 * Parse a statement list enclosed in mandatory curly braces.
 *
 * @param object $x compiler context
 * @return object the node returned by Statements()
 */
private function Block($x)
{
	$this->t->mustMatch(OP_LEFT_CURLY);
	$statements = $this->Statements($x);
	$this->t->mustMatch(OP_RIGHT_CURLY);

	return $statements;
}
/**
 * Parse a single statement, dispatching on its first token.
 *
 * Statements that end in a right curly brace (and a few others) return
 * directly from the switch; the remaining ones leave the switch with
 * `break` and go through the shared "missing semicolon" check and the
 * optional-semicolon match at the bottom.
 *
 * @param object $x compiler context (statement stack, inFunction,
 *  inForLoopInit, ecmaStrictMode, ...)
 * @return object the statement node
 * @throws Exception syntax errors created through the tokenizer's
 *  newSyntaxError()
 */
private function Statement($x)
{
	$tt = $this->t->get();
	$n2 = null;

	// Cases for statements ending in a right curly return early, avoiding the
	// common semicolon insertion magic after this switch.
	switch ($tt)
	{
		case KEYWORD_FUNCTION:
			return $this->FunctionDefinition(
				$x,
				true,
				count($x->stmtStack) > 1 ? STATEMENT_FORM : DECLARED_FORM
			);

		case OP_LEFT_CURLY:
			$n = $this->Statements($x);
			$this->t->mustMatch(OP_RIGHT_CURLY);
		return $n;

		case KEYWORD_IF:
			$n = new JSNode($this->t);
			$n->condition = $this->ParenExpression($x);
			array_push($x->stmtStack, $n);
			$n->thenPart = $this->Statement($x);
			$n->elsePart = $this->t->match(KEYWORD_ELSE) ? $this->Statement($x) : null;
			array_pop($x->stmtStack);
		return $n;

		case KEYWORD_SWITCH:
			$n = new JSNode($this->t);
			$this->t->mustMatch(OP_LEFT_PAREN);
			$n->discriminant = $this->Expression($x);
			$this->t->mustMatch(OP_RIGHT_PAREN);
			$n->cases = array();
			$n->defaultIndex = -1;

			array_push($x->stmtStack, $n);

			$this->t->mustMatch(OP_LEFT_CURLY);

			while (($tt = $this->t->get()) != OP_RIGHT_CURLY)
			{
				switch ($tt)
				{
					case KEYWORD_DEFAULT:
						if ($n->defaultIndex >= 0)
							throw $this->t->newSyntaxError('More than one switch default');
						// FALL THROUGH
					case KEYWORD_CASE:
						$n2 = new JSNode($this->t);
						if ($tt == KEYWORD_DEFAULT)
							$n->defaultIndex = count($n->cases);
						else
							$n2->caseLabel = $this->Expression($x, OP_COLON);
						break;
					default:
						throw $this->t->newSyntaxError('Invalid switch case');
				}

				$this->t->mustMatch(OP_COLON);
				$n2->statements = new JSNode($this->t, JS_BLOCK);
				while (($tt = $this->t->peek()) != KEYWORD_CASE && $tt != KEYWORD_DEFAULT && $tt != OP_RIGHT_CURLY)
					$n2->statements->addNode($this->Statement($x));

				array_push($n->cases, $n2);
			}

			array_pop($x->stmtStack);
		return $n;

		case KEYWORD_FOR:
			$n = new JSNode($this->t);
			$n->isLoop = true;
			$this->t->mustMatch(OP_LEFT_PAREN);

			if (($tt = $this->t->peek()) != OP_SEMICOLON)
			{
				// lets Expression() stop at a top-level 'in'
				$x->inForLoopInit = true;
				if ($tt == KEYWORD_VAR || $tt == KEYWORD_CONST)
				{
					$this->t->get();
					$n2 = $this->Variables($x);
				}
				else
				{
					$n2 = $this->Expression($x);
				}
				$x->inForLoopInit = false;
			}

			if ($n2 && $this->t->match(KEYWORD_IN))
			{
				$n->type = JS_FOR_IN;
				if ($n2->type == KEYWORD_VAR)
				{
					if (count($n2->treeNodes) != 1)
					{
						// Every other error path in this parser uses the
						// tokenizer's newSyntaxError($message); the original
						// called a SyntaxError() method that is used nowhere
						// else here.
						throw $this->t->newSyntaxError('Invalid for..in left-hand side');
					}

					// NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
					$n->iterator = $n2->treeNodes[0];
					$n->varDecl = $n2;
				}
				else
				{
					$n->iterator = $n2;
					$n->varDecl = null;
				}

				$n->object = $this->Expression($x);
			}
			else
			{
				$n->setup = $n2 ? $n2 : null;
				$this->t->mustMatch(OP_SEMICOLON);
				$n->condition = $this->t->peek() == OP_SEMICOLON ? null : $this->Expression($x);
				$this->t->mustMatch(OP_SEMICOLON);
				$n->update = $this->t->peek() == OP_RIGHT_PAREN ? null : $this->Expression($x);
			}

			$this->t->mustMatch(OP_RIGHT_PAREN);
			$n->body = $this->nest($x, $n);
		return $n;

		case KEYWORD_WHILE:
			$n = new JSNode($this->t);
			$n->isLoop = true;
			$n->condition = $this->ParenExpression($x);
			$n->body = $this->nest($x, $n);
		return $n;

		case KEYWORD_DO:
			$n = new JSNode($this->t);
			$n->isLoop = true;
			$n->body = $this->nest($x, $n, KEYWORD_WHILE);
			$n->condition = $this->ParenExpression($x);
			if (!$x->ecmaStrictMode)
			{
				// <script language="JavaScript"> (without version hints) may need
				// automatic semicolon insertion without a newline after do-while.
				// See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
				$this->t->match(OP_SEMICOLON);
				return $n;
			}
		break;

		case KEYWORD_BREAK:
		case KEYWORD_CONTINUE:
			$n = new JSNode($this->t);

			if ($this->t->peekOnSameLine() == TOKEN_IDENTIFIER)
			{
				$this->t->get();
				$n->label = $this->t->currentToken()->value;
			}

			// walk the statement stack outwards to find the jump target
			$ss = $x->stmtStack;
			$i = count($ss);
			$label = $n->label;
			if ($label)
			{
				do
				{
					if (--$i < 0)
						throw $this->t->newSyntaxError('Label not found');
				}
				while ($ss[$i]->label != $label);
			}
			else
			{
				do
				{
					if (--$i < 0)
						throw $this->t->newSyntaxError('Invalid ' . $tt);
				}
				while (!$ss[$i]->isLoop && ($tt != KEYWORD_BREAK || $ss[$i]->type != KEYWORD_SWITCH));
			}

			$n->target = $ss[$i];
		break;

		case KEYWORD_TRY:
			$n = new JSNode($this->t);
			$n->tryBlock = $this->Block($x);
			$n->catchClauses = array();

			while ($this->t->match(KEYWORD_CATCH))
			{
				$n2 = new JSNode($this->t);
				$this->t->mustMatch(OP_LEFT_PAREN);
				$n2->varName = $this->t->mustMatch(TOKEN_IDENTIFIER)->value;

				if ($this->t->match(KEYWORD_IF))
				{
					if ($x->ecmaStrictMode)
						throw $this->t->newSyntaxError('Illegal catch guard');

					if (count($n->catchClauses) && !end($n->catchClauses)->guard)
						throw $this->t->newSyntaxError('Guarded catch after unguarded');

					$n2->guard = $this->Expression($x);
				}
				else
				{
					$n2->guard = null;
				}

				$this->t->mustMatch(OP_RIGHT_PAREN);
				$n2->block = $this->Block($x);
				array_push($n->catchClauses, $n2);
			}

			if ($this->t->match(KEYWORD_FINALLY))
				$n->finallyBlock = $this->Block($x);

			if (!count($n->catchClauses) && !$n->finallyBlock)
				throw $this->t->newSyntaxError('Invalid try statement');
		return $n;

		case KEYWORD_CATCH:
		case KEYWORD_FINALLY:
			// '.' concatenates in PHP; the '+' inherited from the JavaScript
			// original performed arithmetic on the two strings instead.
			throw $this->t->newSyntaxError($tt . ' without preceding try');

		case KEYWORD_THROW:
			$n = new JSNode($this->t);
			$n->value = $this->Expression($x);
		break;

		case KEYWORD_RETURN:
			if (!$x->inFunction)
				throw $this->t->newSyntaxError('Invalid return');

			$n = new JSNode($this->t);
			$tt = $this->t->peekOnSameLine();
			if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
				$n->value = $this->Expression($x);
			else
				$n->value = null;
		break;

		case KEYWORD_WITH:
			$n = new JSNode($this->t);
			$n->object = $this->ParenExpression($x);
			$n->body = $this->nest($x, $n);
		return $n;

		case KEYWORD_VAR:
		case KEYWORD_CONST:
			$n = $this->Variables($x);
		break;

		case TOKEN_CONDCOMMENT_START:
		case TOKEN_CONDCOMMENT_END:
			$n = new JSNode($this->t);
		return $n;

		case KEYWORD_DEBUGGER:
			$n = new JSNode($this->t);
		break;

		case TOKEN_NEWLINE:
		case OP_SEMICOLON:
			// empty statement
			$n = new JSNode($this->t, OP_SEMICOLON);
			$n->expression = null;
		return $n;

		default:
			if ($tt == TOKEN_IDENTIFIER)
			{
				// an identifier directly followed by ':' is a label
				$this->t->scanOperand = false;
				$tt = $this->t->peek();
				$this->t->scanOperand = true;
				if ($tt == OP_COLON)
				{
					$label = $this->t->currentToken()->value;
					$ss = $x->stmtStack;
					for ($i = count($ss) - 1; $i >= 0; --$i)
					{
						if ($ss[$i]->label == $label)
							throw $this->t->newSyntaxError('Duplicate label');
					}

					$this->t->get();
					$n = new JSNode($this->t, JS_LABEL);
					$n->label = $label;
					$n->statement = $this->nest($x, $n);

					return $n;
				}
			}

			// expression statement: push the token back and parse it as an expression
			$n = new JSNode($this->t, OP_SEMICOLON);
			$this->t->unget();
			$n->expression = $this->Expression($x);
			$n->end = $n->expression->end;
		break;
	}

	if ($this->t->lineno == $this->t->currentToken()->lineno)
	{
		$tt = $this->t->peekOnSameLine();
		if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
			throw $this->t->newSyntaxError('Missing ; before statement');
	}

	$this->t->match(OP_SEMICOLON);

	return $n;
}
/**
 * Parse a function (or getter/setter) definition; the introducing token
 * has already been consumed by the caller.
 *
 * @param object $x context of the enclosing scope; declared functions
 *  are appended to its funDecls
 * @param bool $requireName throw when no function name follows
 * @param int $functionForm DECLARED_FORM, EXPRESSED_FORM or STATEMENT_FORM
 * @return object the function node
 * @throws Exception on a missing name or a malformed parameter list
 */
private function FunctionDefinition($x, $requireName, $functionForm)
{
	$fn = new JSNode($this->t);

	// anything other than 'function' here is treated as a get/set accessor
	if ($fn->type != KEYWORD_FUNCTION)
	{
		if ($fn->value == 'get')
			$fn->type = JS_GETTER;
		else
			$fn->type = JS_SETTER;
	}

	if ($this->t->match(TOKEN_IDENTIFIER))
	{
		$fn->name = $this->t->currentToken()->value;
	}
	elseif ($requireName)
	{
		throw $this->t->newSyntaxError('Missing function identifier');
	}

	$this->t->mustMatch(OP_LEFT_PAREN);
	$fn->params = array();

	for ($tok = $this->t->get(); $tok != OP_RIGHT_PAREN; $tok = $this->t->get())
	{
		if ($tok != TOKEN_IDENTIFIER)
			throw $this->t->newSyntaxError('Missing formal parameter');

		array_push($fn->params, $this->t->currentToken()->value);

		// parameters are comma separated; no comma before the closing paren
		if ($this->t->peek() != OP_RIGHT_PAREN)
			$this->t->mustMatch(OP_COMMA);
	}

	$this->t->mustMatch(OP_LEFT_CURLY);

	// the body is parsed in a fresh function-level context
	$bodyContext = new JSCompilerContext(true);
	$fn->body = $this->Script($bodyContext);

	$this->t->mustMatch(OP_RIGHT_CURLY);
	$fn->end = $this->t->currentToken()->end;

	$fn->functionForm = $functionForm;
	if ($functionForm == DECLARED_FORM)
	{
		array_push($x->funDecls, $fn);
	}

	return $fn;
}
/**
 * Parse the comma separated declarations of a var/const statement; the
 * keyword itself has already been consumed by the caller.
 *
 * @param object $x compiler context; every declaration is appended to
 *  its varDecls
 * @return object node holding one child per declared name
 * @throws Exception when a compound assignment is used as initializer
 */
private function Variables($x)
{
	$declaration = new JSNode($this->t);

	do
	{
		$this->t->mustMatch(TOKEN_IDENTIFIER);

		$variable = new JSNode($this->t);
		$variable->name = $variable->value;

		if ($this->t->match(OP_ASSIGN))
		{
			// only a plain '=' may initialise a variable
			if ($this->t->currentToken()->assignOp)
			{
				throw $this->t->newSyntaxError('Invalid variable initialization');
			}

			$variable->initializer = $this->Expression($x, OP_COMMA);
		}

		$variable->readOnly = $declaration->type == KEYWORD_CONST;

		$declaration->addNode($variable);
		array_push($x->varDecls, $variable);
	}
	while ($this->t->match(OP_COMMA));

	return $declaration;
}
/**
 * Parse one expression using two explicit stacks (operators and operands)
 * that are folded together by reduce() according to $this->opPrecedence.
 *
 * Array and object initialisers are parsed by recursive descent from within
 * the loop. The bracket/curly/paren/hook nesting levels recorded on entry
 * are used to decide whether a closing token belongs to this expression or
 * to an enclosing construct.
 *
 * @param object $x     Compiler context; its bracketLevel, curlyLevel,
 *                      parenLevel, hookLevel, inForLoopInit and
 *                      ecmaStrictMode fields are read (and the level
 *                      counters updated) here.
 * @param mixed  $stop  Optional token type that ends the expression when it
 *                      is met at the nesting levels seen on entry.
 * @return mixed        The last remaining operand after all pending
 *                      operators have been reduced.
 * @throws Exception    On malformed input (see the newSyntaxError() calls).
 */
private function Expression($x, $stop=false)
{
    $operators = array();
    $operands = array();
    $n = false;

    // Nesting levels on entry; used to detect tokens owned by an outer construct.
    $bl = $x->bracketLevel;
    $cl = $x->curlyLevel;
    $pl = $x->parenLevel;
    $hl = $x->hookLevel;

    while (($tt = $this->t->get()) != TOKEN_END)
    {
        if ($tt == $stop &&
            $x->bracketLevel == $bl &&
            $x->curlyLevel == $cl &&
            $x->parenLevel == $pl &&
            $x->hookLevel == $hl
        )
        {
            // Stop only if tt matches the optional stop parameter, and that
            // token is not quoted by some kind of bracket.
            break;
        }

        switch ($tt)
        {
            case OP_SEMICOLON:
                // NB: cannot be empty, Statement handled that.
            break 2;

            case OP_HOOK:
                if ($this->t->scanOperand)
                    break 2;

                while ( !empty($operators) &&
                    $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
                )
                    $this->reduce($operators, $operands);

                array_push($operators, new JSNode($this->t));

                ++$x->hookLevel;
                $this->t->scanOperand = true;
                $n = $this->Expression($x);

                if (!$this->t->match(OP_COLON))
                    break 2;

                --$x->hookLevel;
                array_push($operands, $n);
            break;

            case OP_COLON:
                if ($x->hookLevel)
                    break 2;

                throw $this->t->newSyntaxError('Invalid label');

            case OP_ASSIGN:
                if ($this->t->scanOperand)
                    break 2;

                // Use >, not >=, for right-associative ASSIGN
                while ( !empty($operators) &&
                    $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
                )
                    $this->reduce($operators, $operands);

                array_push($operators, new JSNode($this->t));
                end($operands)->assignOp = $this->t->currentToken()->assignOp;
                $this->t->scanOperand = true;
            break;

            case KEYWORD_IN:
                // An in operator should not be parsed if we're parsing the head of
                // a for (...) loop, unless it is in the then part of a conditional
                // expression, or parenthesized somehow.
                if ($x->inForLoopInit && !$x->hookLevel &&
                    !$x->bracketLevel && !$x->curlyLevel &&
                    !$x->parenLevel
                )
                    break 2;
            // FALL THROUGH
            case OP_COMMA:
                // A comma operator should not be parsed if we're parsing the then part
                // of a conditional expression unless it's parenthesized somehow.
                if ($tt == OP_COMMA && $x->hookLevel &&
                    !$x->bracketLevel && !$x->curlyLevel &&
                    !$x->parenLevel
                )
                    break 2;
            // Treat comma as left-associative so reduce can fold left-heavy
            // COMMA trees into a single array.
            // FALL THROUGH
            case OP_OR:
            case OP_AND:
            case OP_BITWISE_OR:
            case OP_BITWISE_XOR:
            case OP_BITWISE_AND:
            case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
            case OP_LT: case OP_LE: case OP_GE: case OP_GT:
            case KEYWORD_INSTANCEOF:
            case OP_LSH: case OP_RSH: case OP_URSH:
            case OP_PLUS: case OP_MINUS:
            case OP_MUL: case OP_DIV: case OP_MOD:
            case OP_DOT:
                if ($this->t->scanOperand)
                    break 2;

                while ( !empty($operators) &&
                    $this->opPrecedence[end($operators)->type] >= $this->opPrecedence[$tt]
                )
                    $this->reduce($operators, $operands);

                if ($tt == OP_DOT)
                {
                    // Member access is folded into an operand immediately.
                    $this->t->mustMatch(TOKEN_IDENTIFIER);
                    array_push($operands, new JSNode($this->t, OP_DOT, array_pop($operands), new JSNode($this->t)));
                }
                else
                {
                    array_push($operators, new JSNode($this->t));
                    $this->t->scanOperand = true;
                }
            break;

            case KEYWORD_DELETE: case KEYWORD_VOID: case KEYWORD_TYPEOF:
            case OP_NOT: case OP_BITWISE_NOT: case OP_UNARY_PLUS: case OP_UNARY_MINUS:
            case KEYWORD_NEW:
                if (!$this->t->scanOperand)
                    break 2;

                array_push($operators, new JSNode($this->t));
            break;

            case OP_INCREMENT: case OP_DECREMENT:
                if ($this->t->scanOperand)
                {
                    array_push($operators, new JSNode($this->t));  // prefix increment or decrement
                }
                else
                {
                    // Don't cross a line boundary for postfix {in,de}crement.
                    $t = $this->t->tokens[($this->t->tokenIndex + $this->t->lookahead - 1) & 3];
                    if ($t && $t->lineno != $this->t->lineno)
                        break 2;

                    // Use >, not >=, so postfix has higher precedence than prefix.
                    // The emptiness test is part of the loop condition because
                    // reduce() pops the operator stack and may drain it.
                    while ( !empty($operators) &&
                        $this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
                    )
                        $this->reduce($operators, $operands);

                    $n = new JSNode($this->t, $tt, array_pop($operands));
                    $n->postfix = true;
                    array_push($operands, $n);
                }
            break;

            case KEYWORD_FUNCTION:
                if (!$this->t->scanOperand)
                    break 2;

                array_push($operands, $this->FunctionDefinition($x, false, EXPRESSED_FORM));
                $this->t->scanOperand = false;
            break;

            case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
            case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
                if (!$this->t->scanOperand)
                    break 2;

                array_push($operands, new JSNode($this->t));
                $this->t->scanOperand = false;
            break;

            case TOKEN_CONDCOMMENT_START:
            case TOKEN_CONDCOMMENT_END:
                if ($this->t->scanOperand)
                    array_push($operators, new JSNode($this->t));
                else
                    array_push($operands, new JSNode($this->t));
            break;

            case OP_LEFT_BRACKET:
                if ($this->t->scanOperand)
                {
                    // Array initialiser.  Parse using recursive descent, as the
                    // sub-grammar here is not an operator grammar.
                    $n = new JSNode($this->t, JS_ARRAY_INIT);
                    while (($tt = $this->t->peek()) != OP_RIGHT_BRACKET)
                    {
                        if ($tt == OP_COMMA)
                        {
                            // Elided element: record a hole.
                            $this->t->get();
                            $n->addNode(null);
                            continue;
                        }

                        $n->addNode($this->Expression($x, OP_COMMA));
                        if (!$this->t->match(OP_COMMA))
                            break;
                    }

                    $this->t->mustMatch(OP_RIGHT_BRACKET);
                    array_push($operands, $n);
                    $this->t->scanOperand = false;
                }
                else
                {
                    // Property indexing operator.
                    array_push($operators, new JSNode($this->t, JS_INDEX));
                    $this->t->scanOperand = true;
                    ++$x->bracketLevel;
                }
            break;

            case OP_RIGHT_BRACKET:
                if ($this->t->scanOperand || $x->bracketLevel == $bl)
                    break 2;

                // Reduce until the matching index operator has been folded.
                while ($this->reduce($operators, $operands)->type != JS_INDEX)
                    continue;

                --$x->bracketLevel;
            break;

            case OP_LEFT_CURLY:
                if (!$this->t->scanOperand)
                    break 2;

                // Object initialiser.  As for array initialisers (see above),
                // parse using recursive descent.
                ++$x->curlyLevel;
                $n = new JSNode($this->t, JS_OBJECT_INIT);
                while (!$this->t->match(OP_RIGHT_CURLY))
                {
                    do
                    {
                        $tt = $this->t->get();
                        $tv = $this->t->currentToken()->value;
                        if (($tv == 'get' || $tv == 'set') && $this->t->peek() == TOKEN_IDENTIFIER)
                        {
                            if ($x->ecmaStrictMode)
                                throw $this->t->newSyntaxError('Illegal property accessor');

                            $n->addNode($this->FunctionDefinition($x, true, EXPRESSED_FORM));
                        }
                        else
                        {
                            switch ($tt)
                            {
                                case TOKEN_IDENTIFIER:
                                case TOKEN_NUMBER:
                                case TOKEN_STRING:
                                    $id = new JSNode($this->t);
                                break;

                                case OP_RIGHT_CURLY:
                                    if ($x->ecmaStrictMode)
                                        throw $this->t->newSyntaxError('Illegal trailing ,');
                                // Leave the inner switch, the do-while and the outer while.
                                break 3;

                                default:
                                    throw $this->t->newSyntaxError('Invalid property name');
                            }

                            $this->t->mustMatch(OP_COLON);
                            $n->addNode(new JSNode($this->t, JS_PROPERTY_INIT, $id, $this->Expression($x, OP_COMMA)));
                        }
                    }
                    while ($this->t->match(OP_COMMA));

                    $this->t->mustMatch(OP_RIGHT_CURLY);
                    break;
                }

                array_push($operands, $n);
                $this->t->scanOperand = false;
                --$x->curlyLevel;
            break;

            case OP_RIGHT_CURLY:
                if (!$this->t->scanOperand && $x->curlyLevel != $cl)
                    throw new Exception('PANIC: right curly botch');
            break 2;

            case OP_LEFT_PAREN:
                if ($this->t->scanOperand)
                {
                    array_push($operators, new JSNode($this->t, JS_GROUP));
                }
                else
                {
                    while ( !empty($operators) &&
                        $this->opPrecedence[end($operators)->type] > $this->opPrecedence[KEYWORD_NEW]
                    )
                        $this->reduce($operators, $operands);

                    // Handle () now, to regularize the n-ary case for n > 0.
                    // We must set scanOperand in case there are arguments and
                    // the first one is a regexp or unary+/-.
                    $n = end($operators);
                    $this->t->scanOperand = true;
                    if ($this->t->match(OP_RIGHT_PAREN))
                    {
                        if ($n && $n->type == KEYWORD_NEW)
                        {
                            array_pop($operators);
                            $n->addNode(array_pop($operands));
                        }
                        else
                        {
                            $n = new JSNode($this->t, JS_CALL, array_pop($operands), new JSNode($this->t, JS_LIST));
                        }

                        array_push($operands, $n);
                        $this->t->scanOperand = false;
                        break;
                    }

                    if ($n && $n->type == KEYWORD_NEW)
                        $n->type = JS_NEW_WITH_ARGS;
                    else
                        array_push($operators, new JSNode($this->t, JS_CALL));
                }

                ++$x->parenLevel;
            break;

            case OP_RIGHT_PAREN:
                if ($this->t->scanOperand || $x->parenLevel == $pl)
                    break 2;

                while (($tt = $this->reduce($operators, $operands)->type) != JS_GROUP &&
                    $tt != JS_CALL && $tt != JS_NEW_WITH_ARGS
                )
                {
                    continue;
                }

                if ($tt != JS_GROUP)
                {
                    // Normalise the argument child of a call/new node to a JS_LIST.
                    $n = end($operands);
                    if ($n->treeNodes[1]->type != OP_COMMA)
                        $n->treeNodes[1] = new JSNode($this->t, JS_LIST, $n->treeNodes[1]);
                    else
                        $n->treeNodes[1]->type = JS_LIST;
                }

                --$x->parenLevel;
            break;

            // Automatic semicolon insertion means we may scan across a newline
            // and into the beginning of another statement.  If so, break out of
            // the while loop and let the t.scanOperand logic handle errors.
            default:
                break 2;
        }
    }

    if ($x->hookLevel != $hl)
        throw $this->t->newSyntaxError('Missing : in conditional expression');

    if ($x->parenLevel != $pl)
        throw $this->t->newSyntaxError('Missing ) in parenthetical');

    if ($x->bracketLevel != $bl)
        throw $this->t->newSyntaxError('Missing ] in index expression');

    if ($this->t->scanOperand)
        throw $this->t->newSyntaxError('Missing operand');

    // Resume default mode, scanning for operands, not operators.
    $this->t->scanOperand = true;
    $this->t->unget();

    while (count($operators))
        $this->reduce($operators, $operands);

    return array_pop($operands);
}
/**
 * Parse an expression that must be wrapped in parentheses.
 *
 * @param object $x  Compiler context, passed through to Expression().
 * @return mixed     Whatever Expression() returned for the enclosed expression.
 */
private function ParenExpression($x)
{
    $tokenizer = $this->t;

    $tokenizer->mustMatch(OP_LEFT_PAREN);
    $inner = $this->Expression($x);
    $tokenizer->mustMatch(OP_RIGHT_PAREN);

    return $inner;
}
/**
 * Statement stack and nested statement handler.
 *
 * Pushes $node on the context's statement stack while one statement is
 * parsed, then pops it again.
 *
 * @param object $x     Compiler context owning stmtStack.
 * @param mixed  $node  Node to keep on the stack during the nested parse.
 * @param mixed  $end   Optional token type that must follow the statement.
 * @return mixed        The result of the nested statement() call.
 */
private function nest($x, $node, $end = false)
{
    $x->stmtStack[] = $node;
    $stmt = $this->statement($x);
    array_pop($x->stmtStack);

    if (!$end)
        return $stmt;

    $this->t->mustMatch($end);

    return $stmt;
}
/**
 * Pop the top operator and attach its operands to it.
 *
 * For operators whose arity in $this->opArity is -2, a new right-hand
 * operand is appended to an existing node of the same type when one sits
 * directly below it on the operand stack; otherwise the operator is handled
 * as binary.
 *
 * @param array $operators  Operator stack (modified in place).
 * @param array $operands   Operand stack (modified in place).
 * @return mixed            The node that received the operand(s).
 */
private function reduce(&$operators, &$operands)
{
    $node = array_pop($operators);
    $opType = $node->type;
    $arity = $this->opArity[$opType];
    $operandCount = count($operands);

    if ($arity == -2)
    {
        // Flatten left-associative trees
        if ($operandCount >= 2 && $operands[$operandCount - 2]->type == $opType)
        {
            $target = $operands[$operandCount - 2];
            $target->addNode(array_pop($operands));

            return $target;
        }

        $arity = 2;
    }

    // Always go through addNode() so the node's start and end get updated
    $taken = array_splice($operands, $operandCount - $arity);
    $i = 0;
    while ($i < $arity)
    {
        $node->addNode($taken[$i]);
        ++$i;
    }

    // Include closing bracket or postfix operator in [start,end]
    $tokenEnd = $this->t->currentToken()->end;
    if ($node->end < $tokenEnd)
        $node->end = $tokenEnd;

    $operands[] = $node;

    return $node;
}
/**
 * Mutable state shared while one script or function body is being parsed.
 */
class JSCompilerContext
{
    // Mode flags
    public $inFunction = false;
    public $inForLoopInit = false;
    public $ecmaStrictMode = false;

    // Current nesting depth of [], {}, () and ?: constructs
    public $bracketLevel = 0;
    public $curlyLevel = 0;
    public $parenLevel = 0;
    public $hookLevel = 0;

    // Enclosing statements and collected declarations
    public $stmtStack = array();
    public $funDecls = array();
    public $varDecls = array();

    /**
     * @param mixed $inFunction  Stored as-is in $inFunction.
     */
    public function __construct($inFunction)
    {
        $this->inFunction = $inFunction;
    }
}
/**
 * Syntax tree node.
 *
 * The core fields are private and reached through __get()/__set(); any
 * other property assigned from outside (for example name, initializer or
 * postfix) is created on the fly by __set(). The attribute below declares
 * that this is intentional, so PHP 8.2+ does not report a dynamic-property
 * deprecation; older PHP versions parse that line as an ordinary # comment.
 */
#[\AllowDynamicProperties]
class JSNode
{
    private $type;
    private $value;
    private $lineno;
    private $start;
    private $end;

    public $treeNodes = array();
    public $funDecls = array();
    public $varDecls = array();

    /**
     * Build a node from the tokenizer's current token.
     *
     * Any arguments after $type are added as child nodes, in order.
     *
     * @param object $t     Tokenizer; currentToken() and lineno are used.
     * @param mixed  $type  Node type; a falsy value means "use the token's type".
     */
    public function __construct($t, $type=0)
    {
        if ($token = $t->currentToken())
        {
            $this->type = $type ? $type : $token->type;
            $this->value = $token->value;
            $this->lineno = $token->lineno;
            $this->start = $token->start;
            $this->end = $token->end;
        }
        else
        {
            // No token read yet: only type and line number are known.
            $this->type = $type;
            $this->lineno = $t->lineno;
        }

        if (($numargs = func_num_args()) > 2)
        {
            $args = func_get_args();
            for ($i = 2; $i < $numargs; $i++)
                $this->addNode($args[$i]);
        }
    }

    // we don't want to bloat our object with all kind of specific properties, so we use overloading
    public function __set($name, $value)
    {
        $this->$name = $value;
    }

    /**
     * Read a private or dynamically created property.
     *
     * @return mixed  The property value, or null when it is unset or null.
     */
    public function __get($name)
    {
        if (isset($this->$name))
            return $this->$name;

        return null;
    }

    /**
     * Append a child node, which may be null to record a hole.
     *
     * For a non-null child, this node's start/end are widened to cover it.
     */
    public function addNode($node)
    {
        if ($node !== null)
        {
            if ($node->start < $this->start)
                $this->start = $node->start;
            if ($this->end < $node->end)
                $this->end = $node->end;
        }

        $this->treeNodes[] = $node;
    }
}
/**
 * JavaScript tokenizer.
 *
 * Tokens are stored in a four-slot ring buffer ($tokens, indexed with & 3),
 * which allows up to three tokens to be pushed back with unget().
 */
class JSTokenizer
{
    private $cursor = 0;
    private $source;

    public $tokens = array();
    public $tokenIndex = 0;
    public $lookahead = 0;
    public $scanNewlines = false;
    public $scanOperand = true;

    public $filename;
    public $lineno;

    private $keywords = array(
        'break',
        'case', 'catch', 'const', 'continue',
        'debugger', 'default', 'delete', 'do',
        'else', 'enum',
        'false', 'finally', 'for', 'function',
        'if', 'in', 'instanceof',
        'new', 'null',
        'return',
        'switch',
        'this', 'throw', 'true', 'try', 'typeof',
        'var', 'void',
        'while', 'with'
    );

    // Longer operators are listed before their prefixes so the alternation
    // built from this list matches the longest operator first.
    private $opTypeNames = array(
        ';', ',', '?', ':', '||', '&&', '|', '^',
        '&', '===', '==', '=', '!==', '!=', '<<', '<=',
        '<', '>>>', '>>', '>=', '>', '++', '--', '+',
        '-', '*', '/', '%', '!', '~', '.', '[',
        ']', '{', '}', '(', ')', '@*/'
    );

    private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
    private $opRegExp;

    public function __construct()
    {
        $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
    }

    /**
     * Load a new source string and reset all scanning state.
     *
     * @param string $source    Script text to tokenize.
     * @param string $filename  Name used in error messages; '[inline]' when empty.
     * @param int    $lineno    Line number assigned to the first line.
     */
    public function init($source, $filename = '', $lineno = 1)
    {
        $this->source = $source;
        $this->filename = $filename ? $filename : '[inline]';
        $this->lineno = $lineno;

        $this->cursor = 0;
        $this->tokens = array();
        $this->tokenIndex = 0;
        $this->lookahead = 0;
        $this->scanNewlines = false;
        $this->scanOperand = true;
    }

    /**
     * Return the unread source from the cursor on.
     *
     * @param int|null $chunksize  Maximum length; a falsy value means "everything".
     */
    public function getInput($chunksize)
    {
        if ($chunksize)
            return substr($this->source, $this->cursor, $chunksize);

        return substr($this->source, $this->cursor);
    }

    public function isDone()
    {
        return $this->peek() == TOKEN_END;
    }

    /**
     * Consume the next token if it has type $tt.
     *
     * @return bool  True on a match; otherwise the token is pushed back and
     *               false is returned (unget() has no return value).
     */
    public function match($tt)
    {
        return $this->get() == $tt || $this->unget();
    }

    /**
     * Like match(), but a mismatch is a syntax error.
     *
     * @return JSToken   The token that was matched.
     * @throws Exception When the next token is not of type $tt.
     */
    public function mustMatch($tt)
    {
        if (!$this->match($tt))
            throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');

        return $this->currentToken();
    }

    /**
     * Return the type of the next token without consuming it.
     */
    public function peek()
    {
        if ($this->lookahead)
        {
            $next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
            if ($this->scanNewlines && $next->lineno != $this->lineno)
                $tt = TOKEN_NEWLINE;
            else
                $tt = $next->type;
        }
        else
        {
            $tt = $this->get();
            $this->unget();
        }

        return $tt;
    }

    /**
     * peek() with newline scanning switched on for the duration of the call.
     */
    public function peekOnSameLine()
    {
        $this->scanNewlines = true;
        $tt = $this->peek();
        $this->scanNewlines = false;

        return $tt;
    }

    /**
     * @return JSToken|null  The current token, or null before any token was read.
     */
    public function currentToken()
    {
        if (!empty($this->tokens))
            return $this->tokens[$this->tokenIndex];

        return null;
    }

    /**
     * Read the next token and return its type.
     *
     * Tokens previously pushed back with unget() are replayed first. Otherwise
     * whitespace and comments are skipped and a new token is scanned from the
     * source into the next ring-buffer slot.
     *
     * @param int|null $chunksize  Size of the source window to inspect; when a
     *                             pattern cannot be matched inside the window
     *                             the scan is retried on the whole remainder.
     * @return mixed               Token type: a TOKEN_* constant, a keyword or
     *                             an operator string.
     * @throws Exception           On an illegal token or unterminated string.
     */
    public function get($chunksize = 1000)
    {
        while($this->lookahead)
        {
            $this->lookahead--;
            $this->tokenIndex = ($this->tokenIndex + 1) & 3;
            $token = $this->tokens[$this->tokenIndex];
            if ($token->type != TOKEN_NEWLINE || $this->scanNewlines)
                return $token->type;
        }

        $conditional_comment = false;

        // strip whitespace and comments
        while(true)
        {
            $input = $this->getInput($chunksize);

            // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
            $re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
            if (preg_match($re, $input, $match))
            {
                $spaces = $match[0];
                $spacelen = strlen($spaces);
                $this->cursor += $spacelen;
                if (!$this->scanNewlines)
                    $this->lineno += substr_count($spaces, "\n");

                if ($spacelen == $chunksize)
                    continue; // complete chunk contained whitespace

                $input = $this->getInput($chunksize);
                if ($input == '' || $input[0] != '/')
                    break;
            }

            // Comments
            if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match))
            {
                if (!$chunksize)
                    break;

                // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
                $chunksize = null;
                continue;
            }

            // check if this is a conditional (JScript) comment
            if (!empty($match[1]))
            {
                $match[0] = '/*' . $match[1];
                $conditional_comment = true;
                break;
            }
            else
            {
                $this->cursor += strlen($match[0]);
                $this->lineno += substr_count($match[0], "\n");
            }
        }

        if ($input == '')
        {
            $tt = TOKEN_END;
            $match = array('');
        }
        elseif ($conditional_comment)
        {
            $tt = TOKEN_CONDCOMMENT_START;
        }
        else
        {
            switch ($input[0])
            {
                case '0':
                    // hexadecimal; isset() guards against a lone "0" at the very end of the source
                    if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
                    {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH

                case '1': case '2': case '3': case '4': case '5':
                case '6': case '7': case '8': case '9':
                    // should always match
                    preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
                    $tt = TOKEN_NUMBER;
                break;

                case "'":
                    if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
                    {
                        $tt = TOKEN_STRING;
                    }
                    else
                    {
                        if ($chunksize)
                            return $this->get(null); // retry with a full chunk fetch

                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                break;

                case '"':
                    if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
                    {
                        $tt = TOKEN_STRING;
                    }
                    else
                    {
                        if ($chunksize)
                            return $this->get(null); // retry with a full chunk fetch

                        throw $this->newSyntaxError('Unterminated string literal');
                    }
                break;

                case '/':
                    if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/', $input, $match))
                    {
                        $tt = TOKEN_REGEXP;
                        break;
                    }
                // FALL THROUGH

                case '|':
                case '^':
                case '&':
                case '<':
                case '>':
                case '+':
                case '-':
                case '*':
                case '%':
                case '=':
                case '!':
                    // should always match
                    preg_match($this->opRegExp, $input, $match);
                    $op = $match[0];
                    $oplen = strlen($op);
                    // isset() guards against an operator that is the last character of the source
                    if (in_array($op, $this->assignOps, true) && isset($input[$oplen]) && $input[$oplen] == '=')
                    {
                        $tt = OP_ASSIGN;
                        $match[0] .= '=';
                    }
                    else
                    {
                        $tt = $op;
                        if ($this->scanOperand)
                        {
                            if ($op == OP_PLUS)
                                $tt = OP_UNARY_PLUS;
                            elseif ($op == OP_MINUS)
                                $tt = OP_UNARY_MINUS;
                        }
                        $op = null;
                    }
                break;

                case '.':
                    if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match))
                    {
                        $tt = TOKEN_NUMBER;
                        break;
                    }
                // FALL THROUGH

                case ';':
                case ',':
                case '?':
                case ':':
                case '~':
                case '[':
                case ']':
                case '{':
                case '}':
                case '(':
                case ')':
                    // these are all single
                    $match = array($input[0]);
                    $tt = $input[0];
                break;

                case '@':
                    // check end of conditional comment
                    if (substr($input, 0, 3) == '@*/')
                    {
                        $match = array('@*/');
                        $tt = TOKEN_CONDCOMMENT_END;
                    }
                    else
                        throw $this->newSyntaxError('Illegal token');
                break;

                case "\n":
                    if ($this->scanNewlines)
                    {
                        $match = array("\n");
                        $tt = TOKEN_NEWLINE;
                    }
                    else
                        throw $this->newSyntaxError('Illegal token');
                break;

                default:
                    // Fast path for identifiers: word chars followed by whitespace or various other tokens.
                    // Note we don't need to exclude digits in the first char, as they've already been found
                    // above.
                    if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
                    {
                        // Character classes per ECMA-262 edition 5.1 section 7.6
                        // Per spec, must accept Unicode 3.0, *may* accept later versions.
                        // We'll take whatever PCRE understands, which should be more recent.
                        $identifierStartChars = "\\p{L}\\p{Nl}" .  # UnicodeLetter
                                                "\$" .
                                                "_";
                        $identifierPartChars  = $identifierStartChars .
                                                "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
                                                "\\p{Nd}" .        # UnicodeDigit
                                                "\\p{Pc}";         # UnicodeConnectorPunctuation
                        // Exactly four hex digits after \u
                        $unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";
                        $identifierRegex = "/^" .
                                           "(?:[$identifierStartChars]|$unicodeEscape)" .
                                           "(?:[$identifierPartChars]|$unicodeEscape)*" .
                                           "/uS";
                        if (preg_match($identifierRegex, $input, $match))
                        {
                            if (strpos($match[0], '\\') !== false) {
                                // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
                                // the original chars, but only within the boundaries of the identifier.
                                $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
                                        array(__CLASS__, 'unicodeEscapeCallback'),
                                        $match[0]);

                                // Since our original regex didn't de-escape the originals, we need to check for validity again.
                                // No need to worry about token boundaries, as anything outside the identifier is illegal!
                                if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
                                    throw $this->newSyntaxError('Illegal token');
                                }

                                // Per spec it _ought_ to work to use these escapes for keywords words as well...
                                // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
                                // that don't match the keyword.
                                if (in_array($decoded, $this->keywords, true)) {
                                    throw $this->newSyntaxError('Illegal token');
                                }

                                // TODO: save the decoded form for output?
                            }
                        }
                        else
                            throw $this->newSyntaxError('Illegal token');
                    }
                    $tt = in_array($match[0], $this->keywords, true) ? $match[0] : TOKEN_IDENTIFIER;
            }
        }

        // Store the token in the next ring-buffer slot, creating the slot on first use.
        $this->tokenIndex = ($this->tokenIndex + 1) & 3;

        if (!isset($this->tokens[$this->tokenIndex]))
            $this->tokens[$this->tokenIndex] = new JSToken();

        $token = $this->tokens[$this->tokenIndex];
        $token->type = $tt;

        if ($tt == OP_ASSIGN)
            $token->assignOp = $op;

        $token->start = $this->cursor;

        $token->value = $match[0];
        $this->cursor += strlen($match[0]);

        $token->end = $this->cursor;
        $token->lineno = $this->lineno;

        return $tt;
    }

    /**
     * Push the current token back so the next get() returns it again.
     *
     * @throws Exception  When this would be the fourth token pushed back.
     */
    public function unget()
    {
        if (++$this->lookahead == 4)
            throw $this->newSyntaxError('PANIC: too much lookahead!');

        $this->tokenIndex = ($this->tokenIndex - 1) & 3;
    }

    /**
     * Build (but do not throw) an exception carrying file name and line number.
     */
    public function newSyntaxError($m)
    {
        return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
    }

    /**
     * preg_replace_callback() helper: turn the four hex digits captured from
     * a \uXXXX escape into the corresponding UTF-8 character.
     */
    public static function unicodeEscapeCallback($m)
    {
        return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
    }
}
/**
 * Plain record describing one scanned token.
 */
class JSToken
{
    // Token type and the matched source text
    public $type;
    public $value;

    // Cursor positions in the source before and after the token
    public $start;
    public $end;

    // Tokenizer line counter at the time the token was stored
    public $lineno;

    // Operator part of a compound assignment; written only for assignment tokens
    public $assignOp;
}