Properly deprecate srprop=score|hasrelated
[mediawiki.git] / includes / libs / JavaScriptMinifier.php
blob2990782ce1e66e1b7945650a8c3f238e58e99722
<?php
// @codingStandardsIgnoreFile File external to MediaWiki. Ignore coding conventions checks.
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Choose any of Apache, MIT, GPL, LGPL
 */
/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 *
 * The minifier is a single-pass tokenizer driven by a small state machine. It never builds
 * a parse tree; the state is only tracked so that a slash can be classified as a division
 * operator or as the start of a regexp literal, and so that newlines which would trigger
 * JavaScript's automatic semicolon insertion are preserved.
 */
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT                = 0;
	const CONDITION                = 1;
	const PROPERTY_ASSIGNMENT      = 2;
	const EXPRESSION               = 3;
	const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP            = 5;
	const EXPRESSION_FUNC          = 6;
	const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP    = 8;
	const EXPRESSION_TERNARY_FUNC  = 9;
	const PAREN_EXPRESSION         = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP      = 11;
	const PAREN_EXPRESSION_FUNC    = 12;
	const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP   = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP       = 1; // unary operators
	const TYPE_INCR_OP     = 2; // ++ and --
	const TYPE_BIN_OP      = 3; // binary operators
	const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK        = 5; // ?
	const TYPE_COLON       = 6; // :
	const TYPE_COMMA       = 7; // ,
	const TYPE_SEMICOLON   = 8; // ;
	const TYPE_BRACE_OPEN  = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN  = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
	const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC        = 16; // keywords: function
	const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
	 *       literals (e.g. quoted strings) longer than $maxLineLength are encountered
	 *       or when required to guard against semicolon insertion.
	 *
	 * Unterminated string or regexp literals are not reported as errors; the remainder of
	 * the input is emitted as a single literal token.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to put each statement on its own line
	 * @param int $maxLineLength Maximum length of a single line, or -1 for no maximum.
	 * @return string|false Minified code, or false (the result of parseError()) when a
	 *   malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!'          => self::TYPE_UN_OP,
			'~'          => self::TYPE_UN_OP,
			'delete'     => self::TYPE_UN_OP,
			'new'        => self::TYPE_UN_OP,
			'typeof'     => self::TYPE_UN_OP,
			'void'       => self::TYPE_UN_OP,
			'++'         => self::TYPE_INCR_OP,
			'--'         => self::TYPE_INCR_OP,
			'!='         => self::TYPE_BIN_OP,
			'!=='        => self::TYPE_BIN_OP,
			'%'          => self::TYPE_BIN_OP,
			'%='         => self::TYPE_BIN_OP,
			'&'          => self::TYPE_BIN_OP,
			'&&'         => self::TYPE_BIN_OP,
			'&='         => self::TYPE_BIN_OP,
			'*'          => self::TYPE_BIN_OP,
			'*='         => self::TYPE_BIN_OP,
			'+='         => self::TYPE_BIN_OP,
			'-='         => self::TYPE_BIN_OP,
			'.'          => self::TYPE_BIN_OP,
			'/'          => self::TYPE_BIN_OP,
			'/='         => self::TYPE_BIN_OP,
			'<'          => self::TYPE_BIN_OP,
			'<<'         => self::TYPE_BIN_OP,
			'<<='        => self::TYPE_BIN_OP,
			'<='         => self::TYPE_BIN_OP,
			'='          => self::TYPE_BIN_OP,
			'=='         => self::TYPE_BIN_OP,
			'==='        => self::TYPE_BIN_OP,
			'>'          => self::TYPE_BIN_OP,
			'>='         => self::TYPE_BIN_OP,
			'>>'         => self::TYPE_BIN_OP,
			'>>='        => self::TYPE_BIN_OP,
			'>>>'        => self::TYPE_BIN_OP,
			'>>>='       => self::TYPE_BIN_OP,
			'^'          => self::TYPE_BIN_OP,
			'^='         => self::TYPE_BIN_OP,
			'|'          => self::TYPE_BIN_OP,
			'|='         => self::TYPE_BIN_OP,
			'||'         => self::TYPE_BIN_OP,
			'in'         => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+'          => self::TYPE_ADD_OP,
			'-'          => self::TYPE_ADD_OP,
			'?'          => self::TYPE_HOOK,
			':'          => self::TYPE_COLON,
			','          => self::TYPE_COMMA,
			';'          => self::TYPE_SEMICOLON,
			'{'          => self::TYPE_BRACE_OPEN,
			'}'          => self::TYPE_BRACE_CLOSE,
			'('          => self::TYPE_PAREN_OPEN,
			'['          => self::TYPE_PAREN_OPEN,
			')'          => self::TYPE_PAREN_CLOSE,
			']'          => self::TYPE_PAREN_CLOSE,
			'break'      => self::TYPE_RETURN,
			'continue'   => self::TYPE_RETURN,
			'return'     => self::TYPE_RETURN,
			'throw'      => self::TYPE_RETURN,
			'catch'      => self::TYPE_IF,
			'for'        => self::TYPE_IF,
			'if'         => self::TYPE_IF,
			'switch'     => self::TYPE_IF,
			'while'      => self::TYPE_IF,
			'with'       => self::TYPE_IF,
			'case'       => self::TYPE_DO,
			'do'         => self::TYPE_DO,
			'else'       => self::TYPE_DO,
			'finally'    => self::TYPE_DO,
			'try'        => self::TYPE_DO,
			'var'        => self::TYPE_DO,
			'function'   => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP      => self::EXPRESSION,
				self::TYPE_INCR_OP    => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
				self::TYPE_IF         => self::CONDITION,
				self::TYPE_FUNC       => self::CONDITION,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COLON      => self::STATEMENT,
				self::TYPE_COMMA      => self::EXPRESSION,
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
				self::TYPE_HOOK       => self::PAREN_EXPRESSION,
				self::TYPE_COLON      => self::PAREN_EXPRESSION,
				self::TYPE_COMMA      => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
			self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_ADD_OP     => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN  => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON   => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP          => true,
			self::EXPRESSION_TERNARY_OP  => true,
			self::PAREN_EXPRESSION_OP    => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes.
				// Each step speculatively adds 2 (the matched character plus the one after
				// it) so that a backslash and the character it escapes are jumped together.
				// The "$end <= $length" guard stops the scan when a backslash is the very
				// last byte of the input, so strcspn() is never given an out-of-range offset.
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, $end ) + 2;
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' && $end <= $length );
				// Undo the speculative extra step; $end now points just past the end quote.
				$end--;
				if ( $end > $length ) {
					// Unterminated literal: the scan ran off the end of the input.
					// Treat the rest of the input as the literal instead of reading
					// past the end of $s further down.
					$end = $length;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					do {
						$end += strcspn( $s, '/[\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' && $end <= $length );
					$end--;
					if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
						// Either the closing slash or the end of input has been reached
						break;
					}
					// We stopped at '[': skip the character class, in which an unescaped
					// slash does not terminate the literal
					do {
						$end += strcspn( $s, ']\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' && $end <= $length );
					$end--;
					if ( $end > $length ) {
						// Unterminated character class; stop instead of scanning from an
						// offset beyond the end of the input
						break;
					}
				}
				if ( $end > $length ) {
					// Unterminated regexp literal: clamp to the end of the input
					$end = $length;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Decimal numeric literal: integer part, optional fraction, optional exponent
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					// Two dots are tolerated so that "1..toString()" keeps working
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP
			) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required
			// This is checked before AND after switching state
			$newlineAdded = false;
			if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Reports a problem found while tokenizing a numeric literal.
	 *
	 * Currently this only signals failure by returning false, which minify() passes
	 * straight back to its caller; none of the arguments are used yet.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript at which the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}