Add parser tests for bug 52468 and bug 52363.
[mediawiki.git] / includes / ConfEditor.php
blob67cb87db1e157d8be24e84dbfddd01628923ad20
1 <?php
2 /**
3 * Configuration file editor.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 /**
24 * This is a state machine style parser with two internal stacks:
25 * * A next state stack, which determines the state the machine will progress to next
26 * * A path stack, which keeps track of the logical location in the file.
28 * Reference grammar:
30 * file = T_OPEN_TAG *statement
31 * statement = T_VARIABLE "=" expression ";"
32 * expression = array / scalar / T_VARIABLE
33 * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
34 * element = assoc-element / expression
35 * assoc-element = scalar T_DOUBLE_ARROW expression
36 * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
38 class ConfEditor {
39 /** The text to parse */
40 var $text;
42 /** The token array from token_get_all() */
43 var $tokens;
45 /** The current position in the token array */
46 var $pos;
48 /** The current 1-based line number */
49 var $lineNum;
51 /** The current 1-based column number */
52 var $colNum;
54 /** The current 0-based byte number */
55 var $byteNum;
57 /** The current ConfEditorToken object */
58 var $currentToken;
60 /** The previous ConfEditorToken object */
61 var $prevToken;
63 /**
64 * The state machine stack. This is an array of strings where the topmost
65 * element will be popped off and become the next parser state.
67 var $stateStack;
69 /**
70 * The path stack is a stack of associative arrays with the following elements:
71 * name The name of top level of the path
72 * level The level (number of elements) of the path
73 * startByte The byte offset of the start of the path
74 * startToken The token offset of the start
75 * endByte The byte offset of the end, plus one
76 * endToken The token offset of the end, plus one
77 * valueStartToken The start token offset of the value part
78 * valueStartByte The start byte offset of the value part
79 * valueEndToken The end token offset of the value part, plus one
80 * valueEndByte The end byte offset of the value part, plus one
81 * nextArrayIndex The next numeric array index at this level
82 * hasComma True if the array element ends with a comma
83 * arrowByte The byte offset of the "=>", or false if there isn't one
85 var $pathStack;
87 /**
88 * The elements of the top of the pathStack for every path encountered, indexed
89 * by slash-separated path.
91 var $pathInfo;
93 /**
94 * Next serial number for whitespace placeholder paths (\@extra-N)
96 var $serial;
98 /**
99 * Editor state. This consists of the internal copy/insert operations which
100 * are applied to the source string to obtain the destination string.
102 var $edits;
/**
 * Convenience entry point for trying the parser from the command line.
 *
 * @param string $text PHP source text to parse
 *
 * @return string "OK" if the text parses; otherwise the parse error message,
 *   a newline, and the highlighted source line from the exception
 */
static function test( $text ) {
	$editor = new self( $text );
	try {
		$editor->parse();
	} catch ( ConfEditorParseError $parseError ) {
		return $parseError->getMessage() . "\n" . $parseError->highlight( $text );
	}
	return "OK";
}
/**
 * Create an editor for the given source text. Nothing is parsed yet; the
 * text is only stored.
 *
 * @param string $text The text to parse
 */
public function __construct( $text ) {
	$this->text = $text;
}
/**
 * Apply a list of edit operations to the text and return the result.
 *
 * The text is parsed first, each operation is translated into copy/insert
 * entries in $this->edits, the entries are replayed to build the output,
 * and the output is parsed again as a sanity check. On success $this->text
 * holds the edited text.
 *
 * @param array $ops Array of operations.
 *
 * Each operation is an associative array with members:
 *    type:     One of delete, set, append or insert (required)
 *    path:     The path to operate on (required)
 *    key:      The array key to insert/append, with PHP quotes
 *    value:    The value, with PHP quotes
 *
 * delete
 *    Deletes an array element or statement with the specified path.
 *    e.g.
 *        array( 'type' => 'delete', 'path' => '$foo/bar/baz' )
 *    is equivalent to the runtime PHP code:
 *        unset( $foo['bar']['baz'] );
 *
 * set
 *    Sets the value of an array element. If the path exists, only its value
 *    region is replaced, so surrounding comments and indenting are kept. If
 *    it does not exist, the last path component becomes the key and the
 *    operation continues as an append to the parent path.
 *
 * append
 *    Appends a new element after the last element of the array, with a
 *    trailing comma.
 *    e.g.
 *        array( 'type' => 'append', 'path' => '$foo/bar',
 *            'key' => 'baz', 'value' => "'x'" )
 *    is like the PHP code:
 *        $foo['bar']['baz'] = 'x';
 *
 * insert
 *    Inserts a new element before the first element of the array.
 *
 * @throws MWException If a path or array element cannot be found, the
 *   operation type is not recognised, or the edited text no longer parses
 * @return string The edited text
 */
public function edit( $ops ) {
	$this->parse();

	// Start from a single operation that copies the whole source verbatim
	$this->edits = array(
		array( 'copy', 0, strlen( $this->text ) )
	);

	foreach ( $ops as $op ) {
		$type = $op['type'];
		$path = $op['path'];
		$value = isset( $op['value'] ) ? $op['value'] : null;
		$key = isset( $op['key'] ) ? $op['key'] : null;

		switch ( $type ) {
			case 'delete':
				list( $from, $to ) = $this->findDeletionRegion( $path );
				$this->replaceSourceRegion( $from, $to, false );
				break;
			case 'set':
				if ( isset( $this->pathInfo[$path] ) ) {
					// Existing element: replace the value region only. The
					// value is used as given, it is not run through var_export().
					list( $from, $to ) = $this->findValueRegion( $path );
					$this->replaceSourceRegion( $from, $to, $value );
					break;
				}
				// No existing path: derive the key from the last path
				// component and continue as an append to the parent path
				$slashPos = strrpos( $path, '/' );
				$key = var_export( substr( $path, $slashPos + 1 ), true );
				$path = substr( $path, 0, $slashPos );
				// Fall through
			case 'append':
				$lastPath = $this->findLastArrayElement( $path );
				if ( $lastPath === false ) {
					throw new MWException( "Can't find any element of array \"$path\"" );
				}
				$lastInfo = $this->pathInfo[$lastPath];

				// A real (non-placeholder) last element without a comma needs
				// one inserted before the new element can follow it
				if ( strpos( $lastPath, '@extra' ) === false && !$lastInfo['hasComma'] ) {
					list( , $valueEnd ) = $this->findValueRegion( $lastPath );
					$this->replaceSourceRegion( $valueEnd - 1, $valueEnd - 1, ',' );
				}

				// The new element copies the last element's indent and goes
				// directly after the last element's region
				list( $from, $to ) = $this->findDeletionRegion( $lastPath );
				$newText = $this->makeElementText( $from, $key, $value, $lastInfo['arrowByte'] );
				$this->replaceSourceRegion( $to, $to, $newText );
				break;
			case 'insert':
				$firstPath = $this->findFirstArrayElement( $path );
				if ( $firstPath === false ) {
					throw new MWException( "Can't find array element of \"$path\"" );
				}
				list( $from, ) = $this->findDeletionRegion( $firstPath );
				$firstInfo = $this->pathInfo[$firstPath];

				// The new element goes directly before the first element's region
				$newText = $this->makeElementText( $from, $key, $value, $firstInfo['arrowByte'] );
				$this->replaceSourceRegion( $from, $from, $newText );
				break;
			default:
				throw new MWException( "Unrecognised operation: \"$type\"" );
		}
	}

	// Replay the edit list to build the output text
	$out = '';
	foreach ( $this->edits as $edit ) {
		if ( $edit[0] == 'copy' ) {
			$out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
		} else {
			// 'insert' entry: literal text
			$out .= $edit[1];
		}
	}

	// Parse the result again as a sanity check
	$this->text = $out;
	try {
		$this->parse();
	} catch ( ConfEditorParseError $parseError ) {
		throw new MWException(
			"Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
			$parseError->getMessage() );
	}

	return $out;
}

/**
 * Build the source text for a new array element, reusing the indentation
 * found at $start.
 *
 * @param int $start Byte offset of the region whose indent is copied
 * @param string|null $key Array key with PHP quotes, or null for none
 * @param string $value Value with PHP quotes
 * @param int|bool $arrowByte Byte offset of the reference element's "=>",
 *   or false; only used when a key is given
 * @return string Element text ending in ",", followed by a newline when an
 *   indent was found at $start and by a single space otherwise
 */
private function makeElementText( $start, $key, $value, $arrowByte ) {
	if ( $key === null ) {
		list( $indent, ) = $this->getIndent( $start );
		$text = "$indent$value,";
	} else {
		list( $indent, $arrowIndent ) = $this->getIndent( $start, $key, $arrowByte );
		$text = "$indent$key$arrowIndent=> $value,";
	}
	return $text . ( $indent === false ? ' ' : "\n" );
}
/**
 * Get the variables defined in the text.
 *
 * Parses the text, then walks every recorded path that starts with "$".
 * Placeholder paths (name starting with "@") are skipped. For each
 * remaining path the value region is cut out of the source, passed through
 * parseScalar() and stored with setVar(), which never overwrites an
 * already-set entry.
 *
 * @return array( varname => value )
 */
function getVars() {
	$vars = array();
	$this->parse();
	foreach ( $this->pathInfo as $path => $data ) {
		// Array keys that look like integers come back as ints; normalise
		// to string before looking at the first character.
		$path = (string)$path;
		if ( substr( $path, 0, 1 ) !== '$' ) {
			continue;
		}
		$trimmedPath = substr( $path, 1 );

		// Positional array elements carry an integer name (see nextPath()),
		// and a name may be the empty string. Offset access ($name[0]) on
		// either raises a notice/warning, so use string functions instead.
		$name = (string)$data['name'];
		$firstChar = substr( $name, 0, 1 );
		if ( $firstChar === '@' ) {
			continue;
		}
		if ( $firstChar === '$' ) {
			$name = substr( $name, 1 );
		}

		// The parent path is the full path minus the trailing name and the
		// slash that separates them.
		$parentPath = substr( $trimmedPath, 0,
			strlen( $trimmedPath ) - strlen( $name ) );
		if ( substr( $parentPath, -1 ) == '/' ) {
			$parentPath = substr( $parentPath, 0, -1 );
		}

		$value = substr( $this->text, $data['valueStartByte'],
			$data['valueEndByte'] - $data['valueStartByte']
		);
		$this->setVar( $vars, $parentPath, $name,
			$this->parseScalar( $value ) );
	}
	return $vars;
}
/**
 * Store a value in a nested array unless that slot is already set.
 * For instance, setVar( $arr, 'foo/bar', 'baz', 3 ); performs
 * $arr['foo']['bar']['baz'] = 3; creating intermediate arrays as needed.
 * An empty $path addresses the top level of $array.
 *
 * @param array &$array Array to modify in place
 * @param string $path Slash-delimited path of the parent
 * @param mixed $key Key within the parent
 * @param mixed $value Value to store
 */
function setVar( &$array, $path, $key, $value ) {
	$target =& $array;
	if ( $path !== '' ) {
		// Descend one level per path segment, creating missing levels
		foreach ( explode( '/', $path ) as $segment ) {
			if ( !isset( $target[$segment] ) ) {
				$target[$segment] = array();
			}
			$target =& $target[$segment];
		}
	}
	if ( isset( $target[$key] ) ) {
		// First writer wins
		return;
	}
	$target[$key] = $value;
}
/**
 * Convert the source text of a PHP scalar into a value.
 *
 * Single-quoted strings have \' and \\ unescaped, double-quoted strings go
 * through stripcslashes(), and text beginning with true, false or null
 * yields the matching constant. Anything else is returned unchanged as a
 * string.
 *
 * @param string $str Source text of the scalar
 * @return mixed Parsed value
 */
function parseScalar( $str ) {
	if ( $str !== '' ) {
		$quote = $str[0];
		// @todo FIXME: the trim() calls are due to a mystery bug where
		// whitespace gets appended to the token; without them the closing
		// quote ended up in the result.
		if ( $quote == '\'' ) {
			$inner = substr( trim( $str ), 1, -1 );
			return strtr( $inner, array( '\\\'' => '\'', '\\\\' => '\\' ) );
		}
		if ( $quote == '"' ) {
			return stripcslashes( substr( trim( $str ), 1, -1 ) );
		}
	}

	// Keywords are matched as prefixes, in this order
	$keywords = array( 'true' => true, 'false' => false, 'null' => null );
	foreach ( $keywords as $keyword => $result ) {
		if ( substr( $str, 0, strlen( $keyword ) ) == $keyword ) {
			return $result;
		}
	}

	// Must be some kind of numeric value; hand the text back untouched and
	// leave the conversion to PHP's weak typing
	return $str;
}
/**
 * Replace the source bytes [$start, $end) with $newText by rewriting the
 * $this->edits list: the 'copy' entry containing the region is split in
 * two around it, with an 'insert' entry in between when $newText is given.
 *
 * NOTE: a zero-width region ($start == $end) that falls exactly on the
 * boundary of a copy entry is treated as outside that entry, so it changes
 * nothing.
 *
 * @param int $start Start byte offset in the source
 * @param int $end End byte offset in the source, plus one
 * @param string|bool $newText Replacement text, or false for a pure deletion
 * @throws MWException If the region straddles the edge of a copy entry
 */
function replaceSourceRegion( $start, $end, $newText = false ) {
	$rewritten = array();
	foreach ( $this->edits as $edit ) {
		// Only copy entries refer to source bytes
		if ( $edit[0] !== 'copy' ) {
			$rewritten[] = $edit;
			continue;
		}

		list( , $copyStart, $copyEnd ) = $edit;
		$touches = ( $start < $copyEnd && $end > $copyStart );
		if ( !$touches ) {
			// Outside this region
			$rewritten[] = $edit;
			continue;
		}

		$crossesStart = ( $start < $copyStart && $end > $copyStart );
		$crossesEnd = ( $start < $copyEnd && $end > $copyEnd );
		if ( $crossesStart || $crossesEnd ) {
			throw new MWException( "Overlapping regions found, can't do the edit" );
		}

		// Split the copy around the region
		$rewritten[] = array( 'copy', $copyStart, $start );
		if ( $newText !== false ) {
			$rewritten[] = array( 'insert', $newText );
		}
		$rewritten[] = array( 'copy', $end, $copyEnd );
	}
	$this->edits = $rewritten;
}
/**
 * Find the source byte region to remove if $pathName were deleted.
 *
 * The region is the path itself, widened backwards over preceding skippable
 * tokens (whitespace and comments) to the start of the line, and forwards
 * over following skippable tokens up to and including the next line break.
 * If non-skippable content precedes the path before a line break is found,
 * the start is not moved at all.
 *
 * Moves the parser position as a side effect.
 *
 * @param string $pathName
 * @throws MWException If the path is unknown
 * @return array ( start byte, end byte plus one )
 */
function findDeletionRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];

	// Rewind, then walk forward to the first token of the path
	$this->firstToken();
	while ( $this->pos != $info['startToken'] ) {
		$this->nextToken();
	}

	// Widen the start backwards, one preceding token at a time
	$regionStart = $info['startByte'];
	$back = 1;
	while ( $back <= $this->pos ) {
		$token = $this->getTokenAhead( -$back );
		if ( !$token->isSkip() ) {
			// There is other content on the same line: don't move the start
			// back, because that would make regions overlap.
			$regionStart = $info['startByte'];
			break;
		}
		$length = strlen( $token->text );
		$lfPos = strrpos( $token->text, "\n" );
		if ( $lfPos !== false ) {
			// Take only the part after the last LF; the line start does not
			// include the LF itself
			$regionStart -= $length - $lfPos - 1;
			break;
		}
		$regionStart -= $length;
		$back++;
	}

	// Walk forward to the token just past the path
	while ( $this->pos != $info['endToken'] ) {
		$this->nextToken();
	}

	// Widen the end forwards over skippable tokens
	$regionEnd = $info['endByte']; // past the end
	$remaining = count( $this->tokens ) - $this->pos;
	for ( $ahead = 0; $ahead < $remaining; $ahead++ ) {
		$token = $this->getTokenAhead( $ahead );
		if ( !$token->isSkip() ) {
			break;
		}
		$lfPos = strpos( $token->text, "\n" );
		if ( $lfPos !== false ) {
			// Stop just past the first LF
			$regionEnd += $lfPos + 1;
			break;
		}
		$regionEnd += strlen( $token->text );
	}

	return array( $regionStart, $regionEnd );
}
/**
 * Look up the byte region recorded for the value part of a path, i.e. the
 * offsets stored by startPathValue() and endPathValue().
 *
 * The end offset is past-the-end (end + 1), as elsewhere in this class.
 *
 * @param string $pathName
 * @throws MWException If the path is unknown or has no recorded value region
 * @return array ( value start byte, value end byte plus one )
 */
function findValueRegion( $pathName ) {
	if ( !isset( $this->pathInfo[$pathName] ) ) {
		throw new MWException( "Can't find path \"$pathName\"" );
	}
	$info = $this->pathInfo[$pathName];
	$from = $info['valueStartByte'];
	$to = $info['valueEndByte'];
	if ( $from === false || $to === false ) {
		throw new MWException( "Can't find value region for path \"$pathName\"" );
	}
	return array( $from, $to );
}
/**
 * Find the path name of the last element in the array at $path.
 *
 * Recorded paths are scanned in order for names under "$path/". A first
 * pass ignores candidates whose next component starts with "@"; if it finds
 * nothing, a second pass accepts those too, so an empty array yields its
 * \@extra interstitial element. Each pass stops at the first non-matching
 * path after a match.
 *
 * @param string $path
 * @return bool|int|string The element path, or false if nothing is recorded
 *   under $path
 */
function findLastArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $path ) + 1;

	// Try for a real element
	$lastEltPath = false;
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, $prefixLength, 1 ) == '@' ) {
			// Placeholder, ignore in this pass
			continue;
		}
		if ( substr( $candidatePath, 0, $prefixLength ) == $prefix ) {
			$lastEltPath = $candidatePath;
			continue;
		}
		if ( $lastEltPath !== false ) {
			// Left the run of matching paths
			break;
		}
	}
	if ( $lastEltPath !== false ) {
		return $lastEltPath;
	}

	// Try for an interstitial element
	$extraPath = false;
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, 0, $prefixLength ) == $prefix ) {
			$extraPath = $candidatePath;
		} elseif ( $extraPath !== false ) {
			break;
		}
	}
	return $extraPath;
}
/**
 * Find the path name of the first element in the array at $path.
 *
 * The first recorded path under "$path/" whose next component does not
 * start with "@" wins. If there is none, the first recorded path under
 * "$path/" of any kind is returned, which for an empty array is its
 * \@extra interstitial element.
 *
 * @param string $path
 * @return bool|int|string The element path, or false if nothing is recorded
 *   under $path
 */
function findFirstArrayElement( $path ) {
	$prefix = "$path/";
	$prefixLength = strlen( $path ) + 1;

	// Try for an ordinary element
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		$underPath = substr( $candidatePath, 0, $prefixLength ) == $prefix;
		$nextChar = substr( $candidatePath, $prefixLength, 1 );
		if ( $underPath && $nextChar != '@' ) {
			return $candidatePath;
		}
	}

	// Try for an interstitial element
	foreach ( $this->pathInfo as $candidatePath => $info ) {
		if ( substr( $candidatePath, 0, $prefixLength ) == $prefix ) {
			return $candidatePath;
		}
	}

	return false;
}
/**
 * Get the indent string (spaces and tabs) that starts at byte $pos, plus
 * the padding to put before "=>" so that a new key lines up with the arrow
 * of a reference element.
 *
 * @param int $pos Byte offset into the text
 * @param string|bool $key Key text of the new element, if any
 * @param int|bool $arrowPos Byte offset of the reference "=>", or false
 * @return array ( indent, arrow indent ). The indent is false when $pos is
 *   not at the start of a line. The arrow indent is a single space unless
 *   aligning with $arrowPos needs more.
 */
function getIndent( $pos, $key = false, $arrowPos = false ) {
	$atLineStart = ( $pos == 0 || $this->text[$pos - 1] == "\n" );
	if ( !$atLineStart ) {
		return array( false, ' ' );
	}

	$indentLength = strspn( $this->text, " \t", $pos );
	$indent = substr( $this->text, $pos, $indentLength );

	$arrowIndent = ' ';
	if ( $indent !== false && $arrowPos !== false ) {
		// Columns left between the end of the key and the reference arrow
		$padding = $arrowPos - $pos - $indentLength - strlen( $key );
		if ( $padding > 0 ) {
			$arrowIndent = str_repeat( ' ', $padding );
		}
	}
	return array( $indent, $arrowIndent );
}
/**
 * Run the parser on the text, filling $this->pathInfo. Throws an exception
 * (via error()) if the text does not match the supported subset of PHP
 * syntax.
 *
 * Each loop iteration pops one state and handles it; handlers push the
 * states that should follow. The loop condition tests the token most
 * recently stored in $token, which only some states update.
 */
public function parse() {
	$this->initParse();
	$this->pushState( 'file' );
	$this->pushPath( '@extra-' . ( $this->serial++ ) );
	$token = $this->firstToken();

	while ( !$token->isEnd() ) {
		$state = $this->popState();
		if ( !$state ) {
			$this->error( 'internal error: empty state stack' );
		}

		switch ( $state ) {
			case 'file':
				$this->expect( T_OPEN_TAG );
				// Continue exactly as between statements
			case 'file 2':
				$token = $this->skipSpace();
				if ( $token->isEnd() ) {
					// Nothing left but whitespace and comments
					break 2;
				}
				$this->pushState( 'statement', 'file 2' );
				break;

			case 'statement':
				$token = $this->skipSpace();
				if ( !$this->validatePath( $token->text ) ) {
					$this->error( "Invalid variable name \"{$token->text}\"" );
				}
				$this->nextPath( $token->text );
				$this->expect( T_VARIABLE );
				$this->skipSpace();

				// Optional single-level subscript: $var[key] = ...
				$arrayAssign = false;
				if ( $this->currentToken()->type == '[' ) {
					$this->nextToken();
					$token = $this->skipSpace();
					if ( !$token->isScalar() ) {
						$this->error( "expected a string or number for the array key" );
					}
					if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
						$text = $this->parseScalar( $token->text );
					} else {
						$text = $token->text;
					}
					if ( !$this->validatePath( $text ) ) {
						$this->error( "Invalid associative array name \"$text\"" );
					}
					$this->pushPath( $text );
					$this->nextToken();
					$this->skipSpace();
					$this->expect( ']' );
					$this->skipSpace();
					$arrayAssign = true;
				}

				$this->expect( '=' );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState( 'expression',
					$arrayAssign ? 'array assign end' : 'statement end' );
				break;

			case 'array assign end':
			case 'statement end':
				$this->endPathValue();
				if ( $state == 'array assign end' ) {
					// Leave the path level pushed for the subscript
					$this->popPath();
				}
				$this->skipSpace();
				$this->expect( ';' );
				$this->nextPath( '@extra-' . ( $this->serial++ ) );
				break;

			case 'expression':
				$token = $this->skipSpace();
				if ( $token->type == T_ARRAY ) {
					$this->pushState( 'array' );
				} elseif ( $token->isScalar() || $token->type == T_VARIABLE ) {
					$this->nextToken();
				} else {
					$this->error( "expected simple expression" );
				}
				break;

			case 'array':
				$this->skipSpace();
				$this->expect( T_ARRAY );
				$this->skipSpace();
				$this->expect( '(' );
				$this->skipSpace();
				$this->pushPath( '@extra-' . ( $this->serial++ ) );
				if ( $this->isAhead( ')' ) ) {
					// Empty array
					$this->pushState( 'array end' );
				} else {
					$this->pushState( 'element', 'array end' );
				}
				break;

			case 'array end':
				$this->skipSpace();
				$this->popPath();
				$this->expect( ')' );
				break;

			case 'element':
				$token = $this->skipSpace();
				// Look ahead to find the double arrow
				if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
					// Found associative element
					$this->pushState( 'assoc-element', 'element end' );
				} else {
					// Not associative: takes the next numeric index
					$this->nextPath( '@next' );
					$this->startPathValue();
					$this->pushState( 'expression', 'element end' );
				}
				break;

			case 'element end':
				$token = $this->skipSpace();
				if ( $token->type == ',' ) {
					$this->endPathValue();
					$this->markComma();
					$this->nextToken();
					$this->nextPath( '@extra-' . ( $this->serial++ ) );
					// Look ahead to find ending bracket
					if ( $this->isAhead( ")" ) ) {
						// Found ending bracket, no continuation
						$this->skipSpace();
					} else {
						// No ending bracket, continue to next element
						$this->pushState( 'element' );
					}
				} elseif ( $token->type == ')' ) {
					// End array
					$this->endPathValue();
				} else {
					$this->error( "expected the next array element or the end of the array" );
				}
				break;

			case 'assoc-element':
				$token = $this->skipSpace();
				if ( !$token->isScalar() ) {
					$this->error( "expected a string or number for the array key" );
				}
				if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
					$text = $this->parseScalar( $token->text );
				} else {
					$text = $token->text;
				}
				if ( !$this->validatePath( $text ) ) {
					$this->error( "Invalid associative array name \"$text\"" );
				}
				$this->nextPath( $text );
				$this->nextToken();
				$this->skipSpace();
				$this->markArrow();
				$this->expect( T_DOUBLE_ARROW );
				$this->skipSpace();
				$this->startPathValue();
				$this->pushState( 'expression' );
				break;
		}
	}

	// Any states still pending mean the input stopped mid-construct
	if ( count( $this->stateStack ) ) {
		$this->error( 'unexpected end of file' );
	}
	$this->popPath();
}
/**
 * Reset all parser state for a fresh parse of $this->text: tokenize the
 * text, empty both stacks and the path table, rewind to the first token
 * and restart the placeholder serial at 1.
 */
protected function initParse() {
	$this->tokens = token_get_all( $this->text );
	$this->stateStack = array();
	$this->pathStack = array();
	$this->pathInfo = array();
	$this->serial = 1;
	// Needs $this->tokens, so it comes after tokenizing
	$this->firstToken();
}
/**
 * Set the parse position and refresh the current token. Positions at or
 * beyond the end of the token array give the END token.
 *
 * Do not call this except from firstToken() and nextToken(); there is more
 * to update than just the position.
 *
 * @param int $pos Index into the token array
 * @return ConfEditorToken The new current token
 */
protected function setPos( $pos ) {
	$this->pos = $pos;
	$this->currentToken = ( $this->pos >= count( $this->tokens ) )
		? ConfEditorToken::newEnd()
		: $this->newTokenObj( $this->tokens[$this->pos] );
	return $this->currentToken;
}
/**
 * Wrap an element of the token_get_all() result in a ConfEditorToken.
 * Array elements supply type and text from indexes 0 and 1; any other
 * element is used as both type and text.
 *
 * @param array|string $internalToken
 * @return ConfEditorToken
 */
function newTokenObj( $internalToken ) {
	if ( !is_array( $internalToken ) ) {
		return new ConfEditorToken( $internalToken, $internalToken );
	}
	return new ConfEditorToken( $internalToken[0], $internalToken[1] );
}
/**
 * Rewind to the first token and reset the previous token and the
 * line/column/byte counters to their starting values.
 *
 * @return ConfEditorToken The first token
 */
function firstToken() {
	$this->setPos( 0 );
	$this->prevToken = ConfEditorToken::newEnd();

	// Line and column are 1-based, the byte offset is 0-based
	$this->lineNum = 1;
	$this->colNum = 1;
	$this->byteNum = 0;

	return $this->currentToken;
}
/**
 * Accessor for the token at the current parse position.
 *
 * @return ConfEditorToken
 */
function currentToken() {
	return $this->currentToken;
}
/**
 * Step to the next token, first advancing the line, column and byte
 * counters past the text of the token being left.
 *
 * @return ConfEditorToken The new current token
 */
function nextToken() {
	if ( $this->currentToken ) {
		$text = $this->currentToken->text;
		$length = strlen( $text );
		$newlines = substr_count( $text, "\n" );
		if ( $newlines ) {
			$this->lineNum += $newlines;
			// Column restarts after the last line break in the token
			$this->colNum = $length - strrpos( $text, "\n" );
		} else {
			$this->colNum += $length;
		}
		$this->byteNum += $length;
	}
	$this->prevToken = $this->currentToken;
	$this->setPos( $this->pos + 1 );
	return $this->currentToken;
}
/**
 * Peek at the token $offset steps from the current position without
 * moving. A negative $offset looks behind. Positions outside the token
 * array give the END token.
 *
 * @param int $offset
 * @return ConfEditorToken
 */
function getTokenAhead( $offset ) {
	$index = $this->pos + $offset;
	if ( $index < 0 || $index >= count( $this->tokens ) ) {
		return ConfEditorToken::newEnd();
	}
	return $this->newTokenObj( $this->tokens[$index] );
}
/**
 * Move forward until the current token is not skippable (see
 * ConfEditorToken::isSkip()), i.e. past whitespace and comments.
 *
 * @return ConfEditorToken The current token afterwards
 */
function skipSpace() {
	for ( ; $this->currentToken && $this->currentToken->isSkip(); ) {
		$this->nextToken();
	}
	return $this->currentToken;
}
/**
 * Require the current token to be of the given type and step past it.
 * Any other token is reported through error() with the expected and the
 * actual type names.
 *
 * @param int|string $type Tokenizer id or literal token text
 * @return ConfEditorToken The token after the expected one
 */
function expect( $type ) {
	$matches = $this->currentToken && $this->currentToken->type == $type;
	if ( !$matches ) {
		$this->error( "expected " . $this->getTypeName( $type ) .
			", got " . $this->getTypeName( $this->currentToken->type ) );
	} else {
		return $this->nextToken();
	}
}
/**
 * Push one or two states on to the state stack. The stack is popped from
 * the end, so $nextState goes on last and is handled first.
 *
 * @param string $nextState State to run next
 * @param string|null $stateAfterThat State to run after $nextState, if any
 */
function pushState( $nextState, $stateAfterThat = null ) {
	if ( $stateAfterThat !== null ) {
		// Goes underneath $nextState
		$this->stateStack[] = $stateAfterThat;
	}
	$this->stateStack[] = $nextState;
}
/**
 * Remove and return the topmost state of the state stack.
 *
 * @return mixed The state, or null if the stack is empty
 */
function popState() {
	$state = array_pop( $this->stateStack );
	return $state;
}
/**
 * Check a user-supplied path component. A component is valid when it
 * contains no "/" and does not begin with "@"; both characters are
 * reserved for use in path keys.
 *
 * @param string $path
 * @return bool
 */
function validatePath( $path ) {
	if ( strpos( $path, '/' ) !== false ) {
		return false;
	}
	return substr( $path, 0, 1 ) != '@';
}
/**
 * Internal function: close the path on top of the path stack. Its end byte
 * and end token are set to the current position and the entry is stored in
 * $this->pathInfo under the slash-separated names of the whole stack.
 *
 * Do not call except from popPath() or nextPath().
 */
function endPath() {
	$fullPath = '';
	foreach ( $this->pathStack as $level ) {
		if ( $fullPath !== '' ) {
			$fullPath .= '/';
		}
		$fullPath .= $level['name'];
	}

	// After the loop $level is a copy of the topmost stack entry; that copy
	// is what gets completed and recorded.
	$level['endByte'] = $this->byteNum;
	$level['endToken'] = $this->pos;
	$this->pathInfo[$fullPath] = $level;
}
/**
 * Go up to a new path level, for example at the start of an array. The new
 * entry starts at the current byte and token position, with no value
 * region, comma or arrow recorded yet and its numeric index counter at 0.
 *
 * @param string $path Name of the first path at the new level
 */
function pushPath( $path ) {
	$entry = array(
		'name' => $path,
		'level' => count( $this->pathStack ) + 1,
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'nextArrayIndex' => 0,
		'hasComma' => false,
		'arrowByte' => false
	);
	$this->pathStack[] = $entry;
}
/**
 * Go down a path level, for example at the end of an array: the current
 * path is recorded via endPath() and then removed from the path stack.
 */
function popPath() {
	$this->endPath();
	array_pop( $this->pathStack );
}
/**
 * Go to the next path on the same level. The current path is recorded via
 * endPath() and the top stack entry is restarted at the current position
 * under its new name, with value region, comma and arrow markers cleared.
 * The level and the numeric index counter carry over.
 *
 * @param string $path New path name. If it is \@next, the name becomes the
 *   next numeric array index of this level and the counter is advanced.
 */
function nextPath( $path ) {
	$this->endPath();

	$top = count( $this->pathStack ) - 1;
	$entry = $this->pathStack[$top];
	if ( $path == '@next' ) {
		$entry['name'] = $entry['nextArrayIndex'];
		$entry['nextArrayIndex']++;
	} else {
		$entry['name'] = $path;
	}

	// Array union keeps the left-hand values, so the fresh position fields
	// win and everything else comes from the existing entry
	$restart = array(
		'startByte' => $this->byteNum,
		'startToken' => $this->pos,
		'valueStartToken' => false,
		'valueStartByte' => false,
		'valueEndToken' => false,
		'valueEndByte' => false,
		'hasComma' => false,
		'arrowByte' => false,
	);
	$this->pathStack[$top] = $restart + $entry;
}
/**
 * Record the current token and byte position as the start of the value
 * part of the path on top of the path stack.
 */
function startPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueStartToken'] = $this->pos;
	$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
}
/**
 * Record the current token and byte position as the end of the value part
 * of the path on top of the path stack.
 */
function endPathValue() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['valueEndToken'] = $this->pos;
	$this->pathStack[$top]['valueEndByte'] = $this->byteNum;
}
/**
 * Flag the path on top of the path stack as being followed by a comma
 * separator.
 */
function markComma() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['hasComma'] = true;
}
/**
 * Record the current byte position as the location of the arrow separator
 * of the associative element on top of the path stack.
 */
function markArrow() {
	$top = count( $this->pathStack ) - 1;
	$this->pathStack[$top]['arrowByte'] = $this->byteNum;
}
/**
 * Raise a parse error for the current parser position.
 *
 * @param string $msg Description of the problem
 * @throws ConfEditorParseError Always
 */
function error( $msg ) {
	$parseError = new ConfEditorParseError( $this, $msg );
	throw $parseError;
}
/**
 * Get a readable name for the given token type: the tokenizer's name for
 * integer types, the type itself in double quotes for anything else.
 *
 * @param int|string $type
 * @return string
 */
function getTypeName( $type ) {
	return is_int( $type ) ? token_name( $type ) : "\"$type\"";
}
/**
 * Look ahead, without moving, for the first non-skippable token at or
 * after the current position plus $offset, and test its type.
 *
 * @param int|string $type Token type to look for
 * @param int $offset Number of tokens ahead at which to start looking
 * @return bool True if that token has the given type; false if it has
 *   another type or the end of the tokens is reached first
 */
function isAhead( $type, $offset = 0 ) {
	for ( $ahead = $offset; ; $ahead++ ) {
		$token = $this->getTokenAhead( $ahead );
		if ( $token->isEnd() ) {
			return false;
		}
		if ( !$token->isSkip() ) {
			// First significant token decides
			return $token->type == $type;
		}
	}
}
/**
 * Accessor for the token that was current before the last nextToken().
 *
 * @return ConfEditorToken
 */
function prevToken() {
	return $this->prevToken;
}
/**
 * Echo a reasonably readable dump of the token array: one line per token
 * with its type name and its text (control characters escaped), HTML-escaped
 * and wrapped in a <pre> element.
 */
function dumpTokens() {
	$lines = '';
	foreach ( $this->tokens as $rawToken ) {
		$token = $this->newTokenObj( $rawToken );
		$typeName = $this->getTypeName( $token->type );
		$escapedText = addcslashes( $token->text, "\0..\37" );
		$lines .= sprintf( "%-28s %s\n", $typeName, $escapedText );
	}
	echo "<pre>" . htmlspecialchars( $lines ) . "</pre>";
}
/**
 * Exception class for parse errors. Remembers the line and column the
 * editor was at when the error was raised.
 */
class ConfEditorParseError extends MWException {
	/** 1-based line number of the error */
	public $lineNum;

	/** 1-based column number of the error */
	public $colNum;

	/**
	 * @param ConfEditor $editor Editor whose current position is reported
	 * @param string $msg Description of the problem
	 */
	function __construct( $editor, $msg ) {
		$this->lineNum = $editor->lineNum;
		$this->colNum = $editor->colNum;
		parent::__construct(
			"Parse error on line {$editor->lineNum} col {$editor->colNum}: $msg" );
	}

	/**
	 * Render the offending line of $text followed by a line with a caret
	 * under the error column.
	 *
	 * @param string $text The text that was parsed
	 * @return string The two lines, or '' if the text has no such line
	 */
	function highlight( $text ) {
		$wantedIndex = $this->lineNum - 1;
		foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
			if ( $index == $wantedIndex ) {
				$caretLine = str_repeat( ' ', $this->colNum - 1 ) . "^\n";
				return "$line\n" . $caretLine;
			}
		}
		return '';
	}
}
/**
 * Class to wrap a token from the tokenizer.
 *
 * The type is either an integer tokenizer id (T_* constant), the literal
 * text of a single-character token, or the string 'END' for the synthetic
 * end-of-input token. Because types mix integers and strings, all
 * comparisons in this class are strict.
 */
class ConfEditorToken {
	/** Token type: T_* id, literal token text, or 'END' */
	public $type;

	/** Source text of the token */
	public $text;

	/** Token types accepted as scalar values and array keys */
	public static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );

	/** Token types the parser skips over: whitespace and comments */
	public static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );

	/**
	 * Create the synthetic end-of-input token.
	 *
	 * @return ConfEditorToken Token of type 'END' with empty text
	 */
	static function newEnd() {
		return new self( 'END', '' );
	}

	/**
	 * @param int|string $type
	 * @param string $text
	 */
	function __construct( $type, $text ) {
		$this->type = $type;
		$this->text = $text;
	}

	/**
	 * @return bool True for whitespace and comment tokens
	 */
	function isSkip() {
		return in_array( $this->type, self::$skipTypes, true );
	}

	/**
	 * @return bool True for number, bare-word and quoted-string tokens
	 */
	function isScalar() {
		return in_array( $this->type, self::$scalarTypes, true );
	}

	/**
	 * @return bool True for the synthetic end-of-input token
	 */
	function isEnd() {
		return $this->type === 'END';
	}
}