Bump trunk version since we've branched
[mediawiki.git] / includes / MagicWord.php
blob bc05e92a3af1858f3495a8a8ed873babda500dd8
1 <?php
2 /**
3 * File for magic words
5 * See docs/magicword.txt
7 * @file
8 * @ingroup Parser
9 */
11 /**
12 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
13 * Usage:
14 * if (MagicWord::get( 'redirect' )->match( $text ) )
16 * Possible future improvements:
17 * * Simultaneous searching for a number of magic words
18 * * MagicWord::$mObjects in shared memory
20 * Please avoid reading the data out of one of these objects and then writing
21 * special case code. If possible, add another match()-like function here.
23 * To add magic words in an extension, use the LanguageGetMagic hook. For
24 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
25 * hook. Use string keys.
27 * @ingroup Parser
29 class MagicWord {
/**#@+
 * @private
 */
// Identity of the word: its ID, its synonym list and its case-sensitivity
// flag. All three are assigned by the constructor; load() assigns the ID again.
public $mId;
public $mSynonyms;
public $mCaseSensitive;

// Lazily built regex strings. An empty string means "not built yet"; the
// getters call initRegex(), which fills all five at once.
public $mRegex = '';
public $mRegexStart = '';
public $mBaseRegex = '';
public $mVariableRegex = '';
public $mVariableStartToEndRegex = '';

// Result flags: mModified is written by replace() and substituteCallback(),
// mFound by matchAndRemove() and matchStartAndRemove().
public $mModified = false;
public $mFound = false;
// True once getVariableIDs() has run its hooks.
public static $mVariableIDsInitialised = false;

// Built-in parser variable IDs. getVariableIDs() hands this array by
// reference to the MagicWordwgVariableIDs hook the first time it is called.
public static $mVariableIDs = array(
	'currentmonth',
	'currentmonth1',
	'currentmonthname',
	'currentmonthnamegen',
	'currentmonthabbrev',
	'currentday',
	'currentday2',
	'currentdayname',
	'currentyear',
	'currenttime',
	'currenthour',
	'localmonth',
	'localmonth1',
	'localmonthname',
	'localmonthnamegen',
	'localmonthabbrev',
	'localday',
	'localday2',
	'localdayname',
	'localyear',
	'localtime',
	'localhour',
	'numberofarticles',
	'numberoffiles',
	'numberofedits',
	'articlepath',
	'sitename',
	'server',
	'servername',
	'scriptpath',
	'stylepath',
	'pagename',
	'pagenamee',
	'fullpagename',
	'fullpagenamee',
	'namespace',
	'namespacee',
	'currentweek',
	'currentdow',
	'localweek',
	'localdow',
	'revisionid',
	'revisionday',
	'revisionday2',
	'revisionmonth',
	'revisionmonth1',
	'revisionyear',
	'revisiontimestamp',
	'revisionuser',
	'subpagename',
	'subpagenamee',
	'talkspace',
	'talkspacee',
	'subjectspace',
	'subjectspacee',
	'talkpagename',
	'talkpagenamee',
	'subjectpagename',
	'subjectpagenamee',
	'numberofusers',
	'numberofactiveusers',
	'numberofpages',
	'currentversion',
	'basepagename',
	'basepagenamee',
	'currenttimestamp',
	'localtimestamp',
	'directionmark',
	'contentlanguage',
	'numberofadmins',
	'numberofviews',
);
// Caching hints for ParserCache, keyed by magic word ID and read through
// getCacheTTL(), which answers -1 for IDs that are not listed here.
// NOTE(review): the values look like seconds (3600 / 86400) - confirm with
// the ParserCache consumer.
public static $mCacheTTLs = array(
	'currentmonth' => 86400,
	'currentmonth1' => 86400,
	'currentmonthname' => 86400,
	'currentmonthnamegen' => 86400,
	'currentmonthabbrev' => 86400,
	'currentday' => 3600,
	'currentday2' => 3600,
	'currentdayname' => 3600,
	'currentyear' => 86400,
	'currenttime' => 3600,
	'currenthour' => 3600,
	'localmonth' => 86400,
	'localmonth1' => 86400,
	'localmonthname' => 86400,
	'localmonthnamegen' => 86400,
	'localmonthabbrev' => 86400,
	'localday' => 3600,
	'localday2' => 3600,
	'localdayname' => 3600,
	'localyear' => 86400,
	'localtime' => 3600,
	'localhour' => 3600,
	'numberofarticles' => 3600,
	'numberoffiles' => 3600,
	'numberofedits' => 3600,
	'currentweek' => 3600,
	'currentdow' => 3600,
	'localweek' => 3600,
	'localdow' => 3600,
	'numberofusers' => 3600,
	'numberofactiveusers' => 3600,
	'numberofpages' => 3600,
	'currentversion' => 86400,
	'currenttimestamp' => 3600,
	'localtimestamp' => 3600,
	'pagesinnamespace' => 3600,
	'numberofadmins' => 3600,
	'numberofviews' => 3600,
	'numberingroup' => 3600,
);
// IDs of the double-underscore words; getDoubleUnderscoreArray() wraps this
// list in a MagicWordArray.
public static $mDoubleUnderscoreIDs = array(
	'notoc',
	'nogallery',
	'forcetoc',
	'toc',
	'noeditsection',
	'newsectionlink',
	'nonewsectionlink',
	'hiddencat',
	'index',
	'noindex',
	'staticredirect',
	'notitleconvert',
	'nocontentconvert',
);
// IDs of the substitution modifiers, returned as-is by getSubstIDs().
public static $mSubstIDs = array(
	'subst',
	'safesubst',
);
// Cache of loaded MagicWord objects keyed by ID; filled by get() and emptied
// by clearCache().
public static $mObjects = array();

// MagicWordArray built on first use by getDoubleUnderscoreArray().
public static $mDoubleUnderscoreArray = null;

/**#@-*/
/**
 * Create a magic word from explicit data.
 *
 * @param mixed $id Magic word ID, stored unchanged
 * @param mixed $syn A synonym or list of synonyms; always stored as an array
 * @param bool $cs Whether the word is case-sensitive
 */
function __construct($id = 0, $syn = array(), $cs = false) {
	$this->mCaseSensitive = $cs;
	$this->mSynonyms = (array)$syn;
	$this->mId = $id;
}
/**
 * Factory: return the shared MagicWord object for an ID, loading and
 * caching it in self::$mObjects the first time the ID is requested.
 *
 * @param string $id Magic word ID
 * @return MagicWord
 */
static function &get( $id ) {
	wfProfileIn( __METHOD__ );
	$alreadyLoaded = isset( self::$mObjects[$id] );
	if ( !$alreadyLoaded ) {
		$word = new MagicWord();
		$word->load( $id );
		self::$mObjects[$id] = $word;
	}
	wfProfileOut( __METHOD__ );
	return self::$mObjects[$id];
}
/**
 * Get an array of parser variable IDs.
 *
 * On the first call only, two hooks are run: the deprecated
 * MagicWordMagicWords hook, whose results are turned into constants, and
 * MagicWordwgVariableIDs, which may extend self::$mVariableIDs in place.
 *
 * @return array
 */
static function getVariableIDs() {
	if ( self::$mVariableIDsInitialised ) {
		return self::$mVariableIDs;
	}

	# Deprecated constant definition hook, available for extensions that need it
	$constantNames = array();
	wfRunHooks( 'MagicWordMagicWords', array( &$constantNames ) );
	foreach ( $constantNames as $constantName ) {
		define( $constantName, $constantName );
	}

	# Get variable IDs
	wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
	self::$mVariableIDsInitialised = true;

	return self::$mVariableIDs;
}
/**
 * Get an array of parser substitution modifier IDs.
 *
 * @return array Copy of self::$mSubstIDs
 */
static function getSubstIDs() {
	$substIds = self::$mSubstIDs;
	return $substIds;
}
/**
 * Allow external reads of the TTL array.
 *
 * @param string $id Magic word ID
 * @return int The entry of self::$mCacheTTLs for $id, or -1 if there is none
 */
static function getCacheTTL($id) {
	return array_key_exists( $id, self::$mCacheTTLs )
		? self::$mCacheTTLs[$id]
		: -1;
}
/**
 * Get a MagicWordArray of double-underscore entities.
 * The array is created from self::$mDoubleUnderscoreIDs on first use and
 * then reused.
 *
 * @return MagicWordArray
 */
static function getDoubleUnderscoreArray() {
	if ( self::$mDoubleUnderscoreArray === null ) {
		$ids = self::$mDoubleUnderscoreIDs;
		self::$mDoubleUnderscoreArray = new MagicWordArray( $ids );
	}
	return self::$mDoubleUnderscoreArray;
}
/**
 * Forget every MagicWord object cached in self::$mObjects.
 * For use in parser tests.
 */
public static function clearCache() {
	$empty = array();
	self::$mObjects = $empty;
}
/**
 * Initialise this object with an ID.
 *
 * The content language object is asked to fill this object in through
 * getMagic(). If no synonyms are present afterwards, the ID is treated as
 * invalid: a placeholder synonym is installed and the problem is written to
 * the 'exception' debug log instead of being thrown.
 *
 * @param string $id Magic word ID
 */
function load( $id ) {
	global $wgContLang;

	$this->mId = $id;
	$wgContLang->getMagic( $this );
	if ( $this->mSynonyms ) {
		return;
	}

	// Placeholder synonym so that the regexes built later are never empty.
	$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
	// Logged rather than thrown (an MWException used to be raised here).
	wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
}
/**
 * Preliminary initialisation: build every cached regex from the synonyms.
 *
 * Fills mBaseRegex, mRegex, mRegexStart, mVariableRegex and
 * mVariableStartToEndRegex. In the two "variable" regexes an escaped "$1"
 * in a synonym becomes the capture group (.*?).
 * @private
 */
function initRegex() {
	// Longest synonym first, so that it wins over a shorter one that is
	// a prefix of it.
	$byLength = $this->mSynonyms;
	usort( $byLength, array( $this, 'compareStringLength' ) );

	// Quote for use between '/' delimiters, in case a synonym contains one.
	$alternatives = array();
	foreach ( $byLength as $word ) {
		$alternatives[] = preg_quote( $word, '/' );
	}
	$base = implode( '|', $alternatives );
	$flags = $this->mCaseSensitive ? '' : 'iu';
	$startToEnd = "/^(?:{$base})$/{$flags}";

	$this->mBaseRegex = $base;
	$this->mRegex = "/{$base}/{$flags}";
	$this->mRegexStart = "/^(?:{$base})/{$flags}";
	$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
	$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $startToEnd );
}
/**
 * Comparison callback that orders strings by byte length, longest first.
 *
 * @param string $s1
 * @param string $s2
 * @return int -1 if $s1 is longer, 1 if it is shorter, 0 if both have the
 *   same length
 */
function compareStringLength( $s1, $s2 ) {
	$delta = strlen( $s2 ) - strlen( $s1 );
	if ( $delta === 0 ) {
		return 0;
	}
	return $delta > 0 ? 1 : -1;
}
/**
 * Get the complete, unanchored regex matching the word, building the regex
 * cache first if it is still empty.
 *
 * @return string
 */
function getRegex() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mRegex;
}
/**
 * Get the regex modifiers that go with MagicWord::getBaseRegex(): 'iu' for
 * a case-insensitive word, nothing for a case-sensitive one. The complete
 * expressions from the other getters already include them.
 *
 * @return string
 */
function getRegexCase() {
	if ( $this->mRegex === '' ) {
		$this->initRegex();
	}

	if ( $this->mCaseSensitive ) {
		return '';
	}
	return 'iu';
}
/**
 * Get a regex that matches the word only at the start of the string,
 * building the regex cache first if it is still empty.
 *
 * @return string
 */
function getRegexStart() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mRegexStart;
}
/**
 * Get the bare alternation of quoted synonyms, without delimiters or
 * modifiers, building the regex cache first if it is still empty.
 *
 * @return string
 */
function getBaseRegex() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mBaseRegex;
}
/**
 * Tell whether the text contains the word anywhere.
 *
 * @param string $text
 * @return bool
 */
function match( $text ) {
	$hits = preg_match( $this->getRegex(), $text );
	return (bool)$hits;
}
/**
 * Tell whether the text starts with the word.
 *
 * @param string $text
 * @return bool
 */
function matchStart( $text ) {
	$hits = preg_match( $this->getRegexStart(), $text );
	return (bool)$hits;
}
/**
 * Match the whole text against the word, treating $1 as a wildcard.
 *
 * Returns NULL if there's no match. Otherwise returns the matched variable
 * part ($1) when one was captured non-empty, and the whole matched string
 * when the regex has no variable part or the variable part is empty.
 *
 * @param string $text
 * @return string|null
 */
function matchVariableStartToEnd( $text ) {
	$matches = array();
	if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
		return null;
	}

	# Multiple matched parts (variable match); some will be empty because of
	# synonyms. The variable will be the second non-empty one so remove any
	# blank elements and re-sort the indices.
	# See also bug 6526
	#
	# Filter on string length instead of truthiness: array_filter() without
	# a callback also discards the string "0", which is a valid value for $1.
	$matches = array_values( array_filter( $matches, 'strlen' ) );

	if ( count( $matches ) == 1 ) {
		return $matches[0];
	}
	return $matches[1];
}
/**
 * Remove every instance of the word from the input string and report
 * whether there was at least one.
 *
 * The flag is collected in mFound by the pregRemoveAndRecord() callback.
 *
 * @param string &$text Text to search; modified in place
 * @return bool True if the word was found
 */
function matchAndRemove( &$text ) {
	$this->mFound = false;
	// Objects are passed by handle, so the callback needs no reference to
	// $this; the old "&$this" form is a PHP 4 idiom that PHP 7.1+ rejects.
	$callback = array( $this, 'pregRemoveAndRecord' );
	$text = preg_replace_callback( $this->getRegex(), $callback, $text );
	return $this->mFound;
}
/**
 * Remove the word from the start of the input string, if it is there, and
 * report whether it was.
 *
 * The flag is collected in mFound by the pregRemoveAndRecord() callback.
 *
 * @param string &$text Text to search; modified in place
 * @return bool True if the text started with the word
 */
function matchStartAndRemove( &$text ) {
	$this->mFound = false;
	// Objects are passed by handle, so the callback needs no reference to
	// $this; the old "&$this" form is a PHP 4 idiom that PHP 7.1+ rejects.
	$callback = array( $this, 'pregRemoveAndRecord' );
	$text = preg_replace_callback( $this->getRegexStart(), $callback, $text );
	return $this->mFound;
}
/**
 * preg_replace_callback() handler used by matchAndRemove() and
 * matchStartAndRemove(): records that a match happened and deletes it.
 * @private
 *
 * @return string Always the empty string
 */
function pregRemoveAndRecord( ) {
	$replacement = '';
	$this->mFound = true;
	return $replacement;
}
/**
 * Replace the word with something else.
 *
 * The replacement is passed through StringUtils::escapeRegexReplacement()
 * before it reaches preg_replace(). mModified records whether the result
 * differs from $subject.
 *
 * @param string $replacement
 * @param string $subject
 * @param int $limit Maximum number of replacements, -1 for no limit
 * @return string
 */
function replace( $replacement, $subject, $limit=-1 ) {
	$safeReplacement = StringUtils::escapeRegexReplacement( $replacement );
	$res = preg_replace( $this->getRegex(), $safeReplacement, $subject, $limit );
	$this->mModified = ( $res !== $subject );
	return $res;
}
/**
 * Variable handling: {{SUBST:xxx}} style words.
 * Calls back a function to determine what to replace xxx with.
 * Input word must contain $1. mModified records whether the result differs
 * from $text.
 *
 * @param string $text
 * @param callback $callback Handed to preg_replace_callback()
 * @return string
 */
function substituteCallback( $text, $callback ) {
	$pattern = $this->getVariableRegex();
	$res = preg_replace_callback( $pattern, $callback, $text );
	$this->mModified = ( $res !== $text );
	return $res;
}
/**
 * Get the unanchored regex in which $1 is a wildcard, building the regex
 * cache first if this entry is still empty.
 *
 * @return string
 */
function getVariableRegex() {
	$needsInit = ( $this->mVariableRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mVariableRegex;
}
/**
 * Get the regex that matches the entire string, with $1 as a wildcard,
 * building the regex cache first if this entry is still empty.
 *
 * @return string
 */
function getVariableStartToEndRegex() {
	$needsInit = ( $this->mVariableStartToEndRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mVariableStartToEndRegex;
}
/**
 * Access one entry of the synonym list directly.
 *
 * @param int $i Index into the synonym list
 * @return string
 */
function getSynonym( $i ) {
	$synonyms = $this->mSynonyms;
	return $synonyms[$i];
}
/**
 * Get the complete synonym list.
 *
 * @return array
 */
function getSynonyms() {
	$synonyms = $this->mSynonyms;
	return $synonyms;
}
/**
 * Tell whether the last call to replace() or substituteCallback() returned
 * a modified text.
 *
 * @return bool
 */
function getWasModified(){
	$wasModified = $this->mModified;
	return $wasModified;
}
/**
 * Replace several magic words in one preg_replace() call.
 *
 * $magicarr is an associative array of (magic word ID => replacement).
 * The result is placed in the out variable $result.
 *
 * Unlike replace(), the replacements are handed to preg_replace() as
 * given, without StringUtils::escapeRegexReplacement().
 *
 * @static
 * @todo Should this be static? It is not declared static although it never
 *   touches $this, and it doesn't seem to be used at all
 *
 * @param array $magicarr
 * @param string $subject
 * @param string &$result Receives the text after replacement
 * @return bool True if something was replaced
 */
function replaceMultiple( $magicarr, $subject, &$result ){
	$patterns = array();
	foreach ( array_keys( $magicarr ) as $id ) {
		$patterns[] = MagicWord::get( $id )->getRegex();
	}
	$replacements = array_values( $magicarr );

	$result = preg_replace( $patterns, $replacements, $subject );
	return $result !== $subject;
}
/**
 * Add all the synonyms of this MagicWord to an array, to allow quick
 * lookup in a list of magic words. Each key is the synonym lower-cased by
 * the content language.
 *
 * @param array &$array Lookup table to extend
 * @param mixed $value Value stored under every synonym
 */
function addToArray( &$array, $value ) {
	global $wgContLang;
	foreach ( $this->mSynonyms as $synonym ) {
		$key = $wgContLang->lc( $synonym );
		$array[$key] = $value;
	}
}
/**
 * Get the case-sensitivity flag of this word.
 *
 * @return bool
 */
function isCaseSensitive() {
	$caseSensitive = $this->mCaseSensitive;
	return $caseSensitive;
}
/**
 * Get the ID of this magic word.
 *
 * @return mixed The ID as given to the constructor or to load()
 */
function getId() {
	$id = $this->mId;
	return $id;
}
520 * Class for handling an array of magic words
521 * @ingroup Parser
523 class MagicWordArray {
// Magic word IDs contained in this array.
public $names = array();
// Lazily built caches; add() and addArray() reset all three to null.
public $hash;
public $baseRegex;
public $regex;
// Not used by any method of this class shown here.
public $matches;
/**
 * @param array $names Magic word IDs to start with
 */
function __construct( $names = array() ) {
	$initialNames = $names;
	$this->names = $initialNames;
}
/**
 * Add a magic word by name and drop the cached hash and regexes.
 *
 * @param string $name Magic word ID
 */
public function add( $name ) {
	$this->names[] = $name;
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Add a number of magic words by name and drop the cached hash and
 * regexes. Keys of $names are ignored.
 *
 * @param array $names Magic word IDs
 */
public function addArray( $names ) {
	$extra = array_values( $names );
	$this->names = array_merge( $this->names, $extra );
	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Get a 2-d hashtable for this array.
 *
 * The first level is 0 for case-insensitive words and 1 for case-sensitive
 * ones; the second level maps a synonym to its magic word ID. Synonyms of
 * case-insensitive words are stored lower-cased by the content language.
 *
 * @return array
 */
function getHash() {
	if ( !is_null( $this->hash ) ) {
		return $this->hash;
	}

	global $wgContLang;
	$this->hash = array( 0 => array(), 1 => array() );
	foreach ( $this->names as $name ) {
		$magic = MagicWord::get( $name );
		$case = intval( $magic->isCaseSensitive() );
		foreach ( $magic->getSynonyms() as $syn ) {
			$key = $case ? $syn : $wgContLang->lc( $syn );
			$this->hash[$case][$key] = $name;
		}
	}
	return $this->hash;
}
/**
 * Get the base regex.
 *
 * Returns two alternations without delimiters: index 0 for the
 * case-insensitive words, index 1 for the case-sensitive ones. Every
 * synonym sits in its own named group "<index>_<magic word ID>", where the
 * digits of the synonym index are spelled as the letters a-j; parseMatch()
 * only reads the part after the first underscore.
 *
 * @return array
 */
function getBaseRegex() {
	if ( is_null( $this->baseRegex ) ) {
		$this->baseRegex = array( 0 => '', 1 => '' );
		foreach ( $this->names as $name ) {
			$magic = MagicWord::get( $name );
			$case = intval( $magic->isCaseSensitive() );
			foreach ( $magic->getSynonyms() as $i => $syn ) {
				// PCRE 8.34 and later refuse group names that begin with
				// a digit, so the numeric index cannot be used verbatim.
				// The digit-to-letter mapping is one-to-one, which keeps
				// the group names unique.
				$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
				$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
				if ( $this->baseRegex[$case] === '' ) {
					$this->baseRegex[$case] = $group;
				} else {
					$this->baseRegex[$case] .= '|' . $group;
				}
			}
		}
	}
	return $this->baseRegex;
}
/**
 * Get the unanchored regexes that do not match parameters.
 *
 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
 *   regex; an entry is '' when there are no words of that kind
 */
function getRegex() {
	if ( !is_null( $this->regex ) ) {
		return $this->regex;
	}

	$base = $this->getBaseRegex();
	$this->regex = array( '', '' );
	if ( $base[0] !== '' ) {
		$this->regex[0] = "/{$base[0]}/iuS";
	}
	if ( $base[1] !== '' ) {
		$this->regex[1] = "/{$base[1]}/S";
	}
	return $this->regex;
}
/**
 * Get the regexes for matching variables with parameters: the result of
 * getRegex() with every escaped "$1" turned into the group (.*?).
 *
 * @return array
 */
function getVariableRegex() {
	$plain = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $plain );
}
/**
 * Get the regexes anchored to the start of the string that do not match
 * parameters.
 *
 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
 *   regex; an entry is '' when there are no words of that kind
 */
function getRegexStart() {
	$base = $this->getBaseRegex();
	$insensitive = '';
	$sensitive = '';
	if ( $base[0] !== '' ) {
		$insensitive = "/^(?:{$base[0]})/iuS";
	}
	if ( $base[1] !== '' ) {
		$sensitive = "/^(?:{$base[1]})/S";
	}
	return array( $insensitive, $sensitive );
}
/**
 * Get the regexes, anchored at both ends, for matching variables with
 * parameters. Every escaped "$1" becomes the group (.*?).
 *
 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
 *   regex; an entry is '' when there are no words of that kind
 */
function getVariableStartToEndRegex() {
	$base = $this->getBaseRegex();
	$anchored = array( '', '' );
	if ( $base[0] !== '' ) {
		$pattern = "/^(?:{$base[0]})$/iuS";
		$anchored[0] = str_replace( "\\$1", "(.*?)", $pattern );
	}
	if ( $base[1] !== '' ) {
		$pattern = "/^(?:{$base[1]})$/S";
		$anchored[1] = str_replace( "\\$1", "(.*?)", $pattern );
	}
	return $anchored;
}
/**
 * Parse a match array from preg_match.
 * Returns array(magic word ID, parameter value).
 * If there is no parameter value, that element will be false.
 *
 * The first non-empty named group decides the magic word: its name is
 * split at the first underscore and the second part is the ID. The
 * parameter is the element two positions further on (after the numeric
 * duplicate of the named group), or false if the array ends there.
 *
 * @param array $m Match array with named groups
 * @return array
 * @throws MWException If a group name has no underscore, or if no
 *   non-empty group is found
 */
function parseMatch( $m ) {
	// each() was removed in PHP 8.0; key()/current()/next() walk the
	// array's internal pointer in the same way.
	reset( $m );
	while ( ( $key = key( $m ) ) !== null ) {
		$value = current( $m );
		// Step past the current element first, as each() did, so that the
		// next() further down lands on the parameter capture.
		next( $m );
		if ( $key === 0 || $value === '' ) {
			continue;
		}
		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		$magicName = $parts[1];
		$paramValue = next( $m );
		return array( $magicName, $paramValue );
	}
	// This shouldn't happen either
	throw new MWException( __METHOD__.': parameter not found' );
}
/**
 * Match some text, with parameter capture.
 * Returns an array with the magic word name in the first element and the
 * parameter in the second element.
 * Both elements are false if there was no match.
 *
 * @param string $text
 * @return array
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $regex ) {
		if ( $regex === '' ) {
			continue;
		}
		$m = false;
		if ( preg_match( $regex, $text, $m ) ) {
			return $this->parseMatch( $m );
		}
	}
	return array( false, false );
}
/**
 * Match some text, without parameter capture.
 * Returns the magic word name, or false if there was no capture.
 *
 * The case-sensitive table is tried with the text as given; after that the
 * case-insensitive table is tried with the text lower-cased by the content
 * language.
 *
 * @param string $text
 * @return string|false
 */
public function matchStartToEnd( $text ) {
	global $wgContLang;

	$hash = $this->getHash();
	if ( isset( $hash[1][$text] ) ) {
		return $hash[1][$text];
	}

	$lowered = $wgContLang->lc( $text );
	return isset( $hash[0][$lowered] ) ? $hash[0][$lowered] : false;
}
/**
 * Returns an associative array, ID => param value, for all items that match.
 * Removes the matched items from the input string (passed by reference).
 *
 * @param string &$text Text to search; modified in place
 * @return array
 */
public function matchAndRemove( &$text ) {
	$found = array();
	foreach ( $this->getRegex() as $regex ) {
		if ( $regex !== '' ) {
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				$pair = $this->parseMatch( $m );
				$found[$pair[0]] = $pair[1];
			}
			$text = preg_replace( $regex, '', $text );
		}
	}
	return $found;
}
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 * Return false if no match found and $text is not modified.
 * Does not match parameters.
 *
 * @param string &$text Text to search; modified in place on a match
 * @return string|false
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $regex ) {
		if ( $regex === '' ) {
			continue;
		}
		if ( !preg_match( $regex, $text, $m ) ) {
			continue;
		}
		list( $id, ) = $this->parseMatch( $m );
		$prefixLength = strlen( $m[0] );
		// When the match covers the whole text, assign '' directly
		// instead of calling substr() with the offset at the end.
		$text = ( $prefixLength >= strlen( $text ) )
			? ''
			: substr( $text, $prefixLength );
		return $id;
	}
	return false;
}