Tweak r48841: use 90% font instead
[mediawiki.git] / includes / MagicWord.php
blob4e97016dde9ba9085f7b8f4c005e7c81ce3b4548
1 <?php
2 /**
3 * File for magic words
4 * See docs/magicword.txt
6 * @file
7 * @ingroup Parser
8 */
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 * Usage:
 *     if (MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use the LanguageGetMagic hook. For
 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
	var $mRegexStart, $mBaseRegex, $mVariableRegex;
	// Declared explicitly: it used to be created implicitly by the constructor.
	var $mVariableStartToEndRegex;
	var $mModified, $mFound;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'displaytitle',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'newsectionlink',
		'nonewsectionlink',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'urlencode',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'language',
		'contentlanguage',
		'pagesinnamespace',
		'numberofadmins',
		'numberofviews',
		'defaultsort',
		'pagesincategory',
		'index',
		'noindex',
		'numberingroup',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
	);

	static public $mObjects = array();
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id  Magic word ID
	 * @param $syn A synonym or an array of synonyms (cast to array)
	 * @param $cs  Whether matching is case-sensitive
	 */
	function __construct($id = 0, $syn = '', $cs = false) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
		// Empty strings mark the regexes as "not built yet"; see initRegex().
		$this->mRegex = '';
		$this->mRegexStart = '';
		$this->mVariableRegex = '';
		$this->mVariableStartToEndRegex = '';
		$this->mModified = false;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once.
	 * @static
	 */
	static function &get( $id ) {
		wfProfileIn( __METHOD__ );
		if ( !array_key_exists( $id, self::$mObjects ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		wfProfileOut( __METHOD__ );
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				// Guard against redefining a constant that already exists.
				if ( !defined( $word ) ) {
					define( $word, $word );
				}
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Allow external reads of TTL array.
	 * @return The TTL listed in self::$mCacheTTLs for $id, or -1 if there is none
	 */
	static function getCacheTTL($id) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/** Get a MagicWordArray of double-underscore entities */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Initialises this object with an ID.
	 * The object is handed to $wgContLang->getMagic(); if it still has no
	 * synonyms afterwards, a nonsense placeholder synonym is installed and
	 * the problem is logged rather than thrown.
	 */
	function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
	}

	/**
	 * Preliminary initialisation: builds all regex variants from the synonyms.
	 * @private
	 */
	function initRegex() {
		#$variableClass = Title::legalChars();
		# This was used for matching "$1" variables, but different uses of the feature will have
		# different restrictions, which should be checked *after* the MagicWord has been matched,
		# not here. - IMSoP

		$escSyn = array();
		foreach ( $this->mSynonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * Gets a regex representing matching the word
	 */
	function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 */
	function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 */
	function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 * @return bool
	 */
	function match( $text ) {
		// Cast so the result really is the documented bool, not preg_match()'s int.
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 * @return bool
	 */
	function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only exact empty strings are dropped: a bare array_filter() would also
		# discard a legitimately captured "0".
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) == 1 ) {
			return $nonEmpty[0];
		}
		return isset( $nonEmpty[1] ) ? $nonEmpty[1] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only matches the word at the start of the text
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and deletes it
	 * @private
	 */
	function pregRemoveAndRecord( ) {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else.
	 * The replacement is escaped, so it is inserted literally.
	 */
	function replace( $replacement, $subject, $limit=-1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !($res === $subject);
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !($res === $text);
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 */
	function getWasModified(){
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacements are passed to preg_replace() unescaped.
	 *
	 * Declared static to match its documentation (it uses no instance state);
	 * calling it through an instance still works.
	 * @static
	 */
	static function replaceMultiple( $magicarr, $subject, &$result ){
		$search = array();
		$replace = array();
		foreach( $magicarr as $id => $replacement ){
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !($result === $subject);
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc($syn)] = $value;
		}
	}

	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	function getId() {
		return $this->mId;
	}
}

/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	var $names = array();
	var $hash;
	var $baseRegex, $regex;
	var $matches;

	/**
	 * @param $names Array of magic word IDs
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		// Invalidate everything derived from the name list.
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * ID; index 1 maps synonyms of case-sensitive words, unchanged, to theirs.
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns a two-element array: index 0 is the alternation for
	 * case-insensitive words, index 1 the one for case-sensitive words.
	 * Each synonym becomes a named group "<synonym index>_<magic word ID>".
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group names must not start with a digit in PCRE 8.34+,
					// so spell the synonym index with letters. parseMatch()
					// only uses the part after the first underscore.
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = '(?P<' . $it . '_' . $name . '>' . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex.
	 * Returns a two-element array like getBaseRegex(); an entry is '' when
	 * there are no words of that case-sensitivity.
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables
	 */
	function getVariableRegex() {
		// str_replace() accepts an array subject, so both regexes are converted.
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get an anchored regex for matching variables
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * Throws MWException if a non-empty entry has a key that is not of the
	 * form "<prefix>_<ID>", or if no non-empty named entry is found.
	 */
	function parseMatch( $m ) {
		reset( $m );
		// key()/current()/next() replace each(), which was removed in PHP 8.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric duplicate of the named group;
			// the element after it is the captured parameter, if there is one.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up with the text exactly as given.
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}
}