MessageCache invalidation improvements
[mediawiki.git] / includes / MagicWord.php
blob391e05aea1c0188885dd57751af123bf898fa549
1 <?php
/**
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = [];
 *
 * $magicWords['en'] = [
 *   'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
 *   'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
 * ];
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
59 class MagicWord {
/**#@-*/

/** @var int Identifier of this magic word */
public $mId;

/** @var array Every synonym that spells this magic word */
public $mSynonyms;

/** @var bool True when matching must respect letter case */
public $mCaseSensitive;

/** @var string Delimited pattern matching the word anywhere; '' until initRegex() runs */
private $mRegex = '';

/** @var string Delimited pattern anchored at the start of the subject */
private $mRegexStart = '';

/** @var string Delimited pattern anchored at both ends of the subject */
private $mRegexStartToEnd = '';

/** @var string Alternation of the quoted synonyms, without delimiters or modifiers */
private $mBaseRegex = '';

/** @var string Like $mRegex, with the "$1" placeholder turned into a capture group */
private $mVariableRegex = '';

/** @var string Like $mRegexStartToEnd, with "$1" turned into a capture group */
private $mVariableStartToEndRegex = '';

/** @var bool Whether the last replace()/substituteCallback() changed its input */
private $mModified = false;

/** @var bool Whether the last matchAndRemove()/matchStartAndRemove() removed anything */
private $mFound = false;
/** @var bool Set once getVariableIDs() has run the MagicWordwgVariableIDs hook */
public static $mVariableIDsInitialised = false;

/** @var string[] IDs returned by getVariableIDs(); hook handlers receive this list by reference */
public static $mVariableIDs = [
	'!',
	'currentmonth',
	'currentmonth1',
	'currentmonthname',
	'currentmonthnamegen',
	'currentmonthabbrev',
	'currentday',
	'currentday2',
	'currentdayname',
	'currentyear',
	'currenttime',
	'currenthour',
	'localmonth',
	'localmonth1',
	'localmonthname',
	'localmonthnamegen',
	'localmonthabbrev',
	'localday',
	'localday2',
	'localdayname',
	'localyear',
	'localtime',
	'localhour',
	'numberofarticles',
	'numberoffiles',
	'numberofedits',
	'articlepath',
	'pageid',
	'sitename',
	'server',
	'servername',
	'scriptpath',
	'stylepath',
	'pagename',
	'pagenamee',
	'fullpagename',
	'fullpagenamee',
	'namespace',
	'namespacee',
	'namespacenumber',
	'currentweek',
	'currentdow',
	'localweek',
	'localdow',
	'revisionid',
	'revisionday',
	'revisionday2',
	'revisionmonth',
	'revisionmonth1',
	'revisionyear',
	'revisiontimestamp',
	'revisionuser',
	'revisionsize',
	'subpagename',
	'subpagenamee',
	'talkspace',
	'talkspacee',
	'subjectspace',
	'subjectspacee',
	'talkpagename',
	'talkpagenamee',
	'subjectpagename',
	'subjectpagenamee',
	'numberofusers',
	'numberofactiveusers',
	'numberofpages',
	'currentversion',
	'rootpagename',
	'rootpagenamee',
	'basepagename',
	'basepagenamee',
	'currenttimestamp',
	'localtimestamp',
	'directionmark',
	'contentlanguage',
	'numberofadmins',
	'cascadingsources',
];

/**
 * Array of caching hints for ParserCache, keyed by magic word ID.
 * Read through getCacheTTL(), which answers -1 for IDs not listed here.
 * NOTE(review): the values look like durations in seconds - confirm.
 *
 * @var int[]
 */
public static $mCacheTTLs = [
	'currentmonth' => 86400,
	'currentmonth1' => 86400,
	'currentmonthname' => 86400,
	'currentmonthnamegen' => 86400,
	'currentmonthabbrev' => 86400,
	'currentday' => 3600,
	'currentday2' => 3600,
	'currentdayname' => 3600,
	'currentyear' => 86400,
	'currenttime' => 3600,
	'currenthour' => 3600,
	'localmonth' => 86400,
	'localmonth1' => 86400,
	'localmonthname' => 86400,
	'localmonthnamegen' => 86400,
	'localmonthabbrev' => 86400,
	'localday' => 3600,
	'localday2' => 3600,
	'localdayname' => 3600,
	'localyear' => 86400,
	'localtime' => 3600,
	'localhour' => 3600,
	'numberofarticles' => 3600,
	'numberoffiles' => 3600,
	'numberofedits' => 3600,
	'currentweek' => 3600,
	'currentdow' => 3600,
	'localweek' => 3600,
	'localdow' => 3600,
	'numberofusers' => 3600,
	'numberofactiveusers' => 3600,
	'numberofpages' => 3600,
	'currentversion' => 86400,
	'currenttimestamp' => 3600,
	'localtimestamp' => 3600,
	'pagesinnamespace' => 3600,
	'numberofadmins' => 3600,
	'numberingroup' => 3600,
];

/** @var string[] IDs handed to MagicWordArray by getDoubleUnderscoreArray() */
public static $mDoubleUnderscoreIDs = [
	'notoc',
	'nogallery',
	'forcetoc',
	'toc',
	'noeditsection',
	'newsectionlink',
	'nonewsectionlink',
	'hiddencat',
	'index',
	'noindex',
	'staticredirect',
	'notitleconvert',
	'nocontentconvert',
];

/** @var string[] IDs returned by getSubstIDs() */
public static $mSubstIDs = [
	'subst',
	'safesubst',
];

/** @var MagicWord[] Objects already built by get(), keyed by ID */
public static $mObjects = [];

/** @var MagicWordArray|null Built on first use by getDoubleUnderscoreArray() */
public static $mDoubleUnderscoreArray = null;

/**#@-*/
/**
 * Set up a magic word from its ID, its synonyms and its case-sensitivity flag.
 *
 * @param int $id Identifier stored in $mId
 * @param array|string $syn Synonyms; a non-array value is cast to an array
 * @param bool $cs Whether matching is case-sensitive
 */
public function __construct( $id = 0, $syn = [], $cs = false ) {
	$this->mCaseSensitive = $cs;
	$this->mSynonyms = (array)$syn;
	$this->mId = $id;
}
/**
 * Factory: hands out the object for an ID, building and loading it the
 * first time that ID is requested and reusing it from self::$mObjects
 * afterwards.
 *
 * @param int $id
 *
 * @return MagicWord
 */
public static function &get( $id ) {
	if ( isset( self::$mObjects[$id] ) ) {
		return self::$mObjects[$id];
	}

	$word = new MagicWord();
	$word->load( $id );
	self::$mObjects[$id] = $word;

	return self::$mObjects[$id];
}
/**
 * Get an array of parser variable IDs.
 *
 * On the first call the MagicWordwgVariableIDs hook is run with the list
 * passed by reference; later calls return the stored list directly.
 *
 * @return array
 */
public static function getVariableIDs() {
	if ( self::$mVariableIDsInitialised ) {
		return self::$mVariableIDs;
	}

	# Get variable IDs
	Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
	self::$mVariableIDsInitialised = true;

	return self::$mVariableIDs;
}
/**
 * Get an array of parser substitution modifier IDs.
 *
 * @return array The contents of self::$mSubstIDs
 */
public static function getSubstIDs() {
	$ids = self::$mSubstIDs;

	return $ids;
}
/**
 * Allow external reads of the TTL array.
 *
 * @param string $id Magic word ID used as key into self::$mCacheTTLs
 * @return int The stored value, or -1 when the ID has no entry
 */
public static function getCacheTTL( $id ) {
	return array_key_exists( $id, self::$mCacheTTLs )
		? self::$mCacheTTLs[$id]
		: -1;
}
/**
 * Get a MagicWordArray of double-underscore entities.
 *
 * The array is built on the first call, after the GetDoubleUnderscoreIDs
 * hook has been run with the ID list passed by reference, and is reused
 * on later calls.
 *
 * @return MagicWordArray
 */
public static function getDoubleUnderscoreArray() {
	if ( self::$mDoubleUnderscoreArray !== null ) {
		return self::$mDoubleUnderscoreArray;
	}

	Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
	self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );

	return self::$mDoubleUnderscoreArray;
}
/**
 * Empty self::$mObjects so that get() builds fresh objects again.
 * For use in parser tests.
 */
public static function clearCache() {
	self::$mObjects = [];
}
/**
 * Initialises this object with an ID.
 *
 * Stores the ID, then asks the content language ($wgContLang->getMagic())
 * to fill in this object. If no synonyms are present afterwards, a
 * placeholder synonym is stored and the ID is rejected.
 *
 * @param int $id
 * @throws MWException When the object has no synonyms after getMagic()
 */
public function load( $id ) {
	global $wgContLang;

	$this->mId = $id;
	$wgContLang->getMagic( $this );

	if ( $this->mSynonyms ) {
		return;
	}

	$this->mSynonyms = [ 'brionmademeputthishere' ];
	throw new MWException( "Error: invalid magic word '$id'" );
}
/**
 * Preliminary initialisation: builds every cached pattern of this object
 * from its synonym list and case-sensitivity flag.
 *
 * @private
 */
public function initRegex() {
	// Longest synonyms go first, so that a longer synonym takes precedence
	// over a shorter one in the alternation
	$ordered = $this->mSynonyms;
	usort( $ordered, [ $this, 'compareStringLength' ] );

	// Quote each synonym for use between "/" delimiters
	$quoted = array_map( function ( $synonym ) {
		return preg_quote( $synonym, '/' );
	}, $ordered );

	$base = implode( '|', $quoted );
	$flags = $this->mCaseSensitive ? '' : 'iu';
	$anchored = "/^(?:{$base})$/{$flags}";

	$this->mBaseRegex = $base;
	$this->mRegex = "/{$base}/{$flags}";
	$this->mRegexStart = "/^(?:{$base})/{$flags}";
	$this->mRegexStartToEnd = $anchored;

	// preg_quote() turned a literal "$1" into "\$1"; swap that placeholder
	// for a lazy capture group
	$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
	$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)", $anchored );
}
/**
 * Comparison callback ordering strings by byte length, longest first.
 *
 * @param string $s1
 * @param string $s2
 *
 * @return int -1 when $s1 is longer than $s2, 1 when it is shorter,
 *   0 when both have the same length
 */
public function compareStringLength( $s1, $s2 ) {
	$diff = strlen( $s2 ) - strlen( $s1 );
	if ( $diff === 0 ) {
		return 0;
	}

	return $diff > 0 ? 1 : -1;
}
/**
 * Gets the delimited regex matching the word, building the patterns first
 * if that has not happened yet.
 *
 * @return string
 */
public function getRegex() {
	if ( $this->mRegex != '' ) {
		return $this->mRegex;
	}
	$this->initRegex();

	return $this->mRegex;
}
/**
 * Gets the regexp modifiers to use together with
 * MagicWord::getBaseRegex(): 'iu' for a case-insensitive word, nothing
 * for a case-sensitive one. The complete expressions returned by the
 * other getters already include them.
 *
 * @return string
 */
public function getRegexCase() {
	if ( $this->mRegex === '' ) {
		$this->initRegex();
	}

	if ( $this->mCaseSensitive ) {
		return '';
	}

	return 'iu';
}
/**
 * Gets a regex matching the word only at the start of the string,
 * building the patterns first if that has not happened yet.
 *
 * @return string
 */
public function getRegexStart() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}

	return $this->mRegexStart;
}
/**
 * Gets a regex matching the word from start to end of a string,
 * building the patterns first if that has not happened yet.
 *
 * @return string
 * @since 1.23
 */
public function getRegexStartToEnd() {
	if ( $this->mRegexStartToEnd != '' ) {
		return $this->mRegexStartToEnd;
	}
	$this->initRegex();

	return $this->mRegexStartToEnd;
}
/**
 * Gets the bare alternation of quoted synonyms, i.e. the regex without
 * delimiters and modifiers.
 *
 * @return string
 */
public function getBaseRegex() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}

	return $this->mBaseRegex;
}
/**
 * Tells whether the word occurs anywhere in the text.
 *
 * @param string $text
 *
 * @return bool
 */
public function match( $text ) {
	$hits = preg_match( $this->getRegex(), $text );

	return (bool)$hits;
}
/**
 * Tells whether the text begins with the word.
 *
 * @param string $text
 *
 * @return bool
 */
public function matchStart( $text ) {
	$hits = preg_match( $this->getRegexStart(), $text );

	return (bool)$hits;
}
/**
 * Tells whether the whole text, from start to end, is the word.
 *
 * @param string $text
 *
 * @return bool
 * @since 1.23
 */
public function matchStartToEnd( $text ) {
	$hits = preg_match( $this->getRegexStartToEnd(), $text );

	return (bool)$hits;
}
/**
 * Returns NULL if there's no match, the value of $1 otherwise.
 * The return value is the matched string if there is no non-empty
 * variable part, and the matched variable part ($1) if there is one.
 *
 * @param string $text
 *
 * @return string|null
 */
public function matchVariableStartToEnd( $text ) {
	$matches = [];
	if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
		return null;
	}

	# Multiple matched parts (variable match); some will be empty because of
	# synonyms. The variable will be the second non-empty one, so remove the
	# blank elements and re-sort the indices. See also bug 6526.
	#
	# Only genuinely empty strings are dropped: array_filter() without a
	# callback would also discard "0", turning e.g. "0px" matched against
	# "$1px" into the whole match instead of the variable part "0".
	$parts = array_values( array_filter( $matches, function ( $part ) {
		return $part !== '';
	} ) );

	# With no second non-empty part, fall back to the whole match. $matches[0]
	# is always set after a successful preg_match(), even when it is ''.
	return isset( $parts[1] ) ? $parts[1] : $matches[0];
}
/**
 * Strips every instance of the word from the input string, which is
 * altered in place, and reports whether anything was found.
 *
 * @param string &$text
 *
 * @return bool
 */
public function matchAndRemove( &$text ) {
	$this->mFound = false;
	$pattern = $this->getRegex();
	// pregRemoveAndRecord() flags the hit and replaces it with ''
	$text = preg_replace_callback( $pattern, [ $this, 'pregRemoveAndRecord' ], $text );

	return $this->mFound;
}
/**
 * Strips the word from the start of the input string, which is altered
 * in place, and reports whether it was found there.
 *
 * @param string &$text
 * @return bool
 */
public function matchStartAndRemove( &$text ) {
	$this->mFound = false;
	$pattern = $this->getRegexStart();
	// pregRemoveAndRecord() flags the hit and replaces it with ''
	$text = preg_replace_callback( $pattern, [ $this, 'pregRemoveAndRecord' ], $text );

	return $this->mFound;
}
/**
 * Replacement callback used by matchAndRemove() and matchStartAndRemove():
 * records that a match was seen and replaces it with nothing.
 *
 * @return string Always the empty string
 */
public function pregRemoveAndRecord() {
	$this->mFound = true;

	return '';
}
/**
 * Replaces the word with something else. The replacement is passed
 * through StringUtils::escapeRegexReplacement() before use. Whether the
 * subject changed is remembered for getWasModified().
 *
 * @param string $replacement
 * @param string $subject
 * @param int $limit Passed through to preg_replace()
 *
 * @return string
 */
public function replace( $replacement, $subject, $limit = -1 ) {
	$pattern = $this->getRegex();
	$escaped = StringUtils::escapeRegexReplacement( $replacement );
	$result = preg_replace( $pattern, $escaped, $subject, $limit );
	$this->mModified = ( $result !== $subject );

	return $result;
}
/**
 * Variable handling: {{SUBST:xxx}} style words.
 * Calls back a function to determine what to replace xxx with.
 * Input word must contain $1. Whether the text changed is remembered for
 * getWasModified().
 *
 * @param string $text
 * @param callable $callback Handed to preg_replace_callback()
 *
 * @return string
 */
public function substituteCallback( $text, $callback ) {
	$pattern = $this->getVariableRegex();
	$result = preg_replace_callback( $pattern, $callback, $text );
	$this->mModified = ( $result !== $text );

	return $result;
}
/**
 * Gets the regex matching the word, where $1 is a wildcard, building the
 * patterns first if that has not happened yet.
 *
 * @return string
 */
public function getVariableRegex() {
	if ( $this->mVariableRegex != '' ) {
		return $this->mVariableRegex;
	}
	$this->initRegex();

	return $this->mVariableRegex;
}
/**
 * Gets the regex matching the entire string, where $1 is a wildcard,
 * building the patterns first if that has not happened yet.
 *
 * @return string
 */
public function getVariableStartToEndRegex() {
	if ( $this->mVariableStartToEndRegex != '' ) {
		return $this->mVariableStartToEndRegex;
	}
	$this->initRegex();

	return $this->mVariableStartToEndRegex;
}
/**
 * Reads one entry straight out of the synonym list.
 *
 * @param int $i Index into $mSynonyms
 *
 * @return string
 */
public function getSynonym( $i ) {
	return $this->mSynonyms[$i];
}
/**
 * Returns the complete synonym list.
 *
 * @return array
 */
public function getSynonyms() {
	return $this->mSynonyms;
}
/**
 * Tells whether the most recent call to replace() or substituteCallback()
 * returned a text different from its input.
 *
 * @return bool
 */
public function getWasModified() {
	return $this->mModified;
}
/**
 * $magicarr is an associative array of (magic word ID => replacement).
 * All replacements are handed to a single preg_replace() call. The result
 * is placed in the out variable $result. The return value is true if
 * something was replaced.
 * @deprecated since 1.25, unused
 *
 * @param array $magicarr
 * @param string $subject
 * @param string &$result
 *
 * @return bool
 */
public function replaceMultiple( $magicarr, $subject, &$result ) {
	wfDeprecated( __METHOD__, '1.25' );

	// One pattern per ID, in the same order as the replacements
	$patterns = [];
	foreach ( array_keys( $magicarr ) as $id ) {
		$patterns[] = self::get( $id )->getRegex();
	}

	$result = preg_replace( $patterns, array_values( $magicarr ), $subject );

	return $result !== $subject;
}
/**
 * Adds all the synonyms of this MagicWord to an array, to allow quick
 * lookup in a list of magic words. Each synonym is passed through
 * $wgContLang->lc() and the result is used as the array key.
 *
 * @param array &$array
 * @param string $value Stored under every key
 */
public function addToArray( &$array, $value ) {
	global $wgContLang;

	foreach ( $this->mSynonyms as $synonym ) {
		$key = $wgContLang->lc( $synonym );
		$array[$key] = $value;
	}
}
/**
 * Tells whether this word is matched case-sensitively.
 *
 * @return bool
 */
public function isCaseSensitive() {
	return $this->mCaseSensitive;
}
/**
 * Returns the identifier this object was constructed or loaded with.
 *
 * @return int
 */
public function getId() {
	return $this->mId;
}