Merge "Special:Upload should not crash on failing previews"
[mediawiki.git] / includes / MagicWord.php
blob5968e879038e2f577cdcb16f3743f766c38fe6be
1 <?php
/**
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = [];
 *
 * $magicWords['en'] = [
 *   'magicwordkey' => [ 0, 'case_insensitive_magic_word' ],
 *   'magicwordkey2' => [ 1, 'CASE_sensitive_magic_word2' ],
 * ];
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/** @var string|int Magic word ID (string key such as 'redirect'; 0 until set) */
	public $mId;

	/** @var array List of synonym strings for this magic word */
	public $mSynonyms;

	/** @var bool Whether matching is case sensitive */
	public $mCaseSensitive;

	/** @var string Delimited regex matching the word anywhere; '' until initRegex() runs */
	private $mRegex = '';

	/** @var string Delimited regex matching the word at the start of a string */
	private $mRegexStart = '';

	/** @var string Delimited regex matching the word as the entire string */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the escaped synonyms, without delimiters or modifiers */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with the escaped "$1" placeholder turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with the escaped "$1" placeholder turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Whether the last replace() or substituteCallback() changed its input */
	private $mModified = false;

	/** @var bool Scratch flag set by pregRemoveAndRecord() while removing matches */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var string[] IDs of the magic words that are parser variables */
	public static $mVariableIDs = [
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'pagelanguage',
		'numberofadmins',
		'cascadingsources',
	];

	/** @var int[] Array of caching hints for ParserCache, keyed by magic word ID */
	public static $mCacheTTLs = [
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	];

	/** @var string[] IDs of the double-underscore magic words */
	public static $mDoubleUnderscoreIDs = [
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	];

	/** @var string[] IDs of the substitution modifier magic words */
	public static $mSubstIDs = [
		'subst',
		'safesubst',
	];

	/** @var MagicWord[] Cache of loaded objects, keyed by magic word ID */
	public static $mObjects = [];

	/** @var MagicWordArray|null Lazily built by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param string|int $id Magic word ID
	 * @param string[]|string $syn Synonym or list of synonyms (cast to array)
	 * @param bool $cs Whether the word is case sensitive
	 */
	public function __construct( $id = 0, $syn = [], $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are cached in self::$mObjects, so repeated calls with the same
	 * ID return the same instance.
	 *
	 * @param string $id
	 *
	 * @return MagicWord
	 * @throws MWException If load() rejects the ID
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on first call, so that
	 * handlers can add to the list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', [ &self::$mVariableIDs ] );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id
	 * @return int The TTL registered in self::$mCacheTTLs, or -1 if the ID has none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The array is built once; the GetDoubleUnderscoreIDs hook is run just
	 * before it is built so that handlers can add to the ID list.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', [ &self::$mDoubleUnderscoreIDs ] );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = [];
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Hands this object to $wgContLang->getMagic(), which is expected to fill
	 * in the synonyms; an ID that ends up with no synonyms is rejected.
	 *
	 * @param string $id
	 * @throws MWException
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind so the object is not left
			// with an empty synonym list after the exception is thrown.
			$this->mSynonyms = [ 'brionmademeputthishere' ];
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds every cached regex from the synonym list.
	 *
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, [ $this, 'compareStringLength' ] );

		$escSyn = [];
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() turned a literal "$1" in a synonym into "\$1"; swap that
		// escaped placeholder for a non-greedy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * Lengths are compared in bytes (strlen).
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd === '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = [];
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped: array_filter() without a callback
		# would also discard a legitimate "0" capture.
		$parts = array_values( array_filter( $matches, static function ( $part ) {
			return $part !== '';
		} ) );

		if ( count( $parts ) > 1 ) {
			return $parts[1];
		}

		// Either there is no variable part, or it matched the empty string:
		// fall back to the full match. If even the full match is empty there
		// are no parts left, so return it directly instead of reading an
		// undefined offset.
		return isset( $parts[0] ) ? $parts[0] : $matches[0];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string &$text Modified in place
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Objects are passed by handle, so the callback needs no reference to
		// $this; a reference to $this is also a problem on PHP 7.1+.
		$text = preg_replace_callback(
			$this->getRegex(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param string &$text Modified in place
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		// See matchAndRemove(): no reference to $this is needed here.
		$text = preg_replace_callback(
			$this->getRegexStart(),
			[ $this, 'pregRemoveAndRecord' ],
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match was seen and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param string $replacement Literal replacement text (escaped before use)
	 * @param string $subject
	 * @param int $limit Maximum number of replacements, -1 for no limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex === '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * @deprecated since 1.25, unused
	 *
	 * @param array $magicarr
	 * @param string $subject
	 * @param string &$result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		wfDeprecated( __METHOD__, '1.25' );
		$search = [];
		$replace = [];
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with $wgContLang->lc() before being used
	 * as a key.
	 *
	 * @param array &$array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int
	 */
	public function getId() {
		return $this->mId;
	}
}