Localisation updates from https://translatewiki.net.
[mediawiki.git] / includes / MagicWord.php
blob2c7ba91bf2bb0daa88aa3d3d6c4ec86575ee2d05
<?php
/**
 * File for magic words.
 *
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *   'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *   'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**
	 * @var string|int Magic word ID (e.g. 'redirect'); 0 until an ID is
	 *  supplied to the constructor or to load()
	 */
	public $mId;

	/** @var array List of synonym strings for this magic word */
	public $mSynonyms;

	/** @var bool Whether matching is case-sensitive */
	public $mCaseSensitive;

	/** @var string Unanchored regex, built lazily by initRegex() */
	private $mRegex = '';

	/** @var string Regex anchored to the start of the string */
	private $mRegexStart = '';

	/** @var string Regex anchored to both start and end of the string */
	private $mRegexStartToEnd = '';

	/** @var string Alternation of the quoted synonyms, without delimiters */
	private $mBaseRegex = '';

	/** @var string Like $mRegex, with the quoted "$1" turned into a capture group */
	private $mVariableRegex = '';

	/** @var string Like $mRegexStartToEnd, with "$1" turned into a capture group */
	private $mVariableStartToEndRegex = '';

	/** @var bool Result of the last replace()/substituteCallback() call */
	private $mModified = false;

	/** @var bool Set by pregRemoveAndRecord() when a match was removed */
	private $mFound = false;

	/** @var bool Whether the MagicWordwgVariableIDs hook has already been run */
	public static $mVariableIDsInitialised = false;

	/** @var array IDs of the magic words that are parser variables */
	public static $mVariableIDs = array(
		'!',
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'cascadingsources',
	);

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): values are presumably seconds (3600 = one hour,
	 * 86400 = one day) - confirm against the ParserCache consumer.
	 *
	 * @var array
	 */
	public static $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberingroup' => 3600,
	);

	/** @var array IDs of the double-underscore magic words (__NOTOC__ etc.) */
	public static $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** @var array IDs of the substitution modifiers */
	public static $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** @var array Cache of loaded MagicWord objects, keyed by ID */
	public static $mObjects = array();

	/** @var MagicWordArray|null Lazily built by getDoubleUnderscoreArray() */
	public static $mDoubleUnderscoreArray = null;

	/**
	 * @param string|int $id Magic word ID
	 * @param array|string $syn Synonym(s); a scalar is cast to a one-element array
	 * @param bool $cs Whether the word is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID.
	 * Objects are cached in self::$mObjects, so each ID is loaded only once.
	 *
	 * @param string $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The MagicWordwgVariableIDs hook is run once, on first call, so that
	 * handlers can modify the list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			Hooks::run( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 *
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param string $id Magic word ID
	 * @return int The hint from self::$mCacheTTLs, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities.
	 * Built on first call, after running the GetDoubleUnderscoreIDs hook.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			Hooks::run( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The content language object is asked to fill in this object via
	 * getMagic(); if no synonyms result, the ID is treated as invalid.
	 *
	 * @param string $id
	 * @throws MWException If no synonyms were found for $id
	 */
	public function load( $id ) {
		global $wgContLang;
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'brionmademeputthishere' );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
	}

	/**
	 * Preliminary initialisation: builds all the regex member variables
	 * from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() turned a literal "$1" in a synonym into "\$1";
		// swap that for a lazy capture group.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param string $s1
	 * @param string $s2
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * Note that an empty variable part cannot be told apart from a missing
	 * one: in that case the whole matched string is returned.
	 *
	 * @param string $text
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( !$matchcount ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only drop genuinely empty strings: a bare array_filter() would also
		# discard a captured "0", making "0px" return "0px" instead of "0".
		$nonEmpty = array();
		foreach ( $matches as $match ) {
			if ( $match !== '' ) {
				$nonEmpty[] = $match;
			}
		}

		if ( isset( $nonEmpty[1] ) ) {
			return $nonEmpty[1];
		}
		// Either only the full match is left, or (when the whole match was
		// the empty string) nothing at all.
		return isset( $nonEmpty[0] ) ? $nonEmpty[0] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param string $text
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegex(),
			array( $this, 'pregRemoveAndRecord' ),
			$text
		);

		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading occurrence
	 *
	 * @param string $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback(
			$this->getRegexStart(),
			array( $this, 'pregRemoveAndRecord' ),
			$text
		);

		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and replaces
	 * it with the empty string.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param string $replacement
	 * @param string $subject
	 * @param int $limit
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param string $text
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param int $i
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * @deprecated since 1.25, unused
	 *
	 * @param array $magicarr
	 * @param string $subject
	 * @param string $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		wfDeprecated( __METHOD__, '1.25' );
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param array $array
	 * @param string $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The magic word ID
	 */
	public function getId() {
		return $this->mId;
	}
}
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** @var array List of magic word IDs in this array */
	public $names = array();

	/**
	 * @var array|null Lazily built by getHash(): synonym => ID maps, index 0
	 *  for case-insensitive words and index 1 for case-sensitive ones
	 */
	private $hash;

	/** @var array|null Lazily built by getBaseRegex() */
	private $baseRegex;

	/** @var array|null Lazily built by getRegex() */
	private $regex;

	/**
	 * @param array $names Magic word IDs
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name. Invalidates the cached hash and regexes.
	 *
	 * @param string $name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name. Invalidates the cached hash and
	 * regexes.
	 *
	 * @param array $names
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Synonyms of case-insensitive words are stored lower-cased (by the
	 * content language) under index 0; case-sensitive ones verbatim under
	 * index 1.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Every synonym becomes a named group "<index>_<ID>" so that
	 * parseMatch() can recover the magic word ID from a match.
	 *
	 * @return array Two alternation strings: index 0 for case-insensitive
	 *  words, index 1 for case-sensitive ones ('' if there are none)
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an
	 *  entry is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(): str_replace() is applied to
	 *  each element of the array
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param array $m
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// Walk the array with key()/current()/next() rather than each(),
		// which was removed in PHP 8. As with each(), the internal pointer
		// is already one element past the entry being examined.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the numeric duplicate of the named
			// group; the element after it is the parameter capture, or
			// false when there is none.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param string $text
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * @param string $text
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up verbatim first
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param string $text
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			if ( preg_match_all( $regex, $text, $matches, PREG_SET_ORDER ) ) {
				foreach ( $matches as $m ) {
					list( $name, $param ) = $this->parseMatch( $m );
					$found[$name] = $param;
				}
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param string $text
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Handle the "match consumed everything" case explicitly
				// instead of relying on substr() at the end of the string.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}