Revert "Include short descriptions for extensions bundled in the release"
[mediawiki.git] / includes / MagicWord.php
blob377d40649435e5c9fcac8b76c53f584c63cce9e9
<?php
/**
 * File for magic words.
 *
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *     'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *     'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**
	 * Identity of the word. These members are public only for historical
	 * reasons; treat them as private and use the accessors instead.
	 */
	public $mId, $mSynonyms, $mCaseSensitive;

	/**
	 * Lazily built regexes; all of them are filled in by initRegex().
	 * An empty string means "not built yet".
	 */
	public $mRegex = '';
	public $mRegexStart = '';
	public $mRegexStartToEnd = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';

	/** Whether the last replace()/substituteCallback() changed its input */
	public $mModified = false;

	/** Whether the last matchAndRemove()/matchStartAndRemove() found the word */
	public $mFound = false;

	/** True once the MagicWordwgVariableIDs hook has been run */
	static public $mVariableIDsInitialised = false;

	/** IDs of the magic words that are parser variables */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'revisionsize',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
		'cascadingsources',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore words (see getDoubleUnderscoreArray()) */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs of the substitution modifiers (see getSubstIDs()) */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of MagicWord objects created by get(), keyed by ID */
	static public $mObjects = array();

	/** Cached MagicWordArray built by getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id Identifier of the word
	 * @param $syn array|string Synonym(s) of the word; a scalar is wrapped
	 *   into a one-element array
	 * @param $cs bool Whether matching is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are created once per ID and cached in self::$mObjects.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run the first time this is called,
	 * with the ID list passed by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id Magic word ID to look up in self::$mCacheTTLs
	 * @return int The TTL listed for $id, or -1 if $id is not listed
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The GetDoubleUnderscoreIDs hook is run once, just before the array is
	 * first built.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Asks the content language object to fill in this word via getMagic().
	 *
	 * @param $id
	 * @throws MWException If no synonyms were set for $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym on the object before bailing out
			$this->mSynonyms = array( 'brionmademeputthishere' );
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds every regex member from the synonyms
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		$this->mRegexStartToEnd = "/^(?:{$this->mBaseRegex})$/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap it for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * Gets a regex matching the word from start to end of a string
	 *
	 * @return string
	 * @since 1.23
	 */
	public function getRegexStartToEnd() {
		if ( $this->mRegexStartToEnd == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStartToEnd;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns true if the text matched the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 * @since 1.23
	 */
	public function matchStartToEnd( $text ) {
		return (bool)preg_match( $this->getRegexStartToEnd(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so collect the
		# non-blank elements in order.
		# See also bug 6526
		#
		# Compare against '' explicitly: a bare array_filter() would also throw
		# away a captured "0", making "0px" return "0px" instead of "0".
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) < 2 ) {
			// No (non-blank) variable part: hand back the whole match
			return $matches[0];
		}
		return $nonEmpty[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only matches the word at the start of $text
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Note that, unlike replace(), the replacements are handed to
	 * preg_replace() without being escaped.
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with the content language before being
	 * used as a key.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return int|string The ID as passed to the constructor or load()
	 */
	public function getId() {
		return $this->mId;
	}
}

/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** Magic word IDs contained in this array */
	public $names = array();

	/** Lazily built lookup table, see getHash() */
	public $hash;

	/** Lazily built regexes, see getBaseRegex() and getRegex() */
	public $baseRegex, $regex;

	public $matches;

	/**
	 * @param $names array
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Resets the cached hash and regexes so they get rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Resets the cached hash and regexes so they get rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * magic word name, index 1 maps synonyms of case-sensitive words as-is.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Every synonym becomes a named group "<index>_<name>", where the
	 * synonym index has its digits mapped to letters. parseMatch() relies on
	 * this naming to recover the magic word name.
	 *
	 * @return array Alternations for case-insensitive (0) and
	 *   case-sensitive (1) words; '' where there are none
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Regexes for case-insensitive (0) and case-sensitive (1)
	 *   words; '' where there are none
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with the quoted "$1"
	 *   placeholder replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		// Walk the array with its internal pointer. This is what each() used
		// to do, but each() no longer exists as of PHP 8.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Advance now, so the pointer sits on the element after $key,
			// exactly as it did after a call to each()
			next( $m );

			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer is on the numeric duplicate of the named group;
			// one more step reaches the parameter capture, if there is one
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		// Case-sensitive words are looked up verbatim first
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			// preg_replace() yields null on a PCRE error; keep the caller's
			// text intact in that case instead of replacing it with null
			$stripped = preg_replace( $regex, '', $text );
			if ( $stripped !== null ) {
				$text = $stripped;
			}
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( !preg_match( $regex, $text, $m ) ) {
				continue;
			}
			list( $id, ) = $this->parseMatch( $m );
			$prefixLength = strlen( $m[0] );
			// substr() returns false rather than '' on older PHP versions when
			// the prefix is the entire string, so handle that case explicitly
			if ( $prefixLength >= strlen( $text ) ) {
				$text = '';
			} else {
				$text = substr( $text, $prefixLength );
			}
			return $id;
		}
		return false;
	}
}