Merge "Only show notoken as possible API error once"
[mediawiki.git] / includes / MagicWord.php
blobae7f8fed74e45773c14d9d22c2471db54db39c42
1 <?php
2 /**
3 * File for magic words.
5 * See docs/magicword.txt.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
22 * @file
23 * @ingroup Parser
/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if (MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *   'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *   'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	/** Magic word ID, list of synonyms, and case-sensitivity flag */
	public $mId, $mSynonyms, $mCaseSensitive;
	/** Lazily built regexes; an empty string means "not built yet" (see initRegex()) */
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	/** Set by replace() and substituteCallback() */
	public $mModified = false;
	/** Set by pregRemoveAndRecord() */
	public $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'rootpagename',
		'rootpagenamee',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of MagicWord objects created by get(), keyed by magic word ID */
	static public $mObjects = array();
	/** MagicWordArray built on first call to getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id Magic word ID
	 * @param $syn string|array Synonym or list of synonyms (cast to array)
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are cached in self::$mObjects, so each ID is loaded only once
	 * until clearCache() is called.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on first call, and receives
	 * the ID list by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID (a key of self::$mCacheTTLs)
	 * @return int The TTL hint for $id, or -1 if there is no entry for it
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The GetDoubleUnderscoreIDs hook is run once, when the array is first
	 * built, and receives the ID list by reference.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * @param $id
	 * @throws MWException If no synonyms are set for $id after
	 *   $wgContLang->getMagic() has been called on this object
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym in place before throwing
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds all regex members from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. iu or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only empty strings are dropped. A bare array_filter() would also drop
		# the string "0", so a parameter value of "0" would be lost.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) == 1 ) {
			return $nonEmpty[0];
		} else {
			return $nonEmpty[1];
		}
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Objects are handles, so no reference to $this is needed in the callback
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading instance
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with the empty string.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * The replacement is escaped with StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace().
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = $res !== $subject;
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = $res !== $text;
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * NOTE(review): unlike replace(), the replacements are passed to
	 * preg_replace() unescaped, so "$n" / "\n" sequences in them are treated as
	 * back-references. Confirm whether that is intended.
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return $result !== $subject;
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with $wgContLang->lc() and used as the key.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return int|string The ID passed to the constructor or to load()
	 */
	public function getId() {
		return $this->mId;
	}
}
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** List of magic word IDs */
	public $names = array();
	/** Lazily built lookup table, see getHash(); null until built */
	public $hash;
	/** Lazily built regexes, see getBaseRegex() and getRegex(); null until built */
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Discards the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Discards the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 holds case-insensitive words keyed by lower-cased synonym,
	 * index 1 holds case-sensitive words keyed by the synonym as-is. The
	 * values are magic word IDs.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Every synonym becomes a named group "<synonym index>_<magic word ID>";
	 * parseMatch() relies on that naming to recover the ID.
	 *
	 * @return array Index 0: case-insensitive alternation, index 1:
	 *   case-sensitive alternation; either may be the empty string
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *   regex; an entry is the empty string when there are no such words
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with each quoted "$1" replaced
	 *   by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// Walk the array with key()/current()/next() rather than each(), which
		// is deprecated as of PHP 7.2 and removed in PHP 8.0. As with each(),
		// the internal pointer is already past the current element when the
		// loop body runs; the next() call further down depends on that.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// Skip the numeric duplicate of the named group; the element after
			// it is taken as the parameter (false if the array ends there)
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is checked first, then the case-insensitive
	 * one using the lower-cased text.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		global $wgContLang;
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool Magic word ID, or false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Set the empty string explicitly when the match consumes the
				// whole text, rather than relying on substr() at end-of-string
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}