Merge "Revert "Remove old compat methods/functions for mb_ functions""
[mediawiki.git] / includes / MagicWord.php
blobba38f379862cb877fc02c164503f71924d2da41d
<?php
/**
 * File for magic words.
 *
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *   'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *   'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId;
	public $mSynonyms;
	public $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	static public $mObjects = array();
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn string|array Synonym or list of synonyms (cast to array)
	 * @param $cs bool Whether matching is case-sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are cached in self::$mObjects, so repeated calls with the
	 * same ID return the same instance.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The 'MagicWordwgVariableIDs' hook is run once, on the first call, and
	 * receives the ID list by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID (a key of self::$mCacheTTLs)
	 * @return int The TTL stored for $id, or -1 if there is no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The array is built on first use and then reused.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * The content language object is asked to fill in this magic word via
	 * getMagic(). If no synonyms are set afterwards, a placeholder synonym
	 * is installed and the problem is logged instead of throwing.
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Invalid magic word: deliberately logged rather than thrown.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds all cached regex strings from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only exactly-empty strings are dropped. A bare array_filter() would
		# also discard a captured "0", returning the whole match instead of $1.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) <= 1 ) {
			// Only the whole match is left, or nothing non-empty at all
			return $nonEmpty ? $nonEmpty[0] : null;
		}
		return $nonEmpty[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// $this is passed by value: objects are handles, so the callback still
		// updates this instance, and no reference to $this is needed.
		$text = preg_replace_callback(
			$this->getRegex(),
			array( $this, 'pregRemoveAndRecord' ),
			$text
		);
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing that leading occurrence
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		// See matchAndRemove(): no reference to $this is needed here either.
		$text = preg_replace_callback(
			$this->getRegexStart(),
			array( $this, 'pregRemoveAndRecord' ),
			$text
		);
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace(
			$this->getRegex(),
			StringUtils::escapeRegexReplacement( $replacement ),
			$subject,
			$limit
		);
		$this->mModified = !( $res === $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !( $res === $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * as given, without escaping.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !( $result === $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Keys are the synonyms lower-cased by the content language.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return int
	 */
	public function getId() {
		return $this->mId;
	}
}

/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex;
	public $regex;
	public $matches;

	/**
	 * @param $names array List of magic word names
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * magic word name; index 1 maps synonyms of case-sensitive words as-is.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Each synonym becomes a named group "<synonym index>_<magic word name>";
	 * groups are joined with "|" separately for case-insensitive (index 0)
	 * and case-sensitive (index 1) words.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an entry
	 *   is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $this->baseRegex[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $this->baseRegex[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array The array from getRegex() with each quoted "$1"
	 *   replaced by a capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * The array's internal pointer is walked by hand with key(), current()
	 * and next() instead of each(), which newer PHP versions no longer
	 * provide. The pointer position matters: after the first non-empty
	 * named group is found, the parameter is read two elements further on,
	 * skipping the numeric duplicate of that named group.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Advance first, so the pointer sits just past the current entry
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// Step over the numeric copy of the named group to the parameter
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is tried first with the text as given, then
	 * the case-insensitive table with the lower-cased text.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return int|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Set '' explicitly when the match covers the whole text
				// rather than relying on substr() at the end of the string.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}