Merge "Minor CSS cleanup for Vector and Monobook skins"
[mediawiki.git] / includes / MagicWord.php
blob86508ec128ae3458ce462e1db7407873c61a4ce5
1 <?php
2 /**
3 * File for magic words.
5 * See docs/magicword.txt.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
22 * @file
23 * @ingroup Parser
26 /**
27 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
29 * @par Usage:
30 * @code
31 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
32 * // some code
33 * }
34 * @endcode
36 * Possible future improvements:
37 * * Simultaneous searching for a number of magic words
38 * * MagicWord::$mObjects in shared memory
40 * Please avoid reading the data out of one of these objects and then writing
41 * special case code. If possible, add another match()-like function here.
43 * To add magic words in an extension, use $magicWords in a file listed in
44 * $wgExtensionMessagesFiles[].
46 * @par Example:
47 * @code
48 * $magicWords = array();
50 * $magicWords['en'] = array(
51 * 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
52 * 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
53 * );
54 * @endcode
56 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
57 * hook. Use string keys.
59 * @ingroup Parser
class MagicWord {
	/**
	 * Magic word ID, list of synonyms, and case-sensitivity flag.
	 * Treat these as private; use the accessors below.
	 */
	public $mId, $mSynonyms, $mCaseSensitive;

	/** Lazily built regexes; an empty string means "not built yet" (see initRegex()). */
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';

	/** Whether the last replace()/substituteCallback() call changed its input. */
	public $mModified = false;

	/** Whether the last matchAndRemove()/matchStartAndRemove() call found the word. */
	public $mFound = false;

	/** Set once the MagicWordwgVariableIDs hook has been run on $mVariableIDs. */
	static public $mVariableIDsInitialised = false;

	/** IDs of magic words that are parser variables (extended by the MagicWordwgVariableIDs hook). */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): the values look like lifetimes in seconds (3600 / 86400) - confirm.
	 */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore words (extended by the GetDoubleUnderscoreIDs hook). */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs of the substitution modifiers. */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of loaded MagicWord objects, keyed by ID (see get() and clearCache()). */
	static public $mObjects = array();

	/** Lazily created MagicWordArray of the double-underscore words. */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id string|int Magic word ID
	 * @param $syn array|string Synonym or list of synonyms (cast to array)
	 * @param $cs bool True if the word is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the cached object for an ID, loading it on first use.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The MagicWordwgVariableIDs hook is run once, on first call, so that
	 * handlers can append to the list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of the TTL array.
	 *
	 * @param $id string Magic word ID
	 * @return int The hint stored in $mCacheTTLs for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities.
	 * Built on first call, after running the GetDoubleUnderscoreIDs hook.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID.
	 * The content language's getMagic() is expected to fill in the synonyms;
	 * if none are present afterwards the ID is treated as invalid.
	 *
	 * @param $id
	 * @throws MWException if no synonyms were loaded for $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the error
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds all regex members from the synonyms.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// A quoted "$1" in a synonym becomes a lazy wildcard capture
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. "iu" or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}
		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Filter on length rather than truthiness: a plain array_filter() would
		# also discard the string "0", so "0px" would yield "0px" instead of "0".
		$matches = array_values( array_filter( $matches, 'strlen' ) );

		if ( count( $matches ) > 1 ) {
			return $matches[1];
		}
		// Only the full match is left (or nothing at all for an empty match)
		return isset( $matches[0] ) ? $matches[0] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		// Plain $this is enough for the callback; no reference is needed
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and removes that
	 * leading occurrence from the input string
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove() and matchStartAndRemove(): records that a
	 * match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !( $res === $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !( $res === $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * as given, without StringUtils::escapeRegexReplacement().
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !( $result === $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words. Keys are the synonyms lower-cased by
	 * the content language.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The ID given to the constructor or load()
	 */
	public function getId() {
		return $this->mId;
	}
}
641 * Class for handling an array of magic words
642 * @ingroup Parser
class MagicWordArray {
	/** List of magic word IDs held by this array. */
	public $names = array();

	/** Cached result of getHash(); null until built or after add()/addArray(). */
	public $hash;

	/** Cached results of getBaseRegex() / getRegex(); null until built. */
	public $baseRegex, $regex;

	/** Not used by any method of this class. */
	public $matches;

	/**
	 * @param $names array
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name. Invalidates the cached hash and regexes.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name. Invalidates the cached hash and
	 * regexes. Keys of $names are ignored.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array.
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their ID,
	 * index 1 maps synonyms of case-sensitive words (unchanged) to their ID.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex.
	 * Returns two alternations (index 0: case-insensitive words, index 1:
	 * case-sensitive words). Each synonym is wrapped in a named group
	 * "<synonym index>_<magic word ID>" so that parseMatch() can recover the ID.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters.
	 * An entry is the empty string when there are no words of that kind.
	 *
	 * @return array
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters.
	 * str_replace() is applied to the array from getRegex(), so the result
	 * is an array as well.
	 *
	 * @return array
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * The first non-empty entry after the full match is taken as the named
	 * group "<synonym index>_<magic word ID>". The entry right after it is
	 * skipped, and the one after that, if present, is the parameter value.
	 *
	 * Implemented as a positional scan rather than with each(), which was
	 * deprecated in PHP 7.2 and removed in PHP 8.0.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		$keys = array_keys( $m );
		$values = array_values( $m );
		foreach ( $keys as $pos => $key ) {
			$value = $values[$pos];
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', (string)$key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// Two positions on: past the named group and the entry after it
			$paramPos = $pos + 2;
			$paramValue = array_key_exists( $paramPos, $values ) ? $values[$paramPos] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture.
	 * Case-sensitive synonyms are tried first, then the lower-cased text is
	 * looked up among the case-insensitive ones.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool Magic word ID, or false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Handle a full-length match explicitly so $text is always a string
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}