Introduce a new hook that allows extensions to add to My Contributions
[mediawiki.git] / includes / MagicWord.php
blob9745b9ada20c0a6820e879c76df7594e9b04c0d8
1 <?php
/**
 * File for magic words.
 *
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if (MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 * 	'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 * 	'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	static public $mObjects = array();
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id string|int Magic word ID (defaults to 0 until load() is called)
	 * @param $syn array|string Synonym(s) of the word; a scalar is cast to a
	 *   one-element array
	 * @param $cs bool Whether the synonyms are matched case-sensitively
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are created once per ID and cached in self::$mObjects. If
	 * load() throws, nothing is cached for that ID.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on first call, and may
	 * modify the list in place.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 * @return int The value stored in self::$mCacheTTLs for $id, or -1 if the
	 *   ID has no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * The array is built lazily from self::$mDoubleUnderscoreIDs and reused.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Asks $wgContLang->getMagic() to fill in this object for the given ID.
	 *
	 * @param $id
	 * @throws MWException if no synonyms were set for $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// An empty synonym list would make initRegex() build "//", which
			// matches everything, so leave a placeholder behind in case the
			// caller catches the exception and keeps using this object.
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			// Close the profiling section before leaving via the exception.
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
			#wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds mBaseRegex, mRegex, mRegexStart, mVariableRegex and
	 * mVariableStartToEndRegex from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned a literal "$1" into "\$1"; swap that for a
		// lazy capture group to obtain the variable forms.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only the empty string counts as blank here: a plain array_filter()
		# would also throw away a captured "0" (e.g. "0px" for "$1px").
		$parts = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$parts[] = $part;
			}
		}

		if ( count( $parts ) >= 2 ) {
			return $parts[1];
		}
		// Either only the whole match is left, or everything matched was
		// empty; the latter yields null without an undefined-index access.
		return $parts ? $parts[0] : null;
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the
	 * input string, removing the word from its start
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * The replacement is passed through StringUtils::escapeRegexReplacement()
	 * before being handed to preg_replace().
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !( $res === $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !( $res === $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Note that, unlike replace(), the replacement strings are handed to
	 * preg_replace() as they are, without escaping.
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !( $result === $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Keys are the synonyms lower-cased via $wgContLang->lc().
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return string|int The ID given to the constructor or to load()
	 */
	public function getId() {
		return $this->mId;
	}
}
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array List of magic word names (IDs)
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Invalidates the cached hash and regexes.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Invalidates the cached hash and regexes.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * name, index 1 maps synonyms of case-sensitive words to their name.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Each synonym becomes a named group "<index>_<name>", where <index> is
	 * the synonym index spelled with the letters a-j instead of the digits
	 * 0-9, because newer PCRE versions reject group names that start with a
	 * digit. parseMatch() only uses the part after the first underscore.
	 *
	 * @return array Index 0: alternation for case-insensitive words,
	 *   index 1: alternation for case-sensitive words ('' if there are none)
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *   regex; an entry is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same layout as getRegex(), with the quoted "$1"
	 *   replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		// Walk the array with the internal pointer (each() is gone in PHP 8).
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Move past the current element, as each() used to do.
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer now sits on the element after the named group (its
			// numeric duplicate in preg_match output); the parameter, if
			// any, is the one after that. next() yields false when there is
			// none.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is consulted first, then the lower-cased text
	 * is looked up in the case-insensitive table.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Set '' explicitly when the match covers the whole text
				// instead of relying on substr() at the end of the string.
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}