Localisation updates from http://translatewiki.net.
[mediawiki.git] / includes / MagicWord.php
blob131ae1ddea198ef436fa3c3d7d968558ca8fe7a3
1 <?php
/**
 * File for magic words
 *
 * See docs/magicword.txt
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 * 	'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 * 	'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	static public $mVariableIDsInitialised = false;
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	static public $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	static public $mObjects = array();
	static public $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID (defaults to 0; load() overwrites it)
	 * @param $syn array|string Synonym or list of synonyms; cast to an array
	 * @param $cs bool Whether the synonyms are matched case-sensitively
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are created once per ID and cached in self::$mObjects.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * The MagicWordwgVariableIDs hook is run once, on first call, so that
	 * handlers can append to the list.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 * @return int TTL taken from self::$mCacheTTLs, or -1 if the ID has no entry
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * Asks $wgContLang->getMagic() to fill in this object. If no synonyms
	 * are set afterwards, the problem is logged and a placeholder synonym
	 * is installed instead of throwing.
	 *
	 * @param $id
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds all cached regex strings from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned "$1" into "\$1"; swap that for a lazy capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * A match that is entirely empty is reported as NULL as well.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only the empty string counts as blank: a bare array_filter() would
		# also throw away a captured "0".
		$parts = array();
		foreach ( $matches as $part ) {
			if ( $part !== '' ) {
				$parts[] = $part;
			}
		}

		if ( !$parts ) {
			// Everything matched was empty; there is nothing to hand back
			return null;
		}
		return count( $parts ) == 1 ? $parts[0] : $parts[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Like matchAndRemove(), but only removes the word at the start of the text
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * NOTE: unlike replace(), the replacements are handed to preg_replace()
	 * unescaped.
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return ( $result !== $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Keys are the synonyms lower-cased with $wgContLang->lc().
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID passed to the constructor or to load()
	 */
	public function getId() {
		return $this->mId;
	}
}

/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array List of magic word names
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Resets the cached hash and regexes.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Resets the cached hash and regexes.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * name; index 1 maps synonyms of case-sensitive words, unchanged.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Every synonym becomes a named group "<index>_<name>", where the
	 * synonym index is spelled with the letters a-j instead of digits.
	 *
	 * @return array Index 0: case-insensitive alternation, index 1:
	 *  case-sensitive alternation; '' where there are no words
	 */
	public function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// PCRE rejects group names that start with a digit, so
					// encode the index as letters. parseMatch() only looks
					// at the part after the first underscore.
					$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *  regex; '' where there are no words
	 */
	public function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with the quoted "$1"
	 *  replaced by a lazy capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @throws MWException If no usable named group is found
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		// Walk the array with the internal pointer; each() no longer exists
		// in PHP 8.
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Step onto the next element now, as each() used to do
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// The pointer sits on the numeric copy of the named group; the
			// element after that, if any, is the captured parameter.
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__.': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is tried first, then the case-insensitive
	 * one with the text lower-cased through $wgContLang->lc().
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$stripped = preg_replace( $regex, '', $text );
			// preg_replace() yields null on a PCRE error; keep the caller's
			// text rather than wiping it out.
			if ( $stripped !== null ) {
				$text = $stripped;
			}
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}