Localisation updates from http://translatewiki.net.
[mediawiki.git] / includes / MagicWord.php
blob18b79c4f5a653e14ce6bd8476a3c902d34525a3b
1 <?php
/**
 * File for magic words.
 *
 * See docs/magicword.txt.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
 *
 * @par Usage:
 * @code
 *     if ( MagicWord::get( 'redirect' )->match( $text ) ) {
 *       // some code
 *     }
 * @endcode
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use $magicWords in a file listed in
 * $wgExtensionMessagesFiles[].
 *
 * @par Example:
 * @code
 * $magicWords = array();
 *
 * $magicWords['en'] = array(
 *   'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
 *   'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
 * );
 * @endcode
 *
 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**
	 * The instance members below should be treated as private; they are
	 * declared public only so that existing external readers keep working.
	 */

	/** ID of this magic word, as passed to the constructor or load() */
	public $mId;

	/** Array of synonym strings for this magic word */
	public $mSynonyms;

	/** True if the synonyms are matched case-sensitively */
	public $mCaseSensitive;

	/** Unanchored regex matching any synonym; '' until initRegex() has run */
	public $mRegex = '';

	/** Same as $mRegex but anchored to the start of the string */
	public $mRegexStart = '';

	/** Alternation of the quoted synonyms, without delimiters or modifiers */
	public $mBaseRegex = '';

	/** $mRegex with every quoted "$1" replaced by a "(.*?)" capture group */
	public $mVariableRegex = '';

	/** Like $mVariableRegex, but anchored at both ends of the string */
	public $mVariableStartToEndRegex = '';

	/** Set by replace() and substituteCallback(): did the last call change the text? */
	public $mModified = false;

	/** Set by the matchAndRemove() family: was at least one occurrence found? */
	public $mFound = false;

	/** True once the MagicWordwgVariableIDs hook has been run by getVariableIDs() */
	static public $mVariableIDsInitialised = false;

	/** Parser variable IDs known to core; hooks may append to it in getVariableIDs() */
	static public $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'pageid',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'namespacenumber',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/**
	 * Array of caching hints for ParserCache, keyed by magic word ID.
	 * NOTE(review): the values look like lifetimes in seconds (3600 / 86400);
	 * confirm against the ParserCache consumer.
	 */
	static public $mCacheTTLs = array(
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	/** IDs of the double-underscore words; hooks may append in getDoubleUnderscoreArray() */
	static public $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	/** IDs of the substitution modifiers returned by getSubstIDs() */
	static public $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	/** Cache of loaded MagicWord objects, keyed by ID (see get() and clearCache()) */
	static public $mObjects = array();

	/** Lazily built MagicWordArray of the double-underscore words, or null */
	static public $mDoubleUnderscoreArray = null;

	/**
	 * @param $id mixed ID of the magic word
	 * @param $syn array|string Synonym or list of synonyms (cast to array)
	 * @param $cs bool Whether matching is case sensitive
	 */
	public function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: returns the object representing an ID, loading and caching it
	 * in self::$mObjects on first use.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	public static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs.
	 * The MagicWordwgVariableIDs hook is run once, on the first call, and
	 * receives the ID list by reference.
	 *
	 * @return array
	 */
	public static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	public static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID
	 * @return int The caching hint for $id, or -1 if there is none
	 */
	public static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		}
		return -1;
	}

	/**
	 * Get a MagicWordArray of double-underscore entities.
	 * On the first call the GetDoubleUnderscoreIDs hook is run with the ID
	 * list passed by reference; the resulting array object is then cached.
	 *
	 * @return MagicWordArray
	 */
	public static function getDoubleUnderscoreArray() {
		if ( self::$mDoubleUnderscoreArray === null ) {
			wfRunHooks( 'GetDoubleUnderscoreIDs', array( &self::$mDoubleUnderscoreIDs ) );
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * The object is handed to $wgContLang->getMagic(), which is expected to
	 * fill in the synonyms (NOTE(review): and presumably the case flag --
	 * confirm in Language::getMagic()).
	 *
	 * @param $id
	 * @throws MWException If no synonyms are set afterwards
	 */
	public function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			// Leave a placeholder synonym behind before reporting the error
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			wfProfileOut( __METHOD__ );
			throw new MWException( "Error: invalid magic word '$id'" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation: builds every cached regex of this object
	 * from the synonym list.
	 * @private
	 */
	public function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned each "$1" into "\$1"; swap those for a capture group
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	public function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		}
		return 0;
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	public function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. i or nothing, to be used if
	 * one is using MagicWord::getBaseRegex(), otherwise it'll be included in
	 * the complete expression
	 *
	 * @return string
	 */
	public function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	public function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	public function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	public function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Filter on string length rather than truthiness: a plain array_filter()
		# would also throw away a captured "0", which is a legitimate value.
		$matches = array_values( array_filter( $matches, 'strlen' ) );

		if ( count( $matches ) == 1 ) {
			return $matches[0];
		}
		return $matches[1];
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	public function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and alters the input
	 * string, removing that leading occurrence.
	 *
	 * @param $text
	 * @return bool
	 */
	public function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove(): records that a match happened and replaces
	 * it with the empty string.
	 *
	 * @return string
	 */
	public function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	public function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = ( $res !== $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	public function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = ( $res !== $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	public function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	public function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	public function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	public function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacement strings are handed to preg_replace()
	 * unescaped.
	 *
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	public function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return ( $result !== $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with $wgContLang->lc() before being used
	 * as the array key.
	 *
	 * @param $array
	 * @param $value
	 */
	public function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	public function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID given to the constructor or to load()
	 */
	public function getId() {
		return $this->mId;
	}
}
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	/** List of magic word IDs held by this array */
	public $names = array();

	/** Cached result of getHash(), or null when it must be rebuilt */
	public $hash;

	/** Cached result of getBaseRegex(), or null when it must be rebuilt */
	public $baseRegex;

	/** Cached result of getRegex(), or null when it must be rebuilt */
	public $regex;

	/** Not used by any method of this class; kept for backward compatibility */
	public $matches;

	/**
	 * @param $names array
	 */
	public function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Invalidates the cached hash and regexes.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Invalidates the cached hash and regexes.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps the lower-cased synonyms of case-insensitive words to the
	 * word's name; index 1 maps the unmodified synonyms of case-sensitive
	 * words to the word's name.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( $this->hash === null ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Index 0 holds the alternation for case-insensitive words, index 1 the
	 * one for case-sensitive words. Every synonym becomes a named group
	 * "<synonym index>_<word name>", which parseMatch() later decodes.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( $this->baseRegex === null ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					$group = "(?P<{$i}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regexes (case-insensitive, case-sensitive); an entry
	 *   is '' when there are no words of that kind
	 */
	public function getRegex() {
		if ( $this->regex === null ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array Same shape as getRegex(), with each quoted "$1" replaced
	 *   by a "(.*?)" capture group
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * The array is walked with the internal pointer (key()/current()/next())
	 * instead of each(), which was removed in PHP 8.0. The pointer position
	 * matters: once the first non-empty named group has been read, the
	 * pointer sits on its numeric duplicate, so one further next() yields the
	 * element after that duplicate, i.e. the parameter capture if one exists.
	 *
	 * @param $m array
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Advance now, exactly as each() used to do
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			// $parts[0] is the synonym index, which is not needed here
			$magicName = $parts[1];
			$paramValue = next( $m );
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = array();
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is consulted first, then the lower-cased text
	 * is looked up in the case-insensitive table.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = array();
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = array();
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				// Handle "the whole text matched" explicitly so the result is
				// always a string, whatever substr() returns at the end of input
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}