Pass __METHOD__ to DatabaseBase::commit() and DatabaseBase::rollback()
[mediawiki.git] / includes / MagicWord.php
blob941d97fc543f0949529624b940be421a609bab19
1 <?php
2 /**
3 * File for magic words
5 * See docs/magicword.txt
7 * @file
8 * @ingroup Parser
9 */
11 /**
12 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
14 * @par Usage:
15 * @code
16 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
17 * // some code
18 * }
19 * @endcode
21 * Possible future improvements:
22 * * Simultaneous searching for a number of magic words
23 * * MagicWord::$mObjects in shared memory
25 * Please avoid reading the data out of one of these objects and then writing
26 * special case code. If possible, add another match()-like function here.
28 * To add magic words in an extension, use $magicWords in a file listed in
29 * $wgExtensionMessagesFiles[].
31 * @par Example:
32 * @code
33 * $magicWords = array();
35 * $magicWords['en'] = array(
36 * 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
37 * 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
38 * );
39 * @endcode
41 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
42 * hook. Use string keys.
44 * @ingroup Parser
46 class MagicWord {
/**#@+
 * @private
 */
// Magic word ID, list of synonyms and case-sensitivity flag; assigned by the constructor.
var $mId, $mSynonyms, $mCaseSensitive;
// Regex strings filled in by initRegex(); they stay empty until it has run.
var $mRegex = '';
var $mRegexStart = '';
var $mBaseRegex = '';
var $mVariableRegex = '';
var $mVariableStartToEndRegex = '';
// Written by replace() and substituteCallback(): true when the last call changed its input.
var $mModified = false;
// Reset by matchAndRemove()/matchStartAndRemove() and set to true by pregRemoveAndRecord().
var $mFound = false;
// Becomes true once getVariableIDs() has run the MagicWordwgVariableIDs hook.
static public $mVariableIDsInitialised = false;
// IDs returned by getVariableIDs(); the MagicWordwgVariableIDs hook receives this
// array by reference and may extend it.
static public $mVariableIDs = array(
	'currentmonth',
	'currentmonth1',
	'currentmonthname',
	'currentmonthnamegen',
	'currentmonthabbrev',
	'currentday',
	'currentday2',
	'currentdayname',
	'currentyear',
	'currenttime',
	'currenthour',
	'localmonth',
	'localmonth1',
	'localmonthname',
	'localmonthnamegen',
	'localmonthabbrev',
	'localday',
	'localday2',
	'localdayname',
	'localyear',
	'localtime',
	'localhour',
	'numberofarticles',
	'numberoffiles',
	'numberofedits',
	'articlepath',
	'sitename',
	'server',
	'servername',
	'scriptpath',
	'stylepath',
	'pagename',
	'pagenamee',
	'fullpagename',
	'fullpagenamee',
	'namespace',
	'namespacee',
	'currentweek',
	'currentdow',
	'localweek',
	'localdow',
	'revisionid',
	'revisionday',
	'revisionday2',
	'revisionmonth',
	'revisionmonth1',
	'revisionyear',
	'revisiontimestamp',
	'revisionuser',
	'subpagename',
	'subpagenamee',
	'talkspace',
	'talkspacee',
	'subjectspace',
	'subjectspacee',
	'talkpagename',
	'talkpagenamee',
	'subjectpagename',
	'subjectpagenamee',
	'numberofusers',
	'numberofactiveusers',
	'numberofpages',
	'currentversion',
	'basepagename',
	'basepagenamee',
	'currenttimestamp',
	'localtimestamp',
	'directionmark',
	'contentlanguage',
	'numberofadmins',
	'numberofviews',
);
/* Array of caching hints for ParserCache */
// Keyed by magic word ID and read through getCacheTTL(), which returns -1 for IDs
// that are not listed here. NOTE(review): values look like seconds (3600 = one
// hour, 86400 = one day) -- confirm against the ParserCache consumer.
static public $mCacheTTLs = array (
	'currentmonth' => 86400,
	'currentmonth1' => 86400,
	'currentmonthname' => 86400,
	'currentmonthnamegen' => 86400,
	'currentmonthabbrev' => 86400,
	'currentday' => 3600,
	'currentday2' => 3600,
	'currentdayname' => 3600,
	'currentyear' => 86400,
	'currenttime' => 3600,
	'currenthour' => 3600,
	'localmonth' => 86400,
	'localmonth1' => 86400,
	'localmonthname' => 86400,
	'localmonthnamegen' => 86400,
	'localmonthabbrev' => 86400,
	'localday' => 3600,
	'localday2' => 3600,
	'localdayname' => 3600,
	'localyear' => 86400,
	'localtime' => 3600,
	'localhour' => 3600,
	'numberofarticles' => 3600,
	'numberoffiles' => 3600,
	'numberofedits' => 3600,
	'currentweek' => 3600,
	'currentdow' => 3600,
	'localweek' => 3600,
	'localdow' => 3600,
	'numberofusers' => 3600,
	'numberofactiveusers' => 3600,
	'numberofpages' => 3600,
	'currentversion' => 86400,
	'currenttimestamp' => 3600,
	'localtimestamp' => 3600,
	'pagesinnamespace' => 3600,
	'numberofadmins' => 3600,
	'numberofviews' => 3600,
	'numberingroup' => 3600,
);
// IDs handed to the MagicWordArray that getDoubleUnderscoreArray() builds.
static public $mDoubleUnderscoreIDs = array(
	'notoc',
	'nogallery',
	'forcetoc',
	'toc',
	'noeditsection',
	'newsectionlink',
	'nonewsectionlink',
	'hiddencat',
	'index',
	'noindex',
	'staticredirect',
	'notitleconvert',
	'nocontentconvert',
);

// IDs returned by getSubstIDs().
static public $mSubstIDs = array(
	'subst',
	'safesubst',
);

// Cache of MagicWord objects keyed by ID; filled by get(), emptied by clearCache().
static public $mObjects = array();
// Lazily created MagicWordArray; see getDoubleUnderscoreArray().
static public $mDoubleUnderscoreArray = null;

/**#@-*/
/**
 * Store the ID, the synonyms and the case-sensitivity flag on the new object.
 *
 * @param $id Magic word ID
 * @param $syn A synonym or a list of synonyms; it is cast to an array
 * @param $cs bool Whether the word is case-sensitive
 */
function __construct($id = 0, $syn = array(), $cs = false) {
	$this->mCaseSensitive = $cs;
	$this->mSynonyms = (array)$syn;
	$this->mId = $id;
}
/**
 * Factory: returns the cached object for an ID, creating and loading it
 * on first use.
 *
 * @param $id
 *
 * @return MagicWord
 */
static function &get( $id ) {
	if ( isset( self::$mObjects[$id] ) ) {
		return self::$mObjects[$id];
	}

	$word = new MagicWord();
	$word->load( $id );
	self::$mObjects[$id] = $word;

	return self::$mObjects[$id];
}
/**
 * Get an array of parser variable IDs
 *
 * The MagicWordwgVariableIDs hook is run once, on the first call, and is
 * given the ID list by reference.
 *
 * @return array
 */
static function getVariableIDs() {
	if ( self::$mVariableIDsInitialised ) {
		return self::$mVariableIDs;
	}

	# Let hook handlers add their own variable IDs
	wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
	self::$mVariableIDsInitialised = true;

	return self::$mVariableIDs;
}
/**
 * Get the list of parser substitution modifier IDs
 *
 * @return array
 */
static function getSubstIDs() {
	$ids = self::$mSubstIDs;
	return $ids;
}
/**
 * Allow external reads of the TTL array
 *
 * @param $id string Magic word ID used as key into the TTL array
 * @return int The stored TTL hint, or -1 when the ID has no entry
 */
static function getCacheTTL( $id ) {
	return array_key_exists( $id, self::$mCacheTTLs )
		? self::$mCacheTTLs[$id]
		: -1;
}
/**
 * Get the MagicWordArray of double-underscore entities, creating it from
 * self::$mDoubleUnderscoreIDs on first use.
 *
 * @return MagicWordArray
 */
static function getDoubleUnderscoreArray() {
	if ( self::$mDoubleUnderscoreArray === null ) {
		$ids = self::$mDoubleUnderscoreIDs;
		self::$mDoubleUnderscoreArray = new MagicWordArray( $ids );
	}

	return self::$mDoubleUnderscoreArray;
}
/**
 * Empty the self::$mObjects cache so that get() reloads every word.
 * For use in parser tests
 */
public static function clearCache() {
	self::$mObjects = array();
}
/**
 * Initialises this object with an ID
 *
 * The content language object is asked to fill in this word via getMagic().
 * If no synonyms are set afterwards, a placeholder synonym is installed and
 * the problem is written to the 'exception' debug log instead of throwing.
 *
 * @param $id
 */
function load( $id ) {
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	$this->mId = $id;
	$wgContLang->getMagic( $this );

	$hasSynonyms = (bool)$this->mSynonyms;
	if ( !$hasSynonyms ) {
		$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
		#throw new MWException( "Error: invalid magic word '$id'" );
		wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
	}

	wfProfileOut( __METHOD__ );
}
/**
 * Preliminary initialisation: builds all regex strings for this word.
 * @private
 */
function initRegex() {
	// Longest synonyms first, so that a longer synonym wins over a shorter
	// one in the alternation
	$ordered = $this->mSynonyms;
	usort( $ordered, array( $this, 'compareStringLength' ) );

	// Quote every synonym for use inside a /.../ pattern
	$quoted = array();
	foreach ( $ordered as $word ) {
		$quoted[] = preg_quote( $word, '/' );
	}
	$base = implode( '|', $quoted );
	$this->mBaseRegex = $base;

	$flags = $this->mCaseSensitive ? '' : 'iu';
	$this->mRegex = "/{$base}/{$flags}";
	$this->mRegexStart = "/^(?:{$base})/{$flags}";

	// In the variable forms the quoted "$1" placeholder becomes a lazy capture group
	$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
	$this->mVariableStartToEndRegex = str_replace(
		"\\$1",
		"(.*?)",
		"/^(?:{$base})$/{$flags}"
	);
}
/**
 * Comparison callback ordering strings by byte length, longest first:
 * returns -1 when the first string is longer, 1 when it is shorter and
 * 0 when both have the same length.
 *
 * @param $s1 string
 * @param $s2 string
 *
 * @return int
 */
function compareStringLength( $s1, $s2 ) {
	$len1 = strlen( $s1 );
	$len2 = strlen( $s2 );

	if ( $len1 === $len2 ) {
		return 0;
	}

	return ( $len1 > $len2 ) ? -1 : 1;
}
/**
 * Gets the unanchored regex matching the word, building it on first use
 *
 * @return string
 */
function getRegex() {
	$built = !( $this->mRegex == '' );
	if ( !$built ) {
		$this->initRegex();
	}

	return $this->mRegex;
}
/**
 * Gets the regexp modifiers to use: 'iu' for a case-insensitive word, an
 * empty string otherwise. Needed when building a pattern from
 * MagicWord::getBaseRegex(); the complete expressions already include it.
 *
 * @return string
 */
function getRegexCase() {
	if ( $this->mRegex === '' ) {
		$this->initRegex();
	}

	if ( $this->mCaseSensitive ) {
		return '';
	}
	return 'iu';
}
/**
 * Gets the regex matching the word at the start of a string, building
 * the regexes on first use
 *
 * @return string
 */
function getRegexStart() {
	$built = !( $this->mRegex == '' );
	if ( !$built ) {
		$this->initRegex();
	}

	return $this->mRegexStart;
}
/**
 * Gets the bare alternation of quoted synonyms, without delimiters or
 * modifiers
 *
 * @return string
 */
function getBaseRegex() {
	$built = !( $this->mRegex == '' );
	if ( !$built ) {
		$this->initRegex();
	}

	return $this->mBaseRegex;
}
/**
 * Tells whether the word occurs anywhere in the text
 *
 * @param $text string
 *
 * @return bool
 */
function match( $text ) {
	$hits = preg_match( $this->getRegex(), $text );
	return (bool)$hits;
}
/**
 * Tells whether the text begins with the word
 *
 * @param $text string
 *
 * @return bool
 */
function matchStart( $text ) {
	$hits = preg_match( $this->getRegexStart(), $text );
	return (bool)$hits;
}
/**
 * Returns NULL if there's no match, the value of $1 otherwise
 * The return code is the matched string, if there's no variable
 * part in the regex and the matched variable part ($1) if there
 * is one.
 *
 * @param $text string
 *
 * @return string|null
 */
function matchVariableStartToEnd( $text ) {
	$matches = array();
	if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
		return null;
	}

	# multiple matched parts (variable match); some will be empty because of
	# synonyms. The variable will be the second non-empty one so drop the
	# blank elements and renumber the rest.
	# See also bug 6526
	#
	# Only empty strings are dropped. A plain array_filter() would also drop
	# the string "0", so a parameter value of "0" would be lost and the whole
	# match returned in its place.
	$nonEmpty = array();
	foreach ( $matches as $part ) {
		if ( $part !== '' ) {
			$nonEmpty[] = $part;
		}
	}

	if ( !$nonEmpty ) {
		# The regex matched an empty string; keep the historical null result
		return null;
	}

	return count( $nonEmpty ) == 1 ? $nonEmpty[0] : $nonEmpty[1];
}
/**
 * Returns true if the text matches the word, and alters the
 * input string, removing all instances of the word
 *
 * @param $text string Passed by reference; every match is removed from it
 *
 * @return bool
 */
function matchAndRemove( &$text ) {
	$this->mFound = false;
	// The callback holds $this by value: objects are handles, so no reference
	// is needed, and initRegex() builds its usort() callback the same way.
	$text = preg_replace_callback(
		$this->getRegex(),
		array( $this, 'pregRemoveAndRecord' ),
		$text
	);
	// pregRemoveAndRecord() sets mFound when it is called for a match
	return $this->mFound;
}
/**
 * Returns true if the text starts with the word, and removes that
 * leading occurrence from the input string
 *
 * @param $text string Passed by reference; a match at the start is removed
 * @return bool
 */
function matchStartAndRemove( &$text ) {
	$this->mFound = false;
	// The callback holds $this by value: objects are handles, so no reference
	// is needed, and initRegex() builds its usort() callback the same way.
	$text = preg_replace_callback(
		$this->getRegexStart(),
		array( $this, 'pregRemoveAndRecord' ),
		$text
	);
	// pregRemoveAndRecord() sets mFound when it is called for a match
	return $this->mFound;
}
/**
 * Replacement callback used in matchAndRemove() and matchStartAndRemove():
 * records that a match was seen and replaces it with nothing.
 *
 * @return string
 */
function pregRemoveAndRecord() {
	$this->mFound = true;

	return '';
}
/**
 * Replaces the word with something else
 *
 * The replacement is passed through StringUtils::escapeRegexReplacement()
 * before being handed to preg_replace(). Whether the result differs from
 * the subject is remembered for getWasModified().
 *
 * @param $replacement
 * @param $subject
 * @param $limit int
 *
 * @return string
 */
function replace( $replacement, $subject, $limit = -1 ) {
	$pattern = $this->getRegex();
	$escaped = StringUtils::escapeRegexReplacement( $replacement );

	$res = preg_replace( $pattern, $escaped, $subject, $limit );
	$this->mModified = ( $res !== $subject );

	return $res;
}
/**
 * Variable handling: {{SUBST:xxx}} style words
 * A callback decides what each xxx is replaced with.
 * Input word must contain $1
 *
 * Whether the result differs from the input text is remembered for
 * getWasModified().
 *
 * @param $text string
 * @param $callback
 *
 * @return string
 */
function substituteCallback( $text, $callback ) {
	$pattern = $this->getVariableRegex();

	$res = preg_replace_callback( $pattern, $callback, $text );
	$this->mModified = ( $res !== $text );

	return $res;
}
/**
 * Gets the regex matching the word, where $1 is a wildcard; built on
 * first use
 *
 * @return string
 */
function getVariableRegex() {
	$built = !( $this->mVariableRegex == '' );
	if ( !$built ) {
		$this->initRegex();
	}

	return $this->mVariableRegex;
}
/**
 * Gets the regex matching the entire string, where $1 is a wildcard;
 * built on first use
 *
 * @return string
 */
function getVariableStartToEndRegex() {
	$built = !( $this->mVariableStartToEndRegex == '' );
	if ( !$built ) {
		$this->initRegex();
	}

	return $this->mVariableStartToEndRegex;
}
/**
 * Reads one entry of the synonym list directly
 *
 * @param $i int Index into the synonym list
 *
 * @return string
 */
function getSynonym( $i ) {
	$synonym = $this->mSynonyms[$i];
	return $synonym;
}
/**
 * Gets the complete synonym list
 *
 * @return array
 */
function getSynonyms() {
	$synonyms = $this->mSynonyms;
	return $synonyms;
}
/**
 * Tells whether the most recent replace() or substituteCallback() call
 * returned a text different from its input.
 *
 * @return bool
 */
function getWasModified(){
	$modified = $this->mModified;
	return $modified;
}
/**
 * $magicarr is an associative array of (magic word ID => replacement)
 * All replacements are done in a single preg_replace() call with parallel
 * pattern and replacement arrays. The result is placed in the out variable
 * $result. The return value is true if something was replaced.
 * @todo Should this be static? It doesn't seem to be used at all
 *
 * NOTE(review): unlike replace(), the replacements are not passed through
 * StringUtils::escapeRegexReplacement() here -- confirm that is intended.
 *
 * @param $magicarr
 * @param $subject
 * @param $result
 *
 * @return bool
 */
function replaceMultiple( $magicarr, $subject, &$result ){
	$patterns = array();
	$substitutes = array();

	foreach ( $magicarr as $wordId => $newText ) {
		$word = MagicWord::get( $wordId );
		$patterns[] = $word->getRegex();
		$substitutes[] = $newText;
	}

	$result = preg_replace( $patterns, $substitutes, $subject );

	return $result !== $subject;
}
/**
 * Adds all the synonyms of this MagicWord to an array, to allow quick
 * lookup in a list of magic words. Each synonym is lowercased with the
 * content language before being used as key.
 *
 * @param $array Array to add to, passed by reference
 * @param $value Value stored under every synonym key
 */
function addToArray( &$array, $value ) {
	global $wgContLang;

	foreach ( $this->mSynonyms as $synonym ) {
		$key = $wgContLang->lc( $synonym );
		$array[$key] = $value;
	}
}
/**
 * Gets the case-sensitivity flag of this word
 *
 * @return bool
 */
function isCaseSensitive() {
	$sensitive = $this->mCaseSensitive;
	return $sensitive;
}
/**
 * Gets the ID given to the constructor or to load()
 *
 * @return int
 */
function getId() {
	$id = $this->mId;
	return $id;
}
622 * Class for handling an array of magic words
623 * @ingroup Parser
625 class MagicWordArray {
// Magic word IDs in this collection; see add() and addArray().
var $names = array();
// Lookup table built lazily by getHash(); reset to null by add()/addArray().
var $hash;
// Regex fragments built lazily by getBaseRegex() and getRegex(); reset to null by add()/addArray().
var $baseRegex, $regex;
// NOTE(review): not referenced by any method visible here.
var $matches;
/**
 * Create the collection from a list of magic word IDs.
 *
 * @param $names array
 */
function __construct( $names = array() ) {
	$initial = $names;
	$this->names = $initial;
}
/**
 * Add a magic word by name and discard the cached hash and regexes
 *
 * @param $name string
 */
public function add( $name ) {
	$this->names[] = $name;

	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Add a number of magic words by name and discard the cached hash and
 * regexes. Keys of the given array are ignored.
 *
 * @param $names array
 */
public function addArray( $names ) {
	$extra = array_values( $names );
	$this->names = array_merge( $this->names, $extra );

	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Get a 2-d hashtable for this array
 *
 * The first index is 0 for case-insensitive and 1 for case-sensitive
 * words, the second index is the synonym (lowercased with the content
 * language for case-insensitive words) and the value is the magic word ID.
 *
 * @return array
 */
function getHash() {
	global $wgContLang;

	if ( !is_null( $this->hash ) ) {
		return $this->hash;
	}

	$this->hash = array( 0 => array(), 1 => array() );
	foreach ( $this->names as $name ) {
		$word = MagicWord::get( $name );
		$sensitive = intval( $word->isCaseSensitive() );
		foreach ( $word->getSynonyms() as $synonym ) {
			$key = $sensitive ? $synonym : $wgContLang->lc( $synonym );
			$this->hash[$sensitive][$key] = $name;
		}
	}

	return $this->hash;
}
/**
 * Get the base regex
 *
 * Returns a two-element array: index 0 holds the alternation for
 * case-insensitive words, index 1 the one for case-sensitive words. Every
 * synonym becomes a named group "<prefix>_<magic word ID>", where the prefix
 * encodes the synonym index; parseMatch() reads the ID back from the part
 * after the first underscore.
 *
 * @return array
 */
function getBaseRegex() {
	if ( is_null( $this->baseRegex ) ) {
		$this->baseRegex = array( 0 => '', 1 => '' );
		foreach ( $this->names as $name ) {
			$magic = MagicWord::get( $name );
			$case = intval( $magic->isCaseSensitive() );
			foreach ( $magic->getSynonyms() as $i => $syn ) {
				// PCRE 8.34+ rejects group names that start with a digit, so
				// the synonym index is spelled with letters (0 => a, 1 => b, ...)
				$prefix = strtr( (string)$i, '0123456789', 'abcdefghij' );
				$group = "(?P<{$prefix}_{$name}>" . preg_quote( $syn, '/' ) . ')';
				if ( $this->baseRegex[$case] === '' ) {
					$this->baseRegex[$case] = $group;
				} else {
					$this->baseRegex[$case] .= '|' . $group;
				}
			}
		}
	}
	return $this->baseRegex;
}
/**
 * Get an unanchored regex that does not match parameters
 *
 * Returns a two-element array: index 0 is the case-insensitive regex,
 * index 1 the case-sensitive one. An entry is an empty string when there
 * are no words of that kind.
 *
 * @return array
 */
function getRegex() {
	if ( !is_null( $this->regex ) ) {
		return $this->regex;
	}

	$base = $this->getBaseRegex();
	$this->regex = array( '', '' );
	if ( $base[0] !== '' ) {
		$this->regex[0] = "/{$base[0]}/iuS";
	}
	if ( $base[1] !== '' ) {
		$this->regex[1] = "/{$base[1]}/S";
	}

	return $this->regex;
}
/**
 * Get the regexes for matching variables with parameters
 *
 * The quoted "$1" placeholder in each regex from getRegex() is replaced by
 * a lazy capture group; as getRegex() returns an array, so does this method.
 *
 * @return array
 */
function getVariableRegex() {
	$regexes = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $regexes );
}
/**
 * Get a regex anchored to the start of the string that does not match parameters
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one;
 * an entry is an empty string when there are no words of that kind.
 *
 * @return array
 */
function getRegexStart() {
	$base = $this->getBaseRegex();

	$insensitive = '';
	if ( $base[0] !== '' ) {
		$insensitive = "/^(?:{$base[0]})/iuS";
	}

	$sensitive = '';
	if ( $base[1] !== '' ) {
		$sensitive = "/^(?:{$base[1]})/S";
	}

	return array( $insensitive, $sensitive );
}
/**
 * Get an anchored regex for matching variables with parameters
 *
 * Index 0 is the case-insensitive regex, index 1 the case-sensitive one;
 * an entry is an empty string when there are no words of that kind. The
 * quoted "$1" placeholder is replaced by a lazy capture group.
 *
 * @return array
 */
function getVariableStartToEndRegex() {
	$base = $this->getBaseRegex();

	$insensitive = '';
	if ( $base[0] !== '' ) {
		$insensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
	}

	$sensitive = '';
	if ( $base[1] !== '' ) {
		$sensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
	}

	return array( $insensitive, $sensitive );
}
/**
 * Parse a match array from preg_match
 * Returns array(magic word ID, parameter value)
 * If there is no parameter value, that element will be false.
 *
 * The first element that is neither the whole match (key 0) nor an empty
 * string is expected to be a named group "<prefix>_<magic word ID>".
 *
 * @param $m array
 *
 * @return array
 * @throws MWException When that element's key has no underscore, or when
 *   no such element exists
 */
function parseMatch( $m ) {
	// Walk the array by position instead of with each(), which was
	// deprecated in PHP 7.2 and removed in PHP 8.0
	$keys = array_keys( $m );
	$values = array_values( $m );
	$count = count( $keys );

	for ( $pos = 0; $pos < $count; $pos++ ) {
		$key = $keys[$pos];
		if ( $key === 0 || $values[$pos] === '' ) {
			continue;
		}

		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		$magicName = $parts[1];

		// A named group is followed by its numeric duplicate, so the
		// parameter, if any, is two positions further on
		$paramValue = ( $pos + 2 < $count ) ? $values[$pos + 2] : false;

		return array( $magicName, $paramValue );
	}

	// This shouldn't happen either
	throw new MWException( __METHOD__.': parameter not found' );
}
/**
 * Match some text, with parameter capture
 * Returns an array with the magic word name in the first element and the
 * parameter in the second element.
 * Both elements are false if there was no match.
 *
 * @param $text string
 *
 * @return array
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $regex ) {
		if ( $regex === '' ) {
			// No words of this case-sensitivity kind
			continue;
		}

		$m = false;
		if ( preg_match( $regex, $text, $m ) ) {
			return $this->parseMatch( $m );
		}
	}

	return array( false, false );
}
/**
 * Match some text, without parameter capture
 * Returns the magic word name, or false if there was no capture
 *
 * The exact text is looked up among the case-sensitive synonyms first;
 * after that its content-language lowercase form is looked up among the
 * case-insensitive ones.
 *
 * @param $text string
 *
 * @return string|bool False on failure
 */
public function matchStartToEnd( $text ) {
	global $wgContLang;

	$hash = $this->getHash();
	if ( isset( $hash[1][$text] ) ) {
		return $hash[1][$text];
	}

	$lowered = $wgContLang->lc( $text );

	return isset( $hash[0][$lowered] ) ? $hash[0][$lowered] : false;
}
/**
 * Returns an associative array, ID => param value, for all items that match
 * Removes the matched items from the input string (passed by reference)
 *
 * @param $text string
 *
 * @return array
 */
public function matchAndRemove( &$text ) {
	$found = array();

	foreach ( $this->getRegex() as $regex ) {
		if ( $regex !== '' ) {
			// Record every match, then strip all of them from the text
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				$parsed = $this->parseMatch( $m );
				$found[$parsed[0]] = $parsed[1];
			}
			$text = preg_replace( $regex, '', $text );
		}
	}

	return $found;
}
/**
 * Return the ID of the magic word at the start of $text, and remove
 * the prefix from $text.
 * Return false if no match found and $text is not modified.
 * Does not match parameters.
 *
 * @param $text string
 *
 * @return string|bool Magic word ID, or false on failure
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $regex ) {
		if ( $regex === '' || !preg_match( $regex, $text, $m ) ) {
			continue;
		}

		$parsed = $this->parseMatch( $m );
		$id = $parsed[0];

		// Cut the matched prefix off; an explicit empty string is used when
		// the match covers the whole text
		$consumed = strlen( $m[0] );
		$text = ( $consumed >= strlen( $text ) ) ? '' : substr( $text, $consumed );

		return $id;
	}

	return false;
}