test SQL for our QueryPages objects
[mediawiki.git] / includes / MagicWord.php
blob563d3a5621f796a6a8794e8d676c93157159c343
<?php
/**
 * File for magic words
 *
 * See docs/magicword.txt
 *
 * @file
 * @ingroup Parser
 */
/**
 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
 * Usage:
 *     if ( MagicWord::get( 'redirect' )->match( $text ) )
 *
 * Possible future improvements:
 *   * Simultaneous searching for a number of magic words
 *   * MagicWord::$mObjects in shared memory
 *
 * Please avoid reading the data out of one of these objects and then writing
 * special case code. If possible, add another match()-like function here.
 *
 * To add magic words in an extension, use the LanguageGetMagic hook. For
 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
 * hook. Use string keys.
 *
 * @ingroup Parser
 */
class MagicWord {
	/**#@+
	 * @private
	 */
	public $mId, $mSynonyms, $mCaseSensitive;
	public $mRegex = '';
	public $mRegexStart = '';
	public $mBaseRegex = '';
	public $mVariableRegex = '';
	public $mVariableStartToEndRegex = '';
	public $mModified = false;
	public $mFound = false;

	public static $mVariableIDsInitialised = false;
	public static $mVariableIDs = array(
		'currentmonth',
		'currentmonth1',
		'currentmonthname',
		'currentmonthnamegen',
		'currentmonthabbrev',
		'currentday',
		'currentday2',
		'currentdayname',
		'currentyear',
		'currenttime',
		'currenthour',
		'localmonth',
		'localmonth1',
		'localmonthname',
		'localmonthnamegen',
		'localmonthabbrev',
		'localday',
		'localday2',
		'localdayname',
		'localyear',
		'localtime',
		'localhour',
		'numberofarticles',
		'numberoffiles',
		'numberofedits',
		'articlepath',
		'sitename',
		'server',
		'servername',
		'scriptpath',
		'stylepath',
		'pagename',
		'pagenamee',
		'fullpagename',
		'fullpagenamee',
		'namespace',
		'namespacee',
		'currentweek',
		'currentdow',
		'localweek',
		'localdow',
		'revisionid',
		'revisionday',
		'revisionday2',
		'revisionmonth',
		'revisionmonth1',
		'revisionyear',
		'revisiontimestamp',
		'revisionuser',
		'subpagename',
		'subpagenamee',
		'talkspace',
		'talkspacee',
		'subjectspace',
		'subjectspacee',
		'talkpagename',
		'talkpagenamee',
		'subjectpagename',
		'subjectpagenamee',
		'numberofusers',
		'numberofactiveusers',
		'numberofpages',
		'currentversion',
		'basepagename',
		'basepagenamee',
		'currenttimestamp',
		'localtimestamp',
		'directionmark',
		'contentlanguage',
		'numberofadmins',
		'numberofviews',
	);

	/* Array of caching hints for ParserCache */
	public static $mCacheTTLs = array (
		'currentmonth' => 86400,
		'currentmonth1' => 86400,
		'currentmonthname' => 86400,
		'currentmonthnamegen' => 86400,
		'currentmonthabbrev' => 86400,
		'currentday' => 3600,
		'currentday2' => 3600,
		'currentdayname' => 3600,
		'currentyear' => 86400,
		'currenttime' => 3600,
		'currenthour' => 3600,
		'localmonth' => 86400,
		'localmonth1' => 86400,
		'localmonthname' => 86400,
		'localmonthnamegen' => 86400,
		'localmonthabbrev' => 86400,
		'localday' => 3600,
		'localday2' => 3600,
		'localdayname' => 3600,
		'localyear' => 86400,
		'localtime' => 3600,
		'localhour' => 3600,
		'numberofarticles' => 3600,
		'numberoffiles' => 3600,
		'numberofedits' => 3600,
		'currentweek' => 3600,
		'currentdow' => 3600,
		'localweek' => 3600,
		'localdow' => 3600,
		'numberofusers' => 3600,
		'numberofactiveusers' => 3600,
		'numberofpages' => 3600,
		'currentversion' => 86400,
		'currenttimestamp' => 3600,
		'localtimestamp' => 3600,
		'pagesinnamespace' => 3600,
		'numberofadmins' => 3600,
		'numberofviews' => 3600,
		'numberingroup' => 3600,
	);

	public static $mDoubleUnderscoreIDs = array(
		'notoc',
		'nogallery',
		'forcetoc',
		'toc',
		'noeditsection',
		'newsectionlink',
		'nonewsectionlink',
		'hiddencat',
		'index',
		'noindex',
		'staticredirect',
		'notitleconvert',
		'nocontentconvert',
	);

	public static $mSubstIDs = array(
		'subst',
		'safesubst',
	);

	public static $mObjects = array();
	public static $mDoubleUnderscoreArray = null;

	/**#@-*/

	/**
	 * @param $id mixed Magic word ID
	 * @param $syn string|array Synonym(s); always stored as an array
	 * @param $cs bool Whether matching is case-sensitive
	 */
	function __construct( $id = 0, $syn = array(), $cs = false ) {
		$this->mId = $id;
		$this->mSynonyms = (array)$syn;
		$this->mCaseSensitive = $cs;
	}

	/**
	 * Factory: creates an object representing an ID
	 *
	 * Objects are created once per ID, via load(), and then served from
	 * self::$mObjects.
	 *
	 * @param $id
	 *
	 * @return MagicWord
	 */
	static function &get( $id ) {
		if ( !isset( self::$mObjects[$id] ) ) {
			$mw = new MagicWord();
			$mw->load( $id );
			self::$mObjects[$id] = $mw;
		}
		return self::$mObjects[$id];
	}

	/**
	 * Get an array of parser variable IDs
	 *
	 * On the first call the MagicWordMagicWords and MagicWordwgVariableIDs
	 * hooks are run; later calls return the already-extended list.
	 *
	 * @return array
	 */
	static function getVariableIDs() {
		if ( !self::$mVariableIDsInitialised ) {
			# Deprecated constant definition hook, available for extensions that need it
			$magicWords = array();
			wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
			foreach ( $magicWords as $word ) {
				define( $word, $word );
			}

			# Get variable IDs
			wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
			self::$mVariableIDsInitialised = true;
		}
		return self::$mVariableIDs;
	}

	/**
	 * Get an array of parser substitution modifier IDs
	 * @return array
	 */
	static function getSubstIDs() {
		return self::$mSubstIDs;
	}

	/**
	 * Allow external reads of TTL array
	 *
	 * @param $id string Magic word ID (a key of self::$mCacheTTLs)
	 * @return int The TTL hint for $id, or -1 if there is no entry for it
	 */
	static function getCacheTTL( $id ) {
		if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
			return self::$mCacheTTLs[$id];
		} else {
			return -1;
		}
	}

	/**
	 * Get a MagicWordArray of double-underscore entities
	 *
	 * @return MagicWordArray
	 */
	static function getDoubleUnderscoreArray() {
		if ( is_null( self::$mDoubleUnderscoreArray ) ) {
			self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
		}
		return self::$mDoubleUnderscoreArray;
	}

	/**
	 * Clear the self::$mObjects variable
	 * For use in parser tests
	 */
	public static function clearCache() {
		self::$mObjects = array();
	}

	/**
	 * Initialises this object with an ID
	 *
	 * The content language object is asked to fill in this object via
	 * getMagic(). If no synonyms are set afterwards, a placeholder synonym
	 * is installed and the problem is logged instead of throwing.
	 *
	 * @param $id
	 */
	function load( $id ) {
		global $wgContLang;
		wfProfileIn( __METHOD__ );
		$this->mId = $id;
		$wgContLang->getMagic( $this );
		if ( !$this->mSynonyms ) {
			$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
			#throw new MWException( "Error: invalid magic word '$id'" );
			wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
		}
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Preliminary initialisation
	 *
	 * Builds all regex variants (plain, start-anchored, variable and
	 * fully-anchored variable) from the synonym list.
	 * @private
	 */
	function initRegex() {
		// Sort the synonyms by length, descending, so that the longest synonym
		// matches in precedence to the shortest
		$synonyms = $this->mSynonyms;
		usort( $synonyms, array( $this, 'compareStringLength' ) );

		$escSyn = array();
		foreach ( $synonyms as $synonym ) {
			// In case a magic word contains /, like that's going to happen;)
			$escSyn[] = preg_quote( $synonym, '/' );
		}
		$this->mBaseRegex = implode( '|', $escSyn );

		$case = $this->mCaseSensitive ? '' : 'iu';
		$this->mRegex = "/{$this->mBaseRegex}/{$case}";
		$this->mRegexStart = "/^(?:{$this->mBaseRegex})/{$case}";
		// preg_quote() turned each "$1" placeholder into "\$1"; swap it for a
		// lazy capture group in the variable regexes.
		$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
		$this->mVariableStartToEndRegex = str_replace( "\\$1", "(.*?)",
			"/^(?:{$this->mBaseRegex})$/{$case}" );
	}

	/**
	 * A comparison function that returns -1, 0 or 1 depending on whether the
	 * first string is longer, the same length or shorter than the second
	 * string.
	 *
	 * @param $s1 string
	 * @param $s2 string
	 *
	 * @return int
	 */
	function compareStringLength( $s1, $s2 ) {
		$l1 = strlen( $s1 );
		$l2 = strlen( $s2 );
		if ( $l1 < $l2 ) {
			return 1;
		} elseif ( $l1 > $l2 ) {
			return -1;
		} else {
			return 0;
		}
	}

	/**
	 * Gets a regex representing matching the word
	 *
	 * @return string
	 */
	function getRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegex;
	}

	/**
	 * Gets the regexp case modifier to use, i.e. 'iu' or nothing, to be used
	 * if one is using MagicWord::getBaseRegex(), otherwise it'll be included
	 * in the complete expression
	 *
	 * @return string
	 */
	function getRegexCase() {
		if ( $this->mRegex === '' ) {
			$this->initRegex();
		}

		return $this->mCaseSensitive ? '' : 'iu';
	}

	/**
	 * Gets a regex matching the word, if it is at the string start
	 *
	 * @return string
	 */
	function getRegexStart() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mRegexStart;
	}

	/**
	 * regex without the slashes and what not
	 *
	 * @return string
	 */
	function getBaseRegex() {
		if ( $this->mRegex == '' ) {
			$this->initRegex();
		}
		return $this->mBaseRegex;
	}

	/**
	 * Returns true if the text contains the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	function match( $text ) {
		return (bool)preg_match( $this->getRegex(), $text );
	}

	/**
	 * Returns true if the text starts with the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	function matchStart( $text ) {
		return (bool)preg_match( $this->getRegexStart(), $text );
	}

	/**
	 * Returns NULL if there's no match, the value of $1 otherwise
	 * The return code is the matched string, if there's no variable
	 * part in the regex and the matched variable part ($1) if there
	 * is one.
	 *
	 * Quirks kept for backward compatibility: if the variable part matched
	 * the empty string, the whole matched string is returned, and if the
	 * whole match itself is empty, NULL is returned.
	 *
	 * @param $text string
	 *
	 * @return string|null
	 */
	function matchVariableStartToEnd( $text ) {
		$matches = array();
		$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
		if ( $matchcount == 0 ) {
			return null;
		}

		# multiple matched parts (variable match); some will be empty because of
		# synonyms. The variable will be the second non-empty one so remove any
		# blank elements and re-sort the indices.
		# See also bug 6526
		#
		# Only genuinely empty strings are dropped. A bare array_filter() would
		# also drop the string "0", so a variable part of "0" (e.g. "0px") would
		# be lost and the whole match returned instead.
		$nonEmpty = array();
		foreach ( $matches as $part ) {
			if ( (string)$part !== '' ) {
				$nonEmpty[] = $part;
			}
		}

		if ( count( $nonEmpty ) == 0 ) {
			return null;
		} elseif ( count( $nonEmpty ) == 1 ) {
			return $nonEmpty[0];
		} else {
			return $nonEmpty[1];
		}
	}

	/**
	 * Returns true if the text matches the word, and alters the
	 * input string, removing all instances of the word
	 *
	 * @param $text string
	 *
	 * @return bool
	 */
	function matchAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Returns true if the text starts with the word, and removes that
	 * leading occurrence from the input string
	 *
	 * @param $text
	 * @return bool
	 */
	function matchStartAndRemove( &$text ) {
		$this->mFound = false;
		$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
		return $this->mFound;
	}

	/**
	 * Used in matchAndRemove()
	 *
	 * Records that a match happened and replaces it with nothing.
	 *
	 * @return string
	 */
	function pregRemoveAndRecord() {
		$this->mFound = true;
		return '';
	}

	/**
	 * Replaces the word with something else
	 *
	 * @param $replacement
	 * @param $subject
	 * @param $limit int
	 *
	 * @return string
	 */
	function replace( $replacement, $subject, $limit = -1 ) {
		$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
		$this->mModified = !( $res === $subject );
		return $res;
	}

	/**
	 * Variable handling: {{SUBST:xxx}} style words
	 * Calls back a function to determine what to replace xxx with
	 * Input word must contain $1
	 *
	 * @param $text string
	 * @param $callback
	 *
	 * @return string
	 */
	function substituteCallback( $text, $callback ) {
		$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
		$this->mModified = !( $res === $text );
		return $res;
	}

	/**
	 * Matches the word, where $1 is a wildcard
	 *
	 * @return string
	 */
	function getVariableRegex() {
		if ( $this->mVariableRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableRegex;
	}

	/**
	 * Matches the entire string, where $1 is a wildcard
	 *
	 * @return string
	 */
	function getVariableStartToEndRegex() {
		if ( $this->mVariableStartToEndRegex == '' ) {
			$this->initRegex();
		}
		return $this->mVariableStartToEndRegex;
	}

	/**
	 * Accesses the synonym list directly
	 *
	 * @param $i int
	 *
	 * @return string
	 */
	function getSynonym( $i ) {
		return $this->mSynonyms[$i];
	}

	/**
	 * @return array
	 */
	function getSynonyms() {
		return $this->mSynonyms;
	}

	/**
	 * Returns true if the last call to replace() or substituteCallback()
	 * returned a modified text, otherwise false.
	 *
	 * @return bool
	 */
	function getWasModified() {
		return $this->mModified;
	}

	/**
	 * $magicarr is an associative array of (magic word ID => replacement)
	 * This method uses the php feature to do several replacements at the same time,
	 * thereby gaining some efficiency. The result is placed in the out variable
	 * $result. The return value is true if something was replaced.
	 *
	 * Unlike replace(), the replacements are handed to preg_replace() as-is,
	 * without StringUtils::escapeRegexReplacement().
	 * @todo Should this be static? It doesn't seem to be used at all
	 *
	 * @param $magicarr
	 * @param $subject
	 * @param $result
	 *
	 * @return bool
	 */
	function replaceMultiple( $magicarr, $subject, &$result ) {
		$search = array();
		$replace = array();
		foreach ( $magicarr as $id => $replacement ) {
			$mw = MagicWord::get( $id );
			$search[] = $mw->getRegex();
			$replace[] = $replacement;
		}

		$result = preg_replace( $search, $replace, $subject );
		return !( $result === $subject );
	}

	/**
	 * Adds all the synonyms of this MagicWord to an array, to allow quick
	 * lookup in a list of magic words
	 *
	 * Each synonym is lower-cased with the content language before being
	 * used as a key.
	 *
	 * @param $array
	 * @param $value
	 */
	function addToArray( &$array, $value ) {
		global $wgContLang;
		foreach ( $this->mSynonyms as $syn ) {
			$array[$wgContLang->lc( $syn )] = $value;
		}
	}

	/**
	 * @return bool
	 */
	function isCaseSensitive() {
		return $this->mCaseSensitive;
	}

	/**
	 * @return mixed The ID this object was constructed or loaded with
	 */
	function getId() {
		return $this->mId;
	}
}
/**
 * Class for handling an array of magic words
 * @ingroup Parser
 */
class MagicWordArray {
	public $names = array();
	public $hash;
	public $baseRegex, $regex;
	public $matches;

	/**
	 * @param $names array Magic word IDs to include
	 */
	function __construct( $names = array() ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $name string
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Resets the cached hash and regexes so they are rebuilt on next use.
	 *
	 * @param $names array
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive words to their
	 * magic word ID; index 1 maps synonyms of case-sensitive words verbatim.
	 *
	 * @return array
	 */
	function getHash() {
		if ( is_null( $this->hash ) ) {
			global $wgContLang;
			$this->hash = array( 0 => array(), 1 => array() );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Returns a two-element array: index 0 holds the alternation for
	 * case-insensitive words, index 1 the one for case-sensitive words.
	 * Each synonym is wrapped in a named group "<index>_<magic word ID>",
	 * which parseMatch() later splits on the first underscore.
	 *
	 * @return array
	 */
	function getBaseRegex() {
		if ( is_null( $this->baseRegex ) ) {
			$this->baseRegex = array( 0 => '', 1 => '' );
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// PCRE 8.34+ (and PCRE2) refuse group names that start
					// with a digit, so spell the synonym index with letters.
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$group = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Two regex strings (case-insensitive, case-sensitive);
	 *  an entry is '' when there are no words of that kind
	 */
	function getRegex() {
		if ( is_null( $this->regex ) ) {
			$base = $this->getBaseRegex();
			$this->regex = array( '', '' );
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * @return array getRegex() with each quoted "$1" replaced by a lazy
	 *  capture group
	 */
	function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array
	 */
	function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array
	 */
	function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = array( '', '' );
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param $m array
	 *
	 * @return array
	 * @throws MWException if no usable named group is found in $m
	 */
	function parseMatch( $m ) {
		// each() was removed in PHP 8, so walk the keys by position instead.
		$keys = array_keys( $m );
		foreach ( $keys as $pos => $key ) {
			$value = $m[$key];
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			$parts = explode( '_', (string)$key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				// continue;
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// A named group is followed by its own numeric copy; the
			// parameter capture, if any, is the element after that.
			$paramPos = $pos + 2;
			$paramValue = isset( $keys[$paramPos] ) ? $m[$keys[$paramPos]] : false;
			return array( $magicName, $paramValue );
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		$regexes = $this->getVariableStartToEndRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex !== '' ) {
				$m = false;
				if ( preg_match( $regex, $text, $m ) ) {
					return $this->parseMatch( $m );
				}
			}
		}
		return array( false, false );
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is tried first with the text as given, then
	 * the case-insensitive table with the text lower-cased.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * @param $text string
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = array();
		$regexes = $this->getRegex();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			$text = preg_replace( $regex, '', $text );
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param $text string
	 *
	 * @return string|false
	 */
	public function matchStartAndRemove( &$text ) {
		$regexes = $this->getRegexStart();
		foreach ( $regexes as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				if ( strlen( $m[0] ) >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, strlen( $m[0] ) );
				}
				return $id;
			}
		}
		return false;
	}
}