(bug 39665) optimize API query generator list
[mediawiki.git] / includes / MagicWord.php
blob42791f57a505b9b05ed9373e3a12799dab2d987e
1 <?php
2 /**
3 * File for magic words.
5 * See docs/magicword.txt.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
22 * @file
23 * @ingroup Parser
26 /**
27 * This class encapsulates "magic words" such as "#redirect", __NOTOC__, etc.
29 * @par Usage:
30 * @code
31 * if (MagicWord::get( 'redirect' )->match( $text ) ) {
32 * // some code
33 * }
34 * @endcode
36 * Possible future improvements:
37 * * Simultaneous searching for a number of magic words
38 * * MagicWord::$mObjects in shared memory
40 * Please avoid reading the data out of one of these objects and then writing
41 * special case code. If possible, add another match()-like function here.
43 * To add magic words in an extension, use $magicWords in a file listed in
44 * $wgExtensionMessagesFiles[].
46 * @par Example:
47 * @code
48 * $magicWords = array();
50 * $magicWords['en'] = array(
51 * 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
52 * 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
53 * );
54 * @endcode
56 * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
57 * hook. Use string keys.
59 * @ingroup Parser
61 class MagicWord {
62 /**#@+
63 * @private
65 var $mId, $mSynonyms, $mCaseSensitive;
66 var $mRegex = '';
67 var $mRegexStart = '';
68 var $mBaseRegex = '';
69 var $mVariableRegex = '';
70 var $mVariableStartToEndRegex = '';
71 var $mModified = false;
72 var $mFound = false;
74 static public $mVariableIDsInitialised = false;
75 static public $mVariableIDs = array(
76 'currentmonth',
77 'currentmonth1',
78 'currentmonthname',
79 'currentmonthnamegen',
80 'currentmonthabbrev',
81 'currentday',
82 'currentday2',
83 'currentdayname',
84 'currentyear',
85 'currenttime',
86 'currenthour',
87 'localmonth',
88 'localmonth1',
89 'localmonthname',
90 'localmonthnamegen',
91 'localmonthabbrev',
92 'localday',
93 'localday2',
94 'localdayname',
95 'localyear',
96 'localtime',
97 'localhour',
98 'numberofarticles',
99 'numberoffiles',
100 'numberofedits',
101 'articlepath',
102 'pageid',
103 'sitename',
104 'server',
105 'servername',
106 'scriptpath',
107 'stylepath',
108 'pagename',
109 'pagenamee',
110 'fullpagename',
111 'fullpagenamee',
112 'namespace',
113 'namespacee',
114 'namespacenumber',
115 'currentweek',
116 'currentdow',
117 'localweek',
118 'localdow',
119 'revisionid',
120 'revisionday',
121 'revisionday2',
122 'revisionmonth',
123 'revisionmonth1',
124 'revisionyear',
125 'revisiontimestamp',
126 'revisionuser',
127 'subpagename',
128 'subpagenamee',
129 'talkspace',
130 'talkspacee',
131 'subjectspace',
132 'subjectspacee',
133 'talkpagename',
134 'talkpagenamee',
135 'subjectpagename',
136 'subjectpagenamee',
137 'numberofusers',
138 'numberofactiveusers',
139 'numberofpages',
140 'currentversion',
141 'basepagename',
142 'basepagenamee',
143 'currenttimestamp',
144 'localtimestamp',
145 'directionmark',
146 'contentlanguage',
147 'numberofadmins',
148 'numberofviews',
151 /* Array of caching hints for ParserCache */
152 static public $mCacheTTLs = array (
153 'currentmonth' => 86400,
154 'currentmonth1' => 86400,
155 'currentmonthname' => 86400,
156 'currentmonthnamegen' => 86400,
157 'currentmonthabbrev' => 86400,
158 'currentday' => 3600,
159 'currentday2' => 3600,
160 'currentdayname' => 3600,
161 'currentyear' => 86400,
162 'currenttime' => 3600,
163 'currenthour' => 3600,
164 'localmonth' => 86400,
165 'localmonth1' => 86400,
166 'localmonthname' => 86400,
167 'localmonthnamegen' => 86400,
168 'localmonthabbrev' => 86400,
169 'localday' => 3600,
170 'localday2' => 3600,
171 'localdayname' => 3600,
172 'localyear' => 86400,
173 'localtime' => 3600,
174 'localhour' => 3600,
175 'numberofarticles' => 3600,
176 'numberoffiles' => 3600,
177 'numberofedits' => 3600,
178 'currentweek' => 3600,
179 'currentdow' => 3600,
180 'localweek' => 3600,
181 'localdow' => 3600,
182 'numberofusers' => 3600,
183 'numberofactiveusers' => 3600,
184 'numberofpages' => 3600,
185 'currentversion' => 86400,
186 'currenttimestamp' => 3600,
187 'localtimestamp' => 3600,
188 'pagesinnamespace' => 3600,
189 'numberofadmins' => 3600,
190 'numberofviews' => 3600,
191 'numberingroup' => 3600,
194 static public $mDoubleUnderscoreIDs = array(
195 'notoc',
196 'nogallery',
197 'forcetoc',
198 'toc',
199 'noeditsection',
200 'newsectionlink',
201 'nonewsectionlink',
202 'hiddencat',
203 'index',
204 'noindex',
205 'staticredirect',
206 'notitleconvert',
207 'nocontentconvert',
210 static public $mSubstIDs = array(
211 'subst',
212 'safesubst',
215 static public $mObjects = array();
216 static public $mDoubleUnderscoreArray = null;
218 /**#@-*/
/**
 * Set up a magic word from an ID, its synonyms and a case-sensitivity flag.
 *
 * @param $id mixed Magic word ID (load() replaces it later when used)
 * @param $syn array|string Synonym or list of synonyms; always stored as an array
 * @param $cs bool True if the word must match case-sensitively
 */
function __construct($id = 0, $syn = array(), $cs = false) {
	$this->mCaseSensitive = $cs;
	// A single synonym string is wrapped into a one-element array by the cast
	$this->mSynonyms = (array)$syn;
	$this->mId = $id;
}
/**
 * Factory: return the MagicWord object for an ID, building and caching it
 * in self::$mObjects the first time the ID is requested.
 *
 * @param $id
 *
 * @return MagicWord
 */
static function &get( $id ) {
	if ( isset( self::$mObjects[$id] ) ) {
		return self::$mObjects[$id];
	}

	$word = new MagicWord();
	$word->load( $id );
	self::$mObjects[$id] = $word;

	// Return the cache slot itself, since this function returns by reference
	return self::$mObjects[$id];
}
/**
 * Return the list of parser variable IDs.
 *
 * On the first call the 'MagicWordwgVariableIDs' hook is run so that handlers
 * can modify the list (it is passed by reference); later calls return the
 * stored list directly.
 *
 * @return array
 */
static function getVariableIDs() {
	if ( self::$mVariableIDsInitialised ) {
		return self::$mVariableIDs;
	}

	wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
	self::$mVariableIDsInitialised = true;

	return self::$mVariableIDs;
}
/**
 * Return the IDs of the parser substitution modifiers (self::$mSubstIDs).
 *
 * @return array
 */
static function getSubstIDs() {
	$ids = self::$mSubstIDs;
	return $ids;
}
/**
 * Look up the caching hint for a magic word in self::$mCacheTTLs.
 *
 * @param $id string Magic word ID, used as the key into the TTL table
 * @return int The listed TTL (presumably seconds -- confirm with the
 *   ParserCache consumer), or -1 if the ID has no entry
 */
static function getCacheTTL( $id ) {
	return array_key_exists( $id, self::$mCacheTTLs )
		? self::$mCacheTTLs[$id]
		: -1;
}
/**
 * Return the shared MagicWordArray built from self::$mDoubleUnderscoreIDs,
 * creating it on first use.
 *
 * @return MagicWordArray
 */
static function getDoubleUnderscoreArray() {
	if ( self::$mDoubleUnderscoreArray === null ) {
		$ids = self::$mDoubleUnderscoreIDs;
		self::$mDoubleUnderscoreArray = new MagicWordArray( $ids );
	}
	return self::$mDoubleUnderscoreArray;
}
/**
 * Empty the self::$mObjects cache so that get() rebuilds every MagicWord.
 * Intended for parser tests.
 */
public static function clearCache() {
	$empty = array();
	self::$mObjects = $empty;
}
/**
 * Initialises this object with an ID.
 *
 * Stores the ID and then asks the content language ($wgContLang->getMagic())
 * to fill this object in; the synonym list is expected to be populated by
 * that call, since it is validated immediately afterwards.
 *
 * @param $id
 * @throws MWException if no synonyms were found for $id
 */
function load( $id ) {
	global $wgContLang;
	wfProfileIn( __METHOD__ );
	$this->mId = $id;
	$wgContLang->getMagic( $this );
	if ( !$this->mSynonyms ) {
		// Keep the historical placeholder so an object whose exception was
		// caught still carries a (never-matching) synonym.
		$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
		// Close the profiling section before bailing out; previously the
		// throw skipped wfProfileOut() and left the profiler unbalanced.
		wfProfileOut( __METHOD__ );
		throw new MWException( "Error: invalid magic word '$id'" );
	}
	wfProfileOut( __METHOD__ );
}
/**
 * Build all regular expressions for this word from its synonyms and store
 * them in mBaseRegex, mRegex, mRegexStart, mVariableRegex and
 * mVariableStartToEndRegex.
 * @private
 */
function initRegex() {
	// Longest synonyms go first so that they take precedence over shorter
	// ones in the alternation
	$sorted = $this->mSynonyms;
	usort( $sorted, array( $this, 'compareStringLength' ) );

	$quoted = array();
	foreach ( $sorted as $word ) {
		// Escape regex metacharacters, including the '/' delimiter
		$quoted[] = preg_quote( $word, '/' );
	}
	$this->mBaseRegex = implode( '|', $quoted );

	$flags = $this->mCaseSensitive ? '' : 'iu';
	$base = $this->mBaseRegex;

	$this->mRegex = "/{$base}/{$flags}";
	$this->mRegexStart = "/^(?:{$base})/{$flags}";

	// preg_quote() turned a "$1" placeholder into "\$1"; replace it with a
	// lazy capture group
	$this->mVariableRegex = str_replace( "\\$1", "(.*?)", $this->mRegex );
	$this->mVariableStartToEndRegex = str_replace(
		"\\$1",
		"(.*?)",
		"/^(?:{$base})$/{$flags}"
	);
}
/**
 * usort() comparator that orders strings by byte length, longest first.
 *
 * @param $s1 string
 * @param $s2 string
 *
 * @return int -1 if $s1 is longer, 1 if $s1 is shorter, 0 if equal length
 */
function compareStringLength( $s1, $s2 ) {
	$delta = strlen( $s2 ) - strlen( $s1 );
	if ( $delta === 0 ) {
		return 0;
	}
	return $delta > 0 ? 1 : -1;
}
/**
 * Return the unanchored regex for this word, building the regexes on first use.
 *
 * @return string
 */
function getRegex() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mRegex;
}
/**
 * Return the regex modifiers that go with getBaseRegex(): 'iu' for a
 * case-insensitive word, an empty string otherwise. The full expressions
 * returned by the other getters already include them.
 *
 * @return string
 */
function getRegexCase() {
	if ( $this->mRegex === '' ) {
		$this->initRegex();
	}

	if ( $this->mCaseSensitive ) {
		return '';
	}
	return 'iu';
}
/**
 * Return the regex that matches this word only at the start of a string,
 * building the regexes on first use.
 *
 * @return string
 */
function getRegexStart() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mRegexStart;
}
/**
 * Return the bare alternation of quoted synonyms, without delimiters or
 * modifiers, building the regexes on first use.
 *
 * @return string
 */
function getBaseRegex() {
	$needsInit = ( $this->mRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mBaseRegex;
}
/**
 * Tell whether the word occurs anywhere in the text.
 *
 * @param $text string
 *
 * @return bool
 */
function match( $text ) {
	$hits = preg_match( $this->getRegex(), $text );
	return (bool)$hits;
}
/**
 * Tell whether the text begins with the word.
 *
 * @param $text string
 *
 * @return bool
 */
function matchStart( $text ) {
	$hits = preg_match( $this->getRegexStart(), $text );
	return (bool)$hits;
}
/**
 * Match the whole text against the word, where $1 in a synonym is a wildcard.
 *
 * Returns NULL if there's no match. Otherwise returns the captured variable
 * part ($1) if the matching synonym has one and it is non-empty, or the
 * whole matched string if not.
 *
 * @param $text string
 *
 * @return string|null
 */
function matchVariableStartToEnd( $text ) {
	$matches = array();
	$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
	if ( $matchcount == 0 ) {
		return null;
	}

	# Multiple matched parts (variable match); some will be empty because of
	# synonyms. The variable will be the second non-empty one, so drop the
	# blank elements and re-index. See also bug 6526.
	#
	# Only the empty string counts as blank here: a bare array_filter() would
	# also discard a captured "0" (e.g. "0px" for "$1px") and make this
	# return the whole match instead of the variable part.
	$parts = array();
	foreach ( $matches as $part ) {
		if ( $part !== '' ) {
			$parts[] = $part;
		}
	}

	if ( count( $parts ) > 1 ) {
		return $parts[1];
	}
	# No variable part: return the matched string. If even that is empty,
	# return it as-is instead of reading an undefined index.
	return $parts ? $parts[0] : $matches[0];
}
/**
 * Strip every occurrence of the word from the text (modified in place) and
 * report whether at least one occurrence was found.
 *
 * @param $text string
 *
 * @return bool
 */
function matchAndRemove( &$text ) {
	$this->mFound = false;
	// The callback sets mFound and replaces each match with an empty string
	$remover = array( $this, 'pregRemoveAndRecord' );
	$text = preg_replace_callback( $this->getRegex(), $remover, $text );
	return $this->mFound;
}
/**
 * Strip the word from the start of the text (modified in place) and report
 * whether it was found there.
 *
 * @param $text
 * @return bool
 */
function matchStartAndRemove( &$text ) {
	$this->mFound = false;
	// The callback sets mFound and replaces the match with an empty string
	$remover = array( $this, 'pregRemoveAndRecord' );
	$text = preg_replace_callback( $this->getRegexStart(), $remover, $text );
	return $this->mFound;
}
/**
 * preg_replace_callback() handler for matchAndRemove() and
 * matchStartAndRemove(): records that a match happened and deletes it.
 *
 * @return string Always the empty string
 */
function pregRemoveAndRecord() {
	$replacement = '';
	$this->mFound = true;
	return $replacement;
}
/**
 * Replace occurrences of the word with a literal replacement string and
 * remember in mModified whether the result differs from the subject.
 *
 * @param $replacement
 * @param $subject
 * @param $limit int Passed through to preg_replace(); -1 by default
 *
 * @return string
 */
function replace( $replacement, $subject, $limit = -1 ) {
	$pattern = $this->getRegex();
	// Escape the replacement so it is not parsed for backreferences
	$literal = StringUtils::escapeRegexReplacement( $replacement );
	$res = preg_replace( $pattern, $literal, $subject, $limit );
	$this->mModified = ( $res !== $subject );
	return $res;
}
/**
 * Variable handling for {{SUBST:xxx}} style words: every match of the
 * variable regex is passed to $callback, whose return value replaces it.
 * The word must contain $1. mModified records whether the text changed.
 *
 * @param $text string
 * @param $callback
 *
 * @return string
 */
function substituteCallback( $text, $callback ) {
	$pattern = $this->getVariableRegex();
	$res = preg_replace_callback( $pattern, $callback, $text );
	$this->mModified = ( $res !== $text );
	return $res;
}
/**
 * Return the regex in which $1 acts as a wildcard, building the regexes on
 * first use.
 *
 * @return string
 */
function getVariableRegex() {
	$needsInit = ( $this->mVariableRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mVariableRegex;
}
/**
 * Return the regex that must match the entire string, with $1 acting as a
 * wildcard, building the regexes on first use.
 *
 * @return string
 */
function getVariableStartToEndRegex() {
	$needsInit = ( $this->mVariableStartToEndRegex == '' );
	if ( $needsInit ) {
		$this->initRegex();
	}
	return $this->mVariableStartToEndRegex;
}
/**
 * Return one entry of the synonym list by index, without any bounds check.
 *
 * @param $i int
 *
 * @return string
 */
function getSynonym( $i ) {
	$synonyms = $this->mSynonyms;
	return $synonyms[$i];
}
/**
 * Return the full synonym list of this word.
 *
 * @return array
 */
function getSynonyms() {
	$synonyms = $this->mSynonyms;
	return $synonyms;
}
/**
 * Report whether the most recent replace() or substituteCallback() call
 * produced a text different from its input.
 *
 * @return bool
 */
function getWasModified(){
	$modified = $this->mModified;
	return $modified;
}
/**
 * Replace several magic words in one preg_replace() call.
 *
 * $magicarr is an associative array of (magic word ID => replacement).
 * The result is placed in the out variable $result. Replacements are
 * inserted literally: they are escaped the same way replace() does, so "$1"
 * or "\1" in a replacement is not treated as a backreference (the word
 * regexes have no capture groups for it to refer to anyway).
 * @todo Should this be static? It doesn't seem to be used at all
 *
 * @param $magicarr array
 * @param $subject
 * @param $result
 *
 * @return bool True if something was replaced
 */
function replaceMultiple( $magicarr, $subject, &$result ){
	$search = array();
	$replace = array();
	foreach( $magicarr as $id => $replacement ){
		$mw = MagicWord::get( $id );
		$search[] = $mw->getRegex();
		$replace[] = StringUtils::escapeRegexReplacement( $replacement );
	}

	$result = preg_replace( $search, $replace, $subject );
	return $result !== $subject;
}
/**
 * Register every synonym of this word in a lookup array: the key is the
 * synonym lowercased by the content language, the value is $value.
 *
 * @param $array array Lookup table, modified in place
 * @param $value
 */
function addToArray( &$array, $value ) {
	global $wgContLang;
	foreach ( $this->mSynonyms as $synonym ) {
		$key = $wgContLang->lc( $synonym );
		$array[$key] = $value;
	}
}
/**
 * Return the case-sensitivity flag of this word.
 *
 * @return bool
 */
function isCaseSensitive() {
	$flag = $this->mCaseSensitive;
	return $flag;
}
/**
 * Return the ID this word was constructed or loaded with.
 *
 * @return int
 */
function getId() {
	$id = $this->mId;
	return $id;
}
640 * Class for handling an array of magic words
641 * @ingroup Parser
643 class MagicWordArray {
644 var $names = array();
645 var $hash;
646 var $baseRegex, $regex;
647 var $matches;
/**
 * Create the collection from a list of magic word names.
 *
 * @param $names array
 */
function __construct( $names = array() ) {
	// The lookup structures (hash, regexes) are built lazily by the getters
	$this->names = $names;
}
/**
 * Append one magic word name and discard the cached hash and regexes so
 * they are rebuilt on next use.
 *
 * @param $name string
 */
public function add( $name ) {
	$this->names[] = $name;

	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Append several magic word names (the keys of $names are ignored) and
 * discard the cached hash and regexes so they are rebuilt on next use.
 *
 * @param $names array
 */
public function addArray( $names ) {
	$extra = array_values( $names );
	$this->names = array_merge( $this->names, $extra );

	$this->regex = null;
	$this->baseRegex = null;
	$this->hash = null;
}
/**
 * Get the 2-d lookup table for this array, building it on first use.
 *
 * Index 0 maps lowercased synonyms of case-insensitive words to their magic
 * word name; index 1 maps the unchanged synonyms of case-sensitive words.
 *
 * @return array
 */
function getHash() {
	global $wgContLang;

	if ( $this->hash !== null ) {
		return $this->hash;
	}

	$this->hash = array( 0 => array(), 1 => array() );
	foreach ( $this->names as $name ) {
		$word = MagicWord::get( $name );
		$bucket = intval( $word->isCaseSensitive() );
		foreach ( $word->getSynonyms() as $synonym ) {
			$key = $bucket ? $synonym : $wgContLang->lc( $synonym );
			$this->hash[$bucket][$key] = $name;
		}
	}
	return $this->hash;
}
/**
 * Get the base regexes, building them on first use.
 *
 * The result has two entries: index 0 for case-insensitive words, index 1
 * for case-sensitive ones. Each is an alternation of named groups
 * "(?P<synonymIndex_name>...)", or '' when no word falls in that bucket.
 *
 * @return array
 */
function getBaseRegex() {
	if ( $this->baseRegex !== null ) {
		return $this->baseRegex;
	}

	$this->baseRegex = array( 0 => '', 1 => '' );
	foreach ( $this->names as $name ) {
		$word = MagicWord::get( $name );
		$bucket = intval( $word->isCaseSensitive() );
		foreach ( $word->getSynonyms() as $i => $synonym ) {
			$group = "(?P<{$i}_{$name}>" . preg_quote( $synonym, '/' ) . ')';
			// Separate alternatives with '|', but not before the first one
			$glue = ( $this->baseRegex[$bucket] === '' ) ? '' : '|';
			$this->baseRegex[$bucket] .= $glue . $group;
		}
	}
	return $this->baseRegex;
}
/**
 * Get the unanchored regexes that do not match parameters, building them on
 * first use.
 *
 * @return array Two entries (0: case-insensitive, 1: case-sensitive); an
 *   entry is '' when the corresponding base regex is empty
 */
function getRegex() {
	if ( $this->regex === null ) {
		$base = $this->getBaseRegex();
		$this->regex = array(
			$base[0] === '' ? '' : "/{$base[0]}/iuS",
			$base[1] === '' ? '' : "/{$base[1]}/S",
		);
	}
	return $this->regex;
}
/**
 * Get the regexes for matching variables with parameters: the result of
 * getRegex() with every quoted "$1" placeholder turned into a lazy capture
 * group.
 *
 * @return array str_replace() is applied to the array from getRegex(), so
 *   the result is an array as well
 */
function getVariableRegex() {
	$plain = $this->getRegex();
	return str_replace( "\\$1", "(.*?)", $plain );
}
/**
 * Get the regexes anchored to the start of the string that do not match
 * parameters.
 *
 * @return array Two entries (0: case-insensitive, 1: case-sensitive); an
 *   entry is '' when the corresponding base regex is empty
 */
function getRegexStart() {
	$base = $this->getBaseRegex();
	return array(
		$base[0] === '' ? '' : "/^(?:{$base[0]})/iuS",
		$base[1] === '' ? '' : "/^(?:{$base[1]})/S",
	);
}
/**
 * Get the regexes anchored at both ends for matching variables with
 * parameters; a quoted "$1" placeholder becomes a lazy capture group.
 *
 * @return array Two entries (0: case-insensitive, 1: case-sensitive); an
 *   entry is '' when the corresponding base regex is empty
 */
function getVariableStartToEndRegex() {
	$base = $this->getBaseRegex();
	$insensitive = '';
	$sensitive = '';
	if ( $base[0] !== '' ) {
		$insensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
	}
	if ( $base[1] !== '' ) {
		$sensitive = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
	}
	return array( $insensitive, $sensitive );
}
/**
 * Return the magic word names held by this array.
 *
 * @since 1.20
 * @return array
 */
public function getNames() {
	$names = $this->names;
	return $names;
}
/**
 * Parse a match array from preg_match
 * Returns array(magic word ID, parameter value)
 * If there is no parameter value, that element will be false.
 *
 * The first entry that is neither the whole match (key 0) nor empty must be
 * a named group "synonymIndex_magicName". The parameter is the entry two
 * positions further on, because preg_match() stores each named group a
 * second time under its numeric index right after the named key.
 *
 * @param $m array
 *
 * @throws MWException
 * @return array
 */
function parseMatch( $m ) {
	reset( $m );
	// key()/current()/next() replace each(), which was deprecated in PHP 7.2
	// and removed in PHP 8.0; the internal pointer moves exactly as before.
	while ( ( $key = key( $m ) ) !== null ) {
		$value = current( $m );
		next( $m );
		if ( $key === 0 || $value === '' ) {
			continue;
		}
		$parts = explode( '_', $key, 2 );
		if ( count( $parts ) != 2 ) {
			// This shouldn't happen
			// continue;
			throw new MWException( __METHOD__ . ': bad parameter name' );
		}
		list( /* $synIndex */, $magicName ) = $parts;
		// The pointer now sits on the numeric duplicate of the named group;
		// step over it. next() returns false if nothing follows.
		$paramValue = next( $m );
		return array( $magicName, $paramValue );
	}
	// This shouldn't happen either
	throw new MWException( __METHOD__.': parameter not found' );
}
/**
 * Match the whole text against the words, capturing a parameter.
 *
 * The first non-empty regex that matches wins, and its match is handed to
 * parseMatch().
 *
 * @param $text string
 *
 * @return array Magic word name in the first element and the parameter in
 *   the second; both are false if there was no match
 */
public function matchVariableStartToEnd( $text ) {
	foreach ( $this->getVariableStartToEndRegex() as $regex ) {
		if ( $regex === '' ) {
			continue;
		}
		$m = array();
		if ( preg_match( $regex, $text, $m ) ) {
			return $this->parseMatch( $m );
		}
	}
	return array( false, false );
}
/**
 * Match the whole text against the words, without parameter capture.
 *
 * The case-sensitive table is tried with the text as given, then the
 * case-insensitive table with the text lowercased by the content language.
 *
 * @param $text string
 *
 * @return string|bool The magic word name, or false on failure
 */
public function matchStartToEnd( $text ) {
	global $wgContLang;

	$hash = $this->getHash();
	if ( isset( $hash[1][$text] ) ) {
		return $hash[1][$text];
	}

	$lowered = $wgContLang->lc( $text );
	return isset( $hash[0][$lowered] ) ? $hash[0][$lowered] : false;
}
/**
 * Find every magic word of this array in the text and remove the matches
 * from the text (passed by reference).
 *
 * @param $text string
 *
 * @return array Associative array, ID => param value, of all items found; a
 *   later match of the same ID overwrites an earlier one
 */
public function matchAndRemove( &$text ) {
	$found = array();
	foreach ( $this->getRegex() as $regex ) {
		if ( $regex !== '' ) {
			preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			foreach ( $matches as $m ) {
				list( $name, $param ) = $this->parseMatch( $m );
				$found[$name] = $param;
			}
			// Delete everything this regex matched before trying the next one
			$text = preg_replace( $regex, '', $text );
		}
	}
	return $found;
}
/**
 * Identify the magic word at the start of $text and strip that prefix from
 * $text. Does not match parameters.
 *
 * @param $text string Modified in place on success, untouched otherwise
 *
 * @return int|bool ID of the matched word as given by parseMatch(), or
 *   false on failure
 */
public function matchStartAndRemove( &$text ) {
	foreach ( $this->getRegexStart() as $regex ) {
		if ( $regex === '' || !preg_match( $regex, $text, $m ) ) {
			continue;
		}
		list( $id, ) = $this->parseMatch( $m );
		$matchedLength = strlen( $m[0] );
		// When the match covers the whole text, set '' explicitly instead
		// of calling substr() at the very end of the string
		$text = ( $matchedLength >= strlen( $text ) )
			? ''
			: substr( $text, $matchedLength );
		return $id;
	}
	return false;
}