* API: added categories property
[mediawiki.git] / includes / MagicWord.php
blobbf72a0c84181cf6332a59e6c90738117ba3835a9
1 <?php
2 /**
3 * File for magic words
4 * @addtogroup Parser
5 */
7 /**
8 * This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
9 * Usage:
10 * if (MagicWord::get( 'redirect' )->match( $text ) )
12 * Possible future improvements:
13 * * Simultaneous searching for a number of magic words
14 * * MagicWord::$mObjects in shared memory
16 * Please avoid reading the data out of one of these objects and then writing
17 * special case code. If possible, add another match()-like function here.
19 * To add magic words in an extension, use the LanguageGetMagic hook. For
20 * magic words which are also Parser variables, add a MagicWordwgVariableIDs
21 * hook. Use string keys.
24 class MagicWord {
/**#@+
 * @private
 */
# Definition of the word. Several getters test $mRegex against '' to decide
# whether initRegex() still has to run.
var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
# Patterns derived from the synonyms by initRegex().
# $mVariableStartToEndRegex is declared here because the constructor and
# initRegex() both assign it; without the declaration it would only exist
# as a dynamically created property.
var $mRegexStart, $mBaseRegex, $mVariableRegex, $mVariableStartToEndRegex;
# $mModified is set by replace()/substituteCallback(); $mFound is set by the
# matchAndRemove() family through pregRemoveAndRecord().
var $mModified, $mFound;
# Set to true once getVariableIDs() has run the extension hooks, so that
# they are only run once per request.
public static $mVariableIDsInitialised = false;

# Built-in parser variable IDs. getVariableIDs() passes this array by
# reference to the MagicWordwgVariableIDs hook before returning it.
public static $mVariableIDs = array(
	'currentmonth',
	'currentmonthname',
	'currentmonthnamegen',
	'currentmonthabbrev',
	'currentday',
	'currentday2',
	'currentdayname',
	'currentyear',
	'currenttime',
	'currenthour',
	'localmonth',
	'localmonthname',
	'localmonthnamegen',
	'localmonthabbrev',
	'localday',
	'localday2',
	'localdayname',
	'localyear',
	'localtime',
	'localhour',
	'numberofarticles',
	'numberoffiles',
	'numberofedits',
	'sitename',
	'server',
	'servername',
	'scriptpath',
	'pagename',
	'pagenamee',
	'fullpagename',
	'fullpagenamee',
	'namespace',
	'namespacee',
	'currentweek',
	'currentdow',
	'localweek',
	'localdow',
	'revisionid',
	'revisionday',
	'revisionday2',
	'revisionmonth',
	'revisionyear',
	'revisiontimestamp',
	'subpagename',
	'subpagenamee',
	'displaytitle',
	'talkspace',
	'talkspacee',
	'subjectspace',
	'subjectspacee',
	'talkpagename',
	'talkpagenamee',
	'subjectpagename',
	'subjectpagenamee',
	'numberofusers',
	'rawsuffix',
	'newsectionlink',
	'numberofpages',
	'currentversion',
	'basepagename',
	'basepagenamee',
	'urlencode',
	'currenttimestamp',
	'localtimestamp',
	'directionmark',
	'language',
	'contentlanguage',
	'pagesinnamespace',
	'numberofadmins',
	'defaultsort',
);

# Cache of MagicWord instances keyed by magic word ID, filled by get().
public static $mObjects = array();
108 /**#@-*/
/**
 * Create a magic word object. All cached regexes start out empty and are
 * built on demand by initRegex().
 *
 * @param $id  Magic word ID
 * @param $syn Synonym or array of synonyms; a scalar is cast to an array
 * @param $cs  True if the word is matched case-sensitively
 */
function __construct($id = 0, $syn = '', $cs = false) {
	$this->mId = $id;
	$this->mSynonyms = (array)$syn;
	$this->mCaseSensitive = $cs;

	# Empty string marks a pattern as "not built yet" for the lazy getters.
	$lazyPatterns = array( 'mRegex', 'mRegexStart', 'mVariableRegex', 'mVariableStartToEndRegex' );
	foreach ( $lazyPatterns as $field ) {
		$this->$field = '';
	}

	$this->mModified = false;
}
/**
 * Factory: returns the cached object for a magic word ID, creating and
 * loading it on the first request for that ID.
 *
 * @param $id Magic word ID, used as the key into self::$mObjects
 * @return MagicWord (returned by reference)
 * @static
 */
static function &get( $id ) {
	if ( array_key_exists( $id, self::$mObjects ) ) {
		return self::$mObjects[$id];
	}

	$word = new MagicWord();
	$word->load( $id );
	self::$mObjects[$id] = $word;

	return self::$mObjects[$id];
}
/**
 * Get an array of parser variable IDs.
 * The extension hooks are run on the first call only; later calls return
 * the already-extended list.
 *
 * @return array
 */
static function getVariableIDs() {
	if ( self::$mVariableIDsInitialised ) {
		return self::$mVariableIDs;
	}

	# Deprecated constant definition hook, available for extensions that need it:
	# every name the hook supplies becomes a constant whose value is its own name.
	$constantNames = array();
	wfRunHooks( 'MagicWordMagicWords', array( &$constantNames ) );
	foreach ( $constantNames as $constantName ) {
		define( $constantName, $constantName );
	}

	# Get variable IDs; the list is passed by reference so hooks can extend it
	wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
	self::$mVariableIDsInitialised = true;

	return self::$mVariableIDs;
}
/**
 * Initialises this object with an ID. The object is handed to
 * $wgContLang->getMagic(); if it has no synonyms afterwards, a placeholder
 * synonym is installed and the problem is written to the 'exception' debug
 * log instead of being thrown.
 *
 * @param $id Magic word ID
 */
function load( $id ) {
	global $wgContLang;

	$this->mId = $id;
	$wgContLang->getMagic( $this );

	if ( $this->mSynonyms ) {
		return;
	}

	# Unknown magic word: substitute a placeholder synonym and log the error.
	$this->mSynonyms = array( 'dkjsagfjsgashfajsh' );
	wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
}
/**
 * Preliminary initialisation: builds every cached pattern from the synonym
 * list (base alternation, anywhere match, start-anchored match, and the two
 * "$1 is a wildcard" variants).
 *
 * Restrictions on what "$1" may contain are deliberately not encoded here;
 * they differ per use and should be checked after the word has matched.
 *
 * @private
 */
function initRegex() {
	# Quote each synonym for use inside /.../ and join them as alternatives
	$quoted = array();
	foreach ( $this->mSynonyms as $synonym ) {
		$quoted[] = preg_quote( $synonym, '/' );
	}
	$base = implode( '|', $quoted );
	$flags = $this->mCaseSensitive ? '' : 'iu';

	# preg_quote() turns the "$1" placeholder into "\$1"; the variable
	# patterns swap that for a lazy capture group.
	$placeholder = "\\$1";
	$wildcard = "(.*?)";

	$this->mBaseRegex = $base;
	$this->mRegex = '/' . $base . '/' . $flags;
	$this->mRegexStart = '/^(?:' . $base . ')/' . $flags;
	$this->mVariableRegex = str_replace( $placeholder, $wildcard, $this->mRegex );
	$this->mVariableStartToEndRegex = str_replace( $placeholder, $wildcard,
		'/^(?:' . $base . ')$/' . $flags );
}
/**
 * Gets a regex matching the word anywhere in a string, building the
 * patterns first if that has not happened yet.
 *
 * @return string
 */
function getRegex() {
	$notBuiltYet = ( $this->mRegex == '' );
	if ( $notBuiltYet ) {
		$this->initRegex();
	}
	return $this->mRegex;
}
/**
 * Gets the regexp modifiers to use with MagicWord::getBaseRegex(): 'iu' for
 * a case-insensitive word, an empty string otherwise. The complete
 * expressions returned by the other getters already include them.
 *
 * @return string
 */
function getRegexCase() {
	if ( $this->mRegex === '' ) {
		$this->initRegex();
	}

	if ( $this->mCaseSensitive ) {
		return '';
	}
	return 'iu';
}
/**
 * Gets a regex matching the word only at the start of a string, building
 * the patterns first if that has not happened yet.
 *
 * @return string
 */
function getRegexStart() {
	$notBuiltYet = ( $this->mRegex == '' );
	if ( $notBuiltYet ) {
		$this->initRegex();
	}
	return $this->mRegexStart;
}
/**
 * Gets the bare alternation of quoted synonyms, without delimiters or
 * modifiers. Pair it with getRegexCase() when embedding it in a pattern.
 *
 * @return string
 */
function getBaseRegex() {
	$notBuiltYet = ( $this->mRegex == '' );
	if ( $notBuiltYet ) {
		$this->initRegex();
	}
	return $this->mBaseRegex;
}
/**
 * Returns true if the text contains the word.
 *
 * The preg_match() result is cast so that the method really returns a
 * boolean, as documented, rather than 1, 0 or false.
 *
 * @param $text String to search
 * @return bool
 */
function match( $text ) {
	return (bool)preg_match( $this->getRegex(), $text );
}
/**
 * Returns true if the text starts with the word.
 *
 * The preg_match() result is cast so that the method really returns a
 * boolean, as documented, rather than 1, 0 or false.
 *
 * @param $text String to test
 * @return bool
 */
function matchStart( $text ) {
	return (bool)preg_match( $this->getRegexStart(), $text );
}
/**
 * Matches the whole text against the word, treating $1 as a wildcard.
 *
 * Returns NULL if there's no match. Otherwise the return value is the
 * matched variable part ($1) if the matching synonym has one, or the
 * matched string itself if there is no variable part.
 *
 * @param $text String that must match from start to end
 * @return string or NULL
 */
function matchVariableStartToEnd( $text ) {
	$matches = array();
	if ( !preg_match( $this->getVariableStartToEndRegex(), $text, $matches ) ) {
		return NULL;
	}

	# Multiple matched parts (variable match); some will be empty because of
	# synonyms. The variable will be the second non-empty one, so collect the
	# non-empty parts in order. See also bug 6526.
	# Only the empty string counts as blank here: a plain array_filter() would
	# also throw away a legitimate value of "0".
	$captured = array();
	foreach ( $matches as $part ) {
		if ( $part !== '' ) {
			$captured[] = $part;
		}
	}

	if ( isset( $captured[1] ) ) {
		return $captured[1];
	}
	# No variable part: fall back to the whole match. NULL if nothing is left
	# at all (the regex matched an empty string).
	return isset( $captured[0] ) ? $captured[0] : NULL;
}
/**
 * Returns true if the text matches the word, and alters the
 * input string, removing all instances of the word.
 *
 * @param $text String to search; modified in place
 * @return bool
 */
function matchAndRemove( &$text ) {
	$this->mFound = false;
	# The callback records that it ran and replaces each match with ''.
	# $this is passed as-is: objects are handles, so the PHP 4 style
	# reference (&$this) is not needed for the callback to update this object.
	$text = preg_replace_callback( $this->getRegex(), array( $this, 'pregRemoveAndRecord' ), $text );
	return $this->mFound;
}
/**
 * Returns true if the text starts with the word, and alters the
 * input string, removing the word from its start.
 *
 * @param $text String to test; modified in place
 * @return bool
 */
function matchStartAndRemove( &$text ) {
	$this->mFound = false;
	# The callback records that it ran and replaces the match with ''.
	# $this is passed as-is: objects are handles, so the PHP 4 style
	# reference (&$this) is not needed for the callback to update this object.
	$text = preg_replace_callback( $this->getRegexStart(), array( $this, 'pregRemoveAndRecord' ), $text );
	return $this->mFound;
}
/**
 * Callback used in matchAndRemove() and matchStartAndRemove(): notes that
 * a match occurred and supplies the empty replacement text.
 *
 * @return string Always ''
 * @private
 */
function pregRemoveAndRecord() {
	$replacement = '';
	$this->mFound = true;
	return $replacement;
}
/**
 * Replaces the word with something else. The replacement is passed through
 * StringUtils::escapeRegexReplacement() before being handed to
 * preg_replace(). Whether anything changed is remembered for
 * getWasModified().
 *
 * @param $replacement Replacement text
 * @param $subject     Text to operate on
 * @param $limit       Maximum number of replacements, -1 for no limit
 * @return Result of preg_replace()
 */
function replace( $replacement, $subject, $limit=-1 ) {
	$escaped = StringUtils::escapeRegexReplacement( $replacement );
	$result = preg_replace( $this->getRegex(), $escaped, $subject, $limit );
	$this->mModified = ( $result !== $subject );
	return $result;
}
/**
 * Variable handling: {{SUBST:xxx}} style words.
 * Calls back a function to determine what to replace xxx with.
 * Input word must contain $1. Whether anything changed is remembered for
 * getWasModified().
 *
 * @param $text     Text to operate on
 * @param $callback Callback passed to preg_replace_callback()
 * @return Result of preg_replace_callback()
 */
function substituteCallback( $text, $callback ) {
	$pattern = $this->getVariableRegex();
	$result = preg_replace_callback( $pattern, $callback, $text );
	$this->mModified = ( $result !== $text );
	return $result;
}
/**
 * Gets the regex that matches the word with $1 acting as a wildcard,
 * building the patterns first if that has not happened yet.
 *
 * @return string
 */
function getVariableRegex() {
	$notBuiltYet = ( $this->mVariableRegex == '' );
	if ( $notBuiltYet ) {
		$this->initRegex();
	}
	return $this->mVariableRegex;
}
/**
 * Gets the regex that matches the entire string with $1 acting as a
 * wildcard, building the patterns first if that has not happened yet.
 *
 * @return string
 */
function getVariableStartToEndRegex() {
	$notBuiltYet = ( $this->mVariableStartToEndRegex == '' );
	if ( $notBuiltYet ) {
		$this->initRegex();
	}
	return $this->mVariableStartToEndRegex;
}
/**
 * Accesses the synonym list directly. No check is made that the index
 * exists.
 *
 * @param $i Index into the synonym list
 * @return The synonym stored at that index
 */
function getSynonym( $i ) {
	$synonyms = $this->mSynonyms;
	return $synonyms[$i];
}
/**
 * Gets the complete synonym list of this word.
 *
 * @return array
 */
function getSynonyms() {
	$synonyms = $this->mSynonyms;
	return $synonyms;
}
/**
 * Returns true if the last call to replace() or substituteCallback()
 * returned a modified text, otherwise false.
 *
 * @return bool
 */
function getWasModified() {
	$wasModified = $this->mModified;
	return $wasModified;
}
/**
 * $magicarr is an associative array of (magic word ID => replacement).
 * This method uses the php feature to do several replacements at the same time,
 * thereby gaining some efficiency. The result is placed in the out variable
 * $result. The return value is true if something was replaced.
 *
 * Declared static to match its documentation: it uses no instance state,
 * and it can still be called through an instance.
 *
 * NOTE(review): unlike replace(), the replacements are handed to
 * preg_replace() unescaped - confirm that callers rely on this before
 * changing it.
 *
 * @param $magicarr Array of (magic word ID => replacement)
 * @param $subject  Text to operate on
 * @param $result   Out parameter receiving the result of preg_replace()
 * @return bool
 * @static
 */
static function replaceMultiple( $magicarr, $subject, &$result ){
	$search = array();
	$replace = array();
	foreach( $magicarr as $id => $replacement ){
		$mw = MagicWord::get( $id );
		$search[] = $mw->getRegex();
		$replace[] = $replacement;
	}

	$result = preg_replace( $search, $replace, $subject );
	return $result !== $subject;
}
/**
 * Adds all the synonyms of this MagicWord to an array, to allow quick
 * lookup in a list of magic words. Each key is the synonym passed through
 * $wgContLang->lc().
 *
 * @param $array Array to add to; modified in place
 * @param $value Value stored under every synonym key
 */
function addToArray( &$array, $value ) {
	global $wgContLang;

	foreach ( $this->mSynonyms as $synonym ) {
		$key = $wgContLang->lc( $synonym );
		$array[$key] = $value;
	}
}
/**
 * Gets the case-sensitivity flag this word was set up with.
 *
 * @return The stored $mCaseSensitive value
 */
function isCaseSensitive() {
	$caseSensitive = $this->mCaseSensitive;
	return $caseSensitive;
}