Special case opus mime detection
[mediawiki.git] / includes / MagicWordArray.php
blob6a9ead5f1ebe67dc825597a3259d315b2630ee9b
1 <?php
3 /**
4 * See docs/magicword.txt.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
21 * @file
22 * @ingroup Parser
25 use MediaWiki\Logger\LoggerFactory;
/**
 * Class for handling an array of magic words
 *
 * Holds a list of magic word names and lazily derives lookup structures from
 * them (a synonym hash and several regexes). Each derived structure is a
 * two-element array indexed by case sensitivity: index 0 holds the
 * case-insensitive words, index 1 the case-sensitive ones.
 *
 * @ingroup Parser
 */
class MagicWordArray {
	/** @var array List of magic word names */
	public $names = [];

	/** @var array|null Cache for getHash(); null until built or after invalidation */
	private $hash;

	/** @var array|null Cache for getBaseRegex(); null until built or after invalidation */
	private $baseRegex;

	/** @var array|null Cache for getRegex(); null until built or after invalidation */
	private $regex;

	/**
	 * @param array $names Initial list of magic word names
	 */
	public function __construct( $names = [] ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * Invalidates all cached lookup structures.
	 *
	 * @param string $name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Only the values of $names are used; its keys are discarded.
	 * Invalidates all cached lookup structures.
	 *
	 * @param array $names
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * The first dimension is the case sensitivity (0 or 1), the second maps
	 * each synonym to its magic word name. Synonyms of case-insensitive words
	 * are stored lower-cased using the content language.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( $this->hash === null ) {
			global $wgContLang;
			// Build locally and assign once, so an exception thrown part-way
			// through cannot leave a half-populated cache behind.
			$hash = [ 0 => [], 1 => [] ];
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$hash[$case][$syn] = $name;
				}
			}
			$this->hash = $hash;
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Each element is an undelimited alternation of named groups, one group
	 * per synonym, named "<index>_<magic word name>". It is an empty string
	 * when there are no words of that case sensitivity.
	 *
	 * @return array
	 */
	public function getBaseRegex() {
		if ( $this->baseRegex === null ) {
			// Collect the alternatives locally and assign once, so an exception
			// thrown part-way through cannot leave a half-built cache behind.
			$groups = [ 0 => [], 1 => [] ];
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$groups[$case][] = "(?P<{$it}_{$name}>" . preg_quote( $syn, '/' ) . ')';
				}
			}
			$this->baseRegex = [
				0 => implode( '|', $groups[0] ),
				1 => implode( '|', $groups[1] ),
			];
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * Element 0 is the case-insensitive regex (flags "iuS"), element 1 the
	 * case-sensitive one (flag "S"). An element is an empty string when there
	 * are no words of that case sensitivity.
	 *
	 * @return array
	 */
	public function getRegex() {
		if ( $this->regex === null ) {
			$base = $this->getBaseRegex();
			$this->regex = [ '', '' ];
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * Same as getRegex(), with every quoted "$1" placeholder turned into a
	 * lazy capture group.
	 *
	 * @return array One regex string per case sensitivity, as in getRegex()
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array One regex string per case sensitivity; '' where there are no words
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * The regexes are anchored at both ends and every quoted "$1" placeholder
	 * is turned into a lazy capture group.
	 *
	 * @return array One regex string per case sensitivity; '' where there are no words
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array The list of magic word names
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param array $m Match array produced by one of this class's regexes
	 *
	 * @throws MWException If no usable named group is found
	 * @return array
	 */
	public function parseMatch( $m ) {
		// $m is a by-value copy, so moving its internal pointer is safe.
		// each() was removed in PHP 8.0; key()/current()/next() walk the
		// array with the same pointer semantics.
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Step onto the entry after the current one; for a named group
			// this is its numeric duplicate.
			next( $m );
			if ( $key === 0 || $value === '' ) {
				// Skip the whole-pattern match and groups that did not match
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			list( /* $synIndex */, $magicName ) = $parts;
			// Step past the numeric duplicate: the following entry is the
			// parameter capture, or false when the array ends here.
			$paramValue = next( $m );
			return [ $magicName, $paramValue ];
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param string $text
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return [ false, false ];
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The exact text is looked up among the case-sensitive synonyms first,
	 * then its lower-cased form among the case-insensitive ones.
	 *
	 * @param string $text
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * PCRE failures are logged to the 'parser' channel. If the removal step
	 * fails, $text is left unchanged instead of being overwritten with null.
	 *
	 * @param string &$text
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = [];
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = [];
			$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			if ( $res === false ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
			} elseif ( $res ) {
				foreach ( $matches as $m ) {
					list( $name, $param ) = $this->parseMatch( $m );
					$found[$name] = $param;
				}
			}
			$replaced = preg_replace( $regex, '', $text );
			if ( $replaced === null ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
				// Keep the caller's text intact rather than replacing it with null
				continue;
			}
			$text = $replaced;
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param string &$text
	 *
	 * @return string|bool Magic word name, or false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$matchedLength = strlen( $m[0] );
				if ( $matchedLength >= strlen( $text ) ) {
					// The match consumed everything; set '' explicitly because
					// substr() may return false here on older PHP versions.
					$text = '';
				} else {
					$text = substr( $text, $matchedLength );
				}
				return $id;
			}
		}
		return false;
	}
}