Non-word characters don't terminate tag names.
[mediawiki.git] / includes / parser / StripState.php
blob5f3f18eaebf37a857fce50fdc490fed3db836311
1 <?php
/**
 * Holder for stripped items when parsing wiki markup.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */

/**
 * Holds the text that was stripped out of wiki markup during parsing, and
 * puts it back on request.
 *
 * Every stripped item is stored under the key found between the marker
 * prefix and Parser::MARKER_SUFFIX, in one of two buckets: 'nowiki' or
 * 'general'. Unstripping replaces each known marker in a text with its stored
 * value, recursing into that value so nested markers are expanded as well.
 *
 * @ingroup Parser
 */
class StripState {
	protected $prefix;
	protected $data;
	protected $regex;

	protected $tempType, $tempMergePrefix;
	protected $circularRefGuard;
	protected $recursionLevel = 0;

	const UNSTRIP_RECURSION_LIMIT = 20;

	/**
	 * @param $prefix string Literal text every strip marker starts with
	 */
	public function __construct( $prefix ) {
		$this->prefix = $prefix;
		$this->data = array(
			'nowiki' => array(),
			'general' => array()
		);
		// The prefix is matched literally by strpos() in getSubState() and is
		// emitted literally by mergeCallback(), so it has to be literal in the
		// regex too: quote it (and the suffix) instead of interpolating it raw.
		$this->regex = '/' . preg_quote( $prefix, '/' ) . "([^\x7f]+)"
			. preg_quote( Parser::MARKER_SUFFIX, '/' ) . '/';
		$this->circularRefGuard = array();
	}

	/**
	 * Add a nowiki strip item
	 * @param $marker string Full marker text (prefix, key and suffix)
	 * @param $value string Text the marker stands for
	 */
	public function addNoWiki( $marker, $value ) {
		$this->addItem( 'nowiki', $marker, $value );
	}

	/**
	 * Add a general strip item
	 * @param $marker string Full marker text (prefix, key and suffix)
	 * @param $value string Text the marker stands for
	 */
	public function addGeneral( $marker, $value ) {
		$this->addItem( 'general', $marker, $value );
	}

	/**
	 * Store a value in the given bucket, keyed by the key part of the marker.
	 *
	 * @throws MWException if $marker does not match the marker pattern
	 * @param $type string Bucket name: 'nowiki' or 'general'
	 * @param $marker string Full marker text
	 * @param $value string Text the marker stands for
	 */
	protected function addItem( $type, $marker, $value ) {
		if ( !preg_match( $this->regex, $marker, $m ) ) {
			throw new MWException( "Invalid marker: $marker" );
		}

		$this->data[$type][$m[1]] = $value;
	}

	/**
	 * Replace the markers of 'general' items in the text with their values.
	 *
	 * @param $text string
	 * @return string
	 */
	public function unstripGeneral( $text ) {
		return $this->unstripType( 'general', $text );
	}

	/**
	 * Replace the markers of 'nowiki' items in the text with their values.
	 *
	 * @param $text string
	 * @return string
	 */
	public function unstripNoWiki( $text ) {
		return $this->unstripType( 'nowiki', $text );
	}

	/**
	 * Replace the markers of both item types, 'general' first, then 'nowiki'.
	 *
	 * @param $text string
	 * @return string
	 */
	public function unstripBoth( $text ) {
		$text = $this->unstripType( 'general', $text );
		$text = $this->unstripType( 'nowiki', $text );
		return $text;
	}

	/**
	 * Replace every marker of one bucket found in the text.
	 *
	 * @param $type string Bucket name: 'nowiki' or 'general'
	 * @param $text string
	 * @return string
	 */
	protected function unstripType( $type, $text ) {
		// Shortcut
		if ( !count( $this->data[$type] ) ) {
			return $text;
		}

		wfProfileIn( __METHOD__ );
		// unstripCallback() reads the bucket from $this->tempType. Save and
		// restore the previous value because the callback re-enters this method.
		$oldType = $this->tempType;
		$this->tempType = $type;
		$text = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
		$this->tempType = $oldType;
		wfProfileOut( __METHOD__ );
		return $text;
	}

	/**
	 * preg_replace_callback() handler for unstripType().
	 *
	 * @param $m array Regex match: $m[0] is the whole marker, $m[1] its key
	 * @return string The expanded value, an error span, or the marker itself
	 *   when the current bucket has no item for the key
	 */
	protected function unstripCallback( $m ) {
		$marker = $m[1];
		if ( !isset( $this->data[$this->tempType][$marker] ) ) {
			// Not one of ours (or belongs to the other bucket): leave untouched
			return $m[0];
		}

		// This key is already being expanded further up the call stack
		if ( isset( $this->circularRefGuard[$marker] ) ) {
			return '<span class="error">'
				. wfMessage( 'parser-unstrip-loop-warning' )->inContentLanguage()->text()
				. '</span>';
		}

		if ( $this->recursionLevel >= self::UNSTRIP_RECURSION_LIMIT ) {
			return '<span class="error">' .
				wfMessage( 'parser-unstrip-recursion-limit' )
					->numParams( self::UNSTRIP_RECURSION_LIMIT )->inContentLanguage()->text() .
				'</span>';
		}

		// Expand markers nested inside the stored value, tracking the key and
		// the depth for the two checks above.
		$this->circularRefGuard[$marker] = true;
		$this->recursionLevel++;
		$ret = $this->unstripType( $this->tempType, $this->data[$this->tempType][$marker] );
		$this->recursionLevel--;
		unset( $this->circularRefGuard[$marker] );
		return $ret;
	}

	/**
	 * Get a StripState object which is sufficient to unstrip the given text.
	 * It will contain the minimum subset of strip items necessary.
	 *
	 * @param $text string
	 *
	 * @return StripState
	 */
	public function getSubState( $text ) {
		$subState = new StripState( $this->prefix );
		$prefixLength = strlen( $this->prefix );
		$suffixLength = strlen( Parser::MARKER_SUFFIX );
		$pos = 0;
		while ( true ) {
			$startPos = strpos( $text, $this->prefix, $pos );
			if ( $startPos === false ) {
				break;
			}

			// Look for the suffix only after this prefix. A stray suffix that
			// precedes the prefix must not be paired with it.
			$endPos = strpos( $text, Parser::MARKER_SUFFIX, $startPos + $prefixLength );
			if ( $endPos === false ) {
				break;
			}

			$endPos += $suffixLength;
			$marker = substr( $text, $startPos, $endPos - $startPos );
			if ( !preg_match( $this->regex, $marker, $m ) ) {
				// Not a valid marker. Step past this prefix occurrence so the
				// scan always makes progress instead of retrying the same span.
				$pos = $startPos + 1;
				continue;
			}

			$key = $m[1];
			if ( isset( $this->data['nowiki'][$key] ) ) {
				$subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
			} elseif ( isset( $this->data['general'][$key] ) ) {
				$subState->data['general'][$key] = $this->data['general'][$key];
			}

			$pos = $endPos;
		}
		return $subState;
	}

	/**
	 * Merge another StripState object into this one. The strip marker keys
	 * will not be preserved. The strings in the $texts array will have their
	 * strip markers rewritten, the resulting array of strings will be returned.
	 *
	 * @param $otherState StripState
	 * @param $texts Array
	 * @return Array
	 */
	public function merge( $otherState, $texts ) {
		$mergePrefix = Parser::getRandomString();

		// Import the other state's items under "<mergePrefix>-<old key>"
		foreach ( $otherState->data as $type => $items ) {
			foreach ( $items as $key => $value ) {
				$this->data[$type]["$mergePrefix-$key"] = $value;
			}
		}

		// Rewrite the other state's markers in the texts to the new keys;
		// mergeCallback() reads the merge prefix from $this->tempMergePrefix.
		$this->tempMergePrefix = $mergePrefix;
		$texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts );
		$this->tempMergePrefix = null;
		return $texts;
	}

	/**
	 * preg_replace_callback() handler for merge(): build the marker for the
	 * re-keyed item using this object's prefix.
	 *
	 * @param $m array Regex match: $m[1] is the key in the other state
	 * @return string
	 */
	protected function mergeCallback( $m ) {
		$key = $m[1];
		return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
	}

	/**
	 * Remove any strip markers found in the given text.
	 *
	 * @param $text Input string
	 * @return string
	 */
	public function killMarkers( $text ) {
		return preg_replace( $this->regex, '', $text );
	}
}