MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / htmlpurifier / HTMLPurifier / Strategy / MakeWellFormed.php
blobb3e8aa74532394569ca59894033f89b98dfe4e70
1 <?php
3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
9 require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
// Registers the %AutoFormat.Custom directive: a list, empty by default, of
// additional injectors that MakeWellFormed should run.
HTMLPurifier_ConfigSchema::define(
    'AutoFormat', 'Custom', array(), 'list', '
<p>
  This directive can be used to add custom auto-format injectors.
  Specify an array of injector names (class name minus the prefix)
  or concrete implementations. Injector class must exist. This directive
  has been available since 2.0.1.
</p>
'
);

/**
 * Takes a stream of tokens and makes it well-formed: start tags that are
 * really empty elements are converted (and vice versa), parents that cannot
 * contain a newly opened child are auto-closed, stray end tags are dropped
 * or escaped to text, and anything still open at the end is closed.
 *
 * Auto-format injectors configured under the AutoFormat namespace are given
 * a chance to rewrite text and element tokens while the stream is processed.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**#@+
     * Locally shared variable references; only valid while execute() runs
     * (they are unset again before execute() returns).
     * @private
     */
    var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
        $currentInjector, $injectors;
    /**#@-*/

    /**
     * Runs the strategy over a list of tokens.
     *
     * While running, the context variables CurrentNesting, InputIndex,
     * InputTokens and CurrentToken are registered; they are destroyed again
     * before the trailing unclosed tags are closed.
     *
     * @param $tokens  Array of tokens to process
     * @param $config  Configuration object; supplies the HTML definition,
     *                 %Core.EscapeInvalidTags and the AutoFormat batch
     * @param $context Context object (passed by reference)
     * @return Array of processed, well-formed tokens
     */
    function execute($tokens, $config, &$context) {

        $definition = $config->getHTMLDefinition();

        // CurrentNesting
        $this->currentNesting = array();
        $context->register('CurrentNesting', $this->currentNesting);

        // InputIndex
        $this->inputIndex = false;
        $context->register('InputIndex', $this->inputIndex);

        // InputTokens
        $context->register('InputTokens', $tokens);
        $this->inputTokens =& $tokens;

        // OutputTokens
        $result = array();
        $this->outputTokens =& $result;

        // %Core.EscapeInvalidTags
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $generator = new HTMLPurifier_Generator();

        $e =& $context->get('ErrorCollector', true);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) continue;
            $this->injectors[] = new $injector;
        }
        foreach ($custom_injectors as $injector) {
            // custom injectors may be given by name or as ready-made objects
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // array index of the injector that resulted in an array
        // substitution. This enables processToken() to know which
        // injectors are affected by the added tokens and which are
        // not (namely, the ones after the current injector are not
        // affected)
        $this->currentInjector = false;

        // give the injectors references to the definition and context
        // variables for performance reasons
        //
        // An injector whose prepare() returns a non-empty value is removed
        // with a warning. array_splice() reindexes the list, so the index
        // only advances when the current injector is kept; iterating over
        // the pre-splice keys would skip the injector that slides into the
        // vacated slot and then run off the end of the shortened list.
        $i = 0;
        while ($i < count($this->injectors)) {
            $error = $this->injectors[$i]->prepare($config, $context);
            if (!$error) {
                $i++;
                continue;
            }
            $name = $this->injectors[$i]->name;
            array_splice($this->injectors, $i, 1); // rm the injector
            trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
        }

        // -- end INJECTOR --

        $token = false;
        $context->register('CurrentToken', $token);

        for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$this->inputIndex];

            // every processed token uses up one unit of each pending skip
            foreach ($this->injectors as $i => $x) {
                if ($x->skip > 0) $this->injectors[$i]->skip--;
            }

            // quick-check: if it's not a tag, no need to process
            if (empty( $token->is_tag )) {
                if ($token->type === 'text') {
                    // injector handler code; duplicated for performance reasons
                    foreach ($this->injectors as $i => $x) {
                        if (!$x->skip) $x->handleText($token);
                        if (is_array($token)) {
                            // the injector replaced the token with a list
                            $this->currentInjector = $i;
                            break;
                        }
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            $info = $definition->info[$token->name]->child;

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($info->type == 'empty' && $token->type == 'start') {
                // test if it claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
                // claims to be empty but really is a start tag
                $token = array(
                    new HTMLPurifier_Token_Start($token->name, $token->attr),
                    new HTMLPurifier_Token_End($token->name)
                );
                $ok = true;
            } elseif ($token->type == 'empty') {
                // real empty token
                $ok = true;
            } elseif ($token->type == 'start') {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->currentNesting)) {

                    $parent = array_pop($this->currentNesting);
                    $parent_info = $definition->info[$parent->name];

                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // close the parent, then append the token
                        $result[] = new HTMLPurifier_Token_End($parent->name);
                        $result[] = $token;
                        $this->currentNesting[] = $token;
                        continue;
                    }

                    $this->currentNesting[] = $parent; // undo the pop

                }
                $ok = true;
            }

            // injector handler code; duplicated for performance reasons
            if ($ok) {
                // NOTE(review): when the token was expanded to a start/end
                // pair above, the first non-skipping injector is handed an
                // array rather than a token object -- confirm this is
                // intended before relying on it.
                foreach ($this->injectors as $i => $x) {
                    if (!$x->skip) $x->handleElement($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if ($token->type != 'end') continue;

            // make sure that we have something open
            if (empty($this->currentNesting)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                continue;
            }

            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($this->currentNesting);
            if ($current_parent->name == $token->name) {
                $result[] = $token;
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->currentNesting[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->currentNesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($this->currentNesting[$i]->name == $token->name) {
                    // current nesting is modified
                    $skipped_tags = array_splice($this->currentNesting, $i);
                    break;
                }
            }

            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                continue;
            }

            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            $size = count($skipped_tags);
            for ($i = $size - 1; $i > 0; $i--) {
                if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                }
                $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
            }

            // index 0 is the element the end tag was actually meant for;
            // it is closed last and without an error notice
            $result[] = new HTMLPurifier_Token_End($skipped_tags[0]->name);

        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        // we're at the end now, fix all still unclosed tags
        // not using processToken() because at this point we don't
        // care about current nesting
        if (!empty($this->currentNesting)) {
            $size = count($this->currentNesting);
            for ($i = $size - 1; $i >= 0; $i--) {
                if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                }
                $result[] =
                    new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
            }
        }

        unset($this->outputTokens, $this->injectors, $this->currentInjector,
          $this->currentNesting, $this->inputTokens, $this->inputIndex);

        return $result;
    }

    /**
     * Commits a token produced by the main loop or by an injector.
     *
     * An array is spliced into the input stream in place of the current
     * token so that its members are re-processed; a single token is
     * appended to the output and the nesting stack is updated for start
     * and end tags. A falsy token is ignored.
     *
     * @param $token   Token object, array of replacement tokens, or a
     *                 falsy value for "nothing"
     * @param $config  Configuration object (not used here)
     * @param $context Context object (not used here)
     */
    function processToken($token, $config, &$context) {
        if (is_array($token)) {
            // the original token was overloaded by an injector, time
            // to do some fancy acrobatics

            // $this->inputIndex is decremented so that the entire set gets
            // re-processed
            array_splice($this->inputTokens, $this->inputIndex--, 1, $token);

            // adjust the injector skips based on the array substitution:
            // injectors up to and including the one that made the
            // substitution must not see the inserted tokens again
            if ($this->injectors) {
                $offset = count($token) + 1;
                for ($i = 0; $i <= $this->currentInjector; $i++) {
                    $this->injectors[$i]->skip += $offset;
                }
            }
        } elseif ($token) {
            // regular case
            $this->outputTokens[] = $token;
            if ($token->type == 'start') {
                $this->currentNesting[] = $token;
            } elseif ($token->type == 'end') {
                array_pop($this->currentNesting); // not actually used
            }
        }
    }

}