Merge commit 'catalyst/MOODLE_19_STABLE' into mdl19-linuxchix
[moodle-linuxchix.git] / lib / htmlpurifier / HTMLPurifier / Strategy / MakeWellFormed.php
blob30208ba14793833c32cc6900641e23e2bfbf732b
1 <?php
3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
9 require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
// Declares the AutoFormat.Custom configuration directive. The strategy class
// in this file reads it through $config->getBatch('AutoFormat') and treats
// each entry as either an injector name or an injector instance.
// NOTE(review): the arguments appear to be namespace, name, default value
// (empty array), type ('list') and an HTML description -- confirm against
// HTMLPurifier_ConfigSchema::define().
HTMLPurifier_ConfigSchema::define(
    'AutoFormat', 'Custom', array(), 'list', '
<p>
This directive can be used to add custom auto-format injectors.
Specify an array of injector names (class name minus the prefix)
or concrete implementations. Injector class must exist. This directive
has been available since 2.0.1.
</p>
'
);
/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * The strategy walks the input token list once, repairing start/empty tag
 * mismatches, auto-closing parents that do not allow the current element,
 * dropping or escaping stray end tags, and closing anything still open at
 * the end of the document. Enabled AutoFormat injectors are given a chance
 * to inspect and rewrite text and element tokens along the way.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Locally shared variable references. They are only populated for the
     * duration of execute() and are unset again before it returns.
     * @private
     */
    var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
        $currentInjector, $injectors;

    /**
     * Rewrites a token list so that every start tag has a matching end tag.
     *
     * While running, the context holds 'CurrentNesting', 'InputIndex',
     * 'InputTokens' and 'CurrentToken'; all four are destroyed before the
     * method returns.
     *
     * @param $tokens  Array of token objects to process.
     * @param $config  Configuration object; read for the HTML definition,
     *                 Core.EscapeInvalidTags and the AutoFormat batch.
     * @param $context Context object (by reference); an optional
     *                 'ErrorCollector' entry is used for reporting.
     * @return Array of well-formed tokens.
     */
    function execute($tokens, $config, &$context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $result = array();
        $generator = new HTMLPurifier_Generator();
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $e =& $context->get('ErrorCollector', true);

        // member variables
        $this->currentNesting = array();
        $this->inputIndex     = false;
        $this->inputTokens    =& $tokens;
        $this->outputTokens   =& $result;

        // context variables
        $context->register('CurrentNesting', $this->currentNesting);
        $context->register('InputIndex',     $this->inputIndex);
        $context->register('InputTokens',    $tokens);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) continue;
            $this->injectors[] = new $injector;
        }
        foreach ($custom_injectors as $injector) {
            // custom entries are either a class-name suffix or an instance
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // array index of the injector that resulted in an array
        // substitution. This enables processToken() to know which
        // injectors are affected by the added tokens and which are
        // not (namely, the ones after the current injector are not
        // affected)
        $this->currentInjector = false;

        // give the injectors references to the definition and context
        // variables for performance reasons; injectors whose prepare()
        // reports a problem are removed with a warning.
        //
        // An explicit index loop is used instead of foreach because
        // array_splice() renumbers the list: after a removal the next
        // injector has moved into slot $i, so $i must not advance, and
        // the upper bound has to be re-read on every pass.
        $i = 0;
        while ($i < count($this->injectors)) {
            $error = $this->injectors[$i]->prepare($config, $context);
            if (!$error) {
                $i++;
                continue;
            }
            list($injector) = array_splice($this->injectors, $i, 1);
            $name = $injector->name;
            trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
        }

        // warning: most foreach loops follow the convention $i => $x.
        // be sure, for PHP4 compatibility, to only perform write operations
        // directly referencing the object using $i: $x is only safe for reads

        // -- end INJECTOR --

        $token = false;
        $context->register('CurrentToken', $token);

        for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$this->inputIndex];

            //printTokens($tokens, $this->inputIndex);

            // every injector that is currently skipping consumes one token
            foreach ($this->injectors as $i => $x) {
                if ($x->skip > 0) $this->injectors[$i]->skip--;
            }

            // quick-check: if it's not a tag, no need to process
            if (empty( $token->is_tag )) {
                if ($token->type === 'text') {
                    // injector handler code; duplicated for performance reasons
                    foreach ($this->injectors as $i => $x) {
                        if (!$x->skip) $this->injectors[$i]->handleText($token);
                        if (is_array($token)) {
                            // the injector replaced the token with a list
                            $this->currentInjector = $i;
                            break;
                        }
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            $info = $definition->info[$token->name]->child;

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($info->type == 'empty' && $token->type == 'start') {
                // test if it claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
                // claims to be empty but really is a start tag
                $token = array(
                    new HTMLPurifier_Token_Start($token->name, $token->attr),
                    new HTMLPurifier_Token_End($token->name)
                );
                $ok = true;
            } elseif ($token->type == 'empty') {
                // real empty token
                $ok = true;
            } elseif ($token->type == 'start') {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->currentNesting)) {

                    $parent = array_pop($this->currentNesting);
                    $parent_info = $definition->info[$parent->name];

                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // close the parent, then re-loop to reprocess token
                        $result[] = new HTMLPurifier_Token_End($parent->name);
                        $this->inputIndex--;
                        continue;
                    }

                    $this->currentNesting[] = $parent; // undo the pop
                }
                $ok = true;
            }

            // injector handler code; duplicated for performance reasons
            if ($ok) {
                // NOTE(review): when the token was just expanded into a
                // Start/End pair above, it is already an array at this
                // point, so the first non-skipping injector receives an
                // array in handleElement() and is then recorded as the
                // substituting injector -- confirm this is intended.
                foreach ($this->injectors as $i => $x) {
                    if (!$x->skip) $this->injectors[$i]->handleElement($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if ($token->type != 'end') continue;

            // make sure that we have something open
            if (empty($this->currentNesting)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                continue;
            }

            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($this->currentNesting);
            if ($current_parent->name == $token->name) {
                $result[] = $token;
                foreach ($this->injectors as $i => $x) {
                    $this->injectors[$i]->notifyEnd($token);
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->currentNesting[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->currentNesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($this->currentNesting[$i]->name == $token->name) {
                    // current nesting is modified: everything from the
                    // matching element upwards is removed and kept here
                    $skipped_tags = array_splice($this->currentNesting, $i);
                    break;
                }
            }

            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                continue;
            }

            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            // (at index 0, which is why no notice is sent for $i == 0)
            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }

        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        // we're at the end now, fix all still unclosed tags (this is
        // duplicated from the end of the loop with some slight modifications)
        // not using $skipped_tags since it would invariably be all of them
        if (!empty($this->currentNesting)) {
            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }
        }

        // drop the per-run shared state
        unset($this->outputTokens, $this->injectors, $this->currentInjector,
            $this->currentNesting, $this->inputTokens, $this->inputIndex);

        return $result;
    }

    /**
     * Commits the outcome of handling the current input token.
     *
     * An array means the token was replaced by several tokens: they are
     * spliced into the input stream in place of the current token and the
     * input index is stepped back so they get processed in turn. Any other
     * truthy value is appended to the output, and a start token is also
     * pushed onto the current nesting stack. A falsy value is dropped.
     *
     * @param $token   Token object, array of replacement tokens, or a
     *                 falsy value.
     * @param $config  Configuration object (not used in this method).
     * @param $context Context object, by reference (not used in this method).
     */
    function processToken($token, $config, &$context) {
        if (is_array($token)) {
            // the original token was overloaded by an injector, time
            // to do some fancy acrobatics

            // $this->inputIndex is decremented so that the entire set gets
            // re-processed
            array_splice($this->inputTokens, $this->inputIndex--, 1, $token);

            // adjust the injector skips based on the array substitution
            if ($this->injectors) {
                $offset = count($token);
                for ($i = 0; $i <= $this->currentInjector; $i++) {
                    // because of the skip back, we need to add one more
                    // for uninitialized injectors. I'm not exactly
                    // sure why this is the case, but I think it has to
                    // do with the fact that we're decrementing skips
                    // before re-checking text
                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                    $this->injectors[$i]->skip += $offset;
                }
            }
        } elseif ($token) {
            // regular case
            $this->outputTokens[] = $token;
            if ($token->type == 'start') {
                $this->currentNesting[] = $token;
            } elseif ($token->type == 'end') {
                array_pop($this->currentNesting); // not actually used
            }
        }
    }

}