3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
9 require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
11 HTMLPurifier_ConfigSchema
::define(
12 'AutoFormat', 'Custom', array(), 'list', '
14 This directive can be used to add custom auto-format injectors.
15 Specify an array of injector names (class name minus the prefix)
16 or concrete implementations. Injector class must exist. This directive
17 has been available since 2.0.1.
23 * Takes tokens and makes them well-formed (balance end tags, etc.)
25 class HTMLPurifier_Strategy_MakeWellFormed
extends HTMLPurifier_Strategy
29 * Locally shared variable references
32 var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
33 $currentInjector, $injectors;
35 function execute($tokens, $config, &$context) {
37 $definition = $config->getHTMLDefinition();
41 $generator = new HTMLPurifier_Generator();
42 $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
43 $e =& $context->get('ErrorCollector', true);
46 $this->currentNesting
= array();
47 $this->inputIndex
= false;
48 $this->inputTokens
=& $tokens;
49 $this->outputTokens
=& $result;
52 $context->register('CurrentNesting', $this->currentNesting
);
53 $context->register('InputIndex', $this->inputIndex
);
54 $context->register('InputTokens', $tokens);
56 // -- begin INJECTOR --
58 $this->injectors
= array();
60 $injectors = $config->getBatch('AutoFormat');
61 $custom_injectors = $injectors['Custom'];
62 unset($injectors['Custom']); // special case
63 foreach ($injectors as $injector => $b) {
64 $injector = "HTMLPurifier_Injector_$injector";
66 $this->injectors
[] = new $injector;
68 foreach ($custom_injectors as $injector) {
69 if (is_string($injector)) {
70 $injector = "HTMLPurifier_Injector_$injector";
71 $injector = new $injector;
73 $this->injectors
[] = $injector;
76 // array index of the injector that resulted in an array
77 // substitution. This enables processToken() to know which
78 // injectors are affected by the added tokens and which are
79 // not (namely, the ones after the current injector are not affected)
81 $this->currentInjector
= false;
83 // give the injectors references to the definition and context
84 // variables for performance reasons
85 foreach ($this->injectors
as $i => $x) {
86 $error = $this->injectors
[$i]->prepare($config, $context);
87 if (!$error) continue;
88 list($injector) = array_splice($this->injectors
, $i, 1);
89 $name = $injector->name
;
90 trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING
);
93 // warning: most foreach loops follow the convention $i => $x.
94 // be sure, for PHP4 compatibility, to only perform write operations
95 // directly referencing the object using $i: $x is only safe for reads
100 $context->register('CurrentToken', $token);
102 for ($this->inputIndex
= 0; isset($tokens[$this->inputIndex
]); $this->inputIndex++
) {
104 // if all goes well, this token will be passed through unharmed
105 $token = $tokens[$this->inputIndex
];
107 //printTokens($tokens, $this->inputIndex);
109 foreach ($this->injectors
as $i => $x) {
110 if ($x->skip
> 0) $this->injectors
[$i]->skip
--;
113 // quick-check: if it's not a tag, no need to process
114 if (empty( $token->is_tag
)) {
115 if ($token->type
=== 'text') {
116 // injector handler code; duplicated for performance reasons
117 foreach ($this->injectors
as $i => $x) {
118 if (!$x->skip
) $this->injectors
[$i]->handleText($token);
119 if (is_array($token)) {
120 $this->currentInjector
= $i;
125 $this->processToken($token, $config, $context);
129 $info = $definition->info
[$token->name
]->child
;
131 // quick tag checks: anything that's *not* an end tag
133 if ($info->type
== 'empty' && $token->type
== 'start') {
134 // test if it claims to be a start tag but is empty
135 $token = new HTMLPurifier_Token_Empty($token->name
, $token->attr
);
137 } elseif ($info->type
!= 'empty' && $token->type
== 'empty' ) {
138 // claims to be empty but really is a start tag
140 new HTMLPurifier_Token_Start($token->name
, $token->attr
),
141 new HTMLPurifier_Token_End($token->name
)
144 } elseif ($token->type
== 'empty') {
147 } elseif ($token->type
== 'start') {
150 // ...unless they also have to close their parent
151 if (!empty($this->currentNesting
)) {
153 $parent = array_pop($this->currentNesting
);
154 $parent_info = $definition->info
[$parent->name
];
156 // this can be replaced with a more general algorithm:
157 // if the token is not allowed by the parent, auto-close
159 if (!isset($parent_info->child
->elements
[$token->name
])) {
160 if ($e) $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
161 // close the parent, then re-loop to reprocess token
162 $result[] = new HTMLPurifier_Token_End($parent->name
);
167 $this->currentNesting
[] = $parent; // undo the pop
172 // injector handler code; duplicated for performance reasons
174 foreach ($this->injectors
as $i => $x) {
175 if (!$x->skip
) $this->injectors
[$i]->handleElement($token);
176 if (is_array($token)) {
177 $this->currentInjector
= $i;
181 $this->processToken($token, $config, $context);
185 // sanity check: we should be dealing with a closing tag
186 if ($token->type
!= 'end') continue;
188 // make sure that we have something open
189 if (empty($this->currentNesting
)) {
190 if ($escape_invalid_tags) {
191 if ($e) $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
192 $result[] = new HTMLPurifier_Token_Text(
193 $generator->generateFromToken($token, $config, $context)
196 $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
201 // first, check for the simplest case: everything closes neatly
202 $current_parent = array_pop($this->currentNesting
);
203 if ($current_parent->name
== $token->name
) {
205 foreach ($this->injectors
as $i => $x) {
206 $this->injectors
[$i]->notifyEnd($token);
211 // okay, so we're trying to close the wrong tag
213 // undo the previous pop
214 $this->currentNesting
[] = $current_parent;
216 // scroll back the entire nest, trying to find our tag.
217 // (feature could be to specify how far you'd like to go)
218 $size = count($this->currentNesting
);
219 // -2 because -1 is the last element, but we already checked that
220 $skipped_tags = false;
221 for ($i = $size - 2; $i >= 0; $i--) {
222 if ($this->currentNesting
[$i]->name
== $token->name
) {
223 // current nesting is modified
224 $skipped_tags = array_splice($this->currentNesting
, $i);
229 // we still didn't find the tag, so remove
230 if ($skipped_tags === false) {
231 if ($escape_invalid_tags) {
232 $result[] = new HTMLPurifier_Token_Text(
233 $generator->generateFromToken($token, $config, $context)
235 if ($e) $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Stray end tag to text');
237 $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Stray end tag removed');
242 // okay, we found it, close all the skipped tags
243 // note that skipped tags contains the element we need closed
244 for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
245 if ($i && $e && !isset($skipped_tags[$i]->armor
['MakeWellFormed_TagClosedError'])) {
246 $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
248 $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name
);
249 foreach ($this->injectors
as $j => $x) { // $j, not $i!!!
250 $this->injectors
[$j]->notifyEnd($new_token);
256 $context->destroy('CurrentNesting');
257 $context->destroy('InputTokens');
258 $context->destroy('InputIndex');
259 $context->destroy('CurrentToken');
261 // we're at the end now, fix all still unclosed tags (this is
262 // duplicated from the end of the loop with some slight modifications)
263 // not using $skipped_tags since it would invariably be all of them
264 if (!empty($this->currentNesting
)) {
265 for ($i = count($this->currentNesting
) - 1; $i >= 0; $i--) {
266 if ($e && !isset($this->currentNesting
[$i]->armor
['MakeWellFormed_TagClosedError'])) {
267 $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting
[$i]);
269 $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting
[$i]->name
);
270 foreach ($this->injectors
as $j => $x) { // $j, not $i!!!
271 $this->injectors
[$j]->notifyEnd($new_token);
276 unset($this->outputTokens
, $this->injectors
, $this->currentInjector
,
277 $this->currentNesting
, $this->inputTokens
, $this->inputIndex
);
282 function processToken($token, $config, &$context) {
283 if (is_array($token)) {
284 // the original token was overloaded by an injector, time
285 // to do some fancy acrobatics
287 // $this->inputIndex is decremented so that the entire set gets re-processed
289 array_splice($this->inputTokens
, $this->inputIndex
--, 1, $token);
291 // adjust the injector skips based on the array substitution
292 if ($this->injectors
) {
293 $offset = count($token);
294 for ($i = 0; $i <= $this->currentInjector
; $i++
) {
295 // because of the skip back, we need to add one more
296 // for uninitialized injectors. I'm not exactly
297 // sure why this is the case, but I think it has to
298 // do with the fact that we're decrementing skips
299 // before re-checking text
300 if (!$this->injectors
[$i]->skip
) $this->injectors
[$i]->skip++
;
301 $this->injectors
[$i]->skip +
= $offset;
306 $this->outputTokens
[] = $token;
307 if ($token->type
== 'start') {
308 $this->currentNesting
[] = $token;
309 } elseif ($token->type
== 'end') {
310 array_pop($this->currentNesting
); // not actually used