3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
9 require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
11 HTMLPurifier_ConfigSchema
::define(
12 'AutoFormat', 'Custom', array(), 'list', '
14 This directive can be used to add custom auto-format injectors.
15 Specify an array of injector names (class name minus the prefix)
16 or concrete implementations. Injector class must exist. This directive
17 has been available since 2.0.1.
23 * Takes tokens and makes them well-formed (balances end tags, etc.)
25 class HTMLPurifier_Strategy_MakeWellFormed
extends HTMLPurifier_Strategy
29 * Locally shared variable references
32 var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
33 $currentInjector, $injectors;
/**
 * Walks the input token list and builds an output list in which tags are
 * balanced: unexpected end tags are dropped or turned into text, and tags
 * still open at the end of input get closing tokens.
 *
 * @param $tokens  Array of token objects; the code reads ->type, ->name,
 *                 ->attr and ->is_tag on them.
 * @param $config  Configuration object; queried for the HTML definition,
 *                 Core.EscapeInvalidTags and the AutoFormat batch.
 * @param $context Context object (by reference). 'CurrentNesting',
 *                 'InputIndex', 'InputTokens' and 'CurrentToken' are
 *                 registered here and destroyed again before the end.
 * @return Presumably the resulting token array ($result) -- no return
 *         statement is visible in this view; TODO confirm.
 */
35 function execute($tokens, $config, &$context) {
37 $definition = $config->getHTMLDefinition();
// stack of currently open start tokens; also published through the
// context under 'CurrentNesting'
40 $this->currentNesting
= array();
41 $context->register('CurrentNesting', $this->currentNesting
);
// position in the input token list; stays false until the main loop
// below sets it to 0
44 $this->inputIndex
= false;
45 $context->register('InputIndex', $this->inputIndex
);
// the input list is registered in the context and aliased by reference
// on the instance, which is what lets processToken() splice into it
48 $context->register('InputTokens', $tokens);
49 $this->inputTokens
=& $tokens;
// outputTokens aliases $result, so tokens appended by processToken()
// end up in $result
53 $this->outputTokens
=& $result;
55 // %Core.EscapeInvalidTags
56 $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
57 $generator = new HTMLPurifier_Generator();
// error collector; the `if ($e)` guards below treat it as optional
// (the second argument presumably suppresses the "not found" error --
// TODO confirm against the context class)
59 $e =& $context->get('ErrorCollector', true);
61 // -- begin INJECTOR --
63 $this->injectors
= array();
// every key left in the AutoFormat batch once 'Custom' is removed is
// mapped to a HTMLPurifier_Injector_* class name
65 $injectors = $config->getBatch('AutoFormat');
66 $custom_injectors = $injectors['Custom'];
67 unset($injectors['Custom']); // special case
68 foreach ($injectors as $injector => $b) {
69 $injector = "HTMLPurifier_Injector_$injector";
71 $this->injectors
[] = new $injector;
// custom entries are either class-name suffixes (strings, instantiated
// here) or values that are appended as given
73 foreach ($custom_injectors as $injector) {
74 if (is_string($injector)) {
75 $injector = "HTMLPurifier_Injector_$injector";
76 $injector = new $injector;
78 $this->injectors
[] = $injector;
81 // array index of the injector that resulted in an array
82 // substitution. This enables processTokens() to know which
83 // injectors are affected by the added tokens and which are
84 // not (namely, the ones after the current injector are not
86 $this->currentInjector
= false;
88 // give the injectors references to the definition and context
89 // variables for performance reasons
// prepare() returning a non-empty value means the injector cannot be
// enabled; it is removed and a warning names what was not allowed.
// NOTE(review): array_splice() renumbers $this->injectors while the
// foreach keeps walking the keys of the pre-splice array, so after a
// removal the later $i values appear to point one slot too far --
// verify the case where an injector fails prepare().
90 foreach ($this->injectors
as $i => $x) {
91 $error = $this->injectors
[$i]->prepare($config, $context);
92 if (!$error) continue;
93 list($injector) = array_splice($this->injectors
, $i, 1);
94 $name = $injector->name
;
95 trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING
);
// the token being processed is exposed to the context as 'CurrentToken'
101 $context->register('CurrentToken', $token);
103 for ($this->inputIndex
= 0; isset($tokens[$this->inputIndex
]); $this->inputIndex++
) {
105 // if all goes well, this token will be passed through unharmed
106 $token = $tokens[$this->inputIndex
];
// count each injector's pending skip counter down by one per input token
108 foreach ($this->injectors
as $i => $x) {
109 if ($x->skip
> 0) $this->injectors
[$i]->skip
--;
112 // quick-check: if it's not a tag, no need to process
113 if (empty( $token->is_tag
)) {
114 if ($token->type
=== 'text') {
115 // injector handler code; duplicated for performance reasons
// an injector signals a substitution by turning $token into an array
// (handleText() apparently receives it by reference -- TODO confirm);
// the index of that injector is remembered for processToken()
116 foreach ($this->injectors
as $i => $x) {
117 if (!$x->skip
) $x->handleText($token);
118 if (is_array($token)) {
119 $this->currentInjector
= $i;
124 $this->processToken($token, $config, $context);
// the ->child entry of the current element's definition; its ->type is
// compared against 'empty' below
128 $info = $definition->info
[$token->name
]->child
;
130 // quick tag checks: anything that's *not* an end tag
132 if ($info->type
== 'empty' && $token->type
== 'start') {
133 // test if it claims to be a start tag but is empty
134 $token = new HTMLPurifier_Token_Empty($token->name
, $token->attr
);
136 } elseif ($info->type
!= 'empty' && $token->type
== 'empty' ) {
137 // claims to be empty but really is a start tag
139 new HTMLPurifier_Token_Start($token->name
, $token->attr
),
140 new HTMLPurifier_Token_End($token->name
)
143 } elseif ($token->type
== 'empty') {
146 } elseif ($token->type
== 'start') {
149 // ...unless they also have to close their parent
150 if (!empty($this->currentNesting
)) {
152 $parent = array_pop($this->currentNesting
);
153 $parent_info = $definition->info
[$parent->name
];
155 // this can be replaced with a more general algorithm:
156 // if the token is not allowed by the parent, auto-close
158 if (!isset($parent_info->child
->elements
[$token->name
])) {
159 if ($e) $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
160 // close the parent, then append the token
161 $result[] = new HTMLPurifier_Token_End($parent->name
);
163 $this->currentNesting
[] = $token;
167 $this->currentNesting
[] = $parent; // undo the pop
172 // injector handler code; duplicated for performance reasons
174 foreach ($this->injectors
as $i => $x) {
175 if (!$x->skip
) $x->handleElement($token);
176 if (is_array($token)) {
177 $this->currentInjector
= $i;
181 $this->processToken($token, $config, $context);
185 // sanity check: we should be dealing with a closing tag
186 if ($token->type
!= 'end') continue;
188 // make sure that we have something open
189 if (empty($this->currentNesting
)) {
190 if ($escape_invalid_tags) {
191 if ($e) $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
192 $result[] = new HTMLPurifier_Token_Text(
193 $generator->generateFromToken($token, $config, $context)
196 $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
201 // first, check for the simplest case: everything closes neatly
202 $current_parent = array_pop($this->currentNesting
);
203 if ($current_parent->name
== $token->name
) {
208 // okay, so we're trying to close the wrong tag
210 // undo the previous pop
211 $this->currentNesting
[] = $current_parent;
213 // scroll back the entire nest, trying to find our tag.
214 // (feature could be to specify how far you'd like to go)
215 $size = count($this->currentNesting
);
216 // -2 because -1 is the last element, but we already checked that
217 $skipped_tags = false;
218 for ($i = $size - 2; $i >= 0; $i--) {
219 if ($this->currentNesting
[$i]->name
== $token->name
) {
220 // current nesting is modified
221 $skipped_tags = array_splice($this->currentNesting
, $i);
226 // we still didn't find the tag, so remove
227 if ($skipped_tags === false) {
228 if ($escape_invalid_tags) {
229 $result[] = new HTMLPurifier_Token_Text(
230 $generator->generateFromToken($token, $config, $context)
232 if ($e) $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Stray end tag to text');
234 $e->send(E_WARNING
, 'Strategy_MakeWellFormed: Stray end tag removed');
239 // okay, we found it, close all the skipped tags
240 // note that skipped tags contains the element we need closed
241 $size = count($skipped_tags);
// the loop stops before index 0; the End token emitted after it
// apparently closes that remaining element, the one matching the end tag
242 for ($i = $size - 1; $i > 0; $i--) {
243 if ($e && !isset($skipped_tags[$i]->armor
['MakeWellFormed_TagClosedError'])) {
244 $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
246 $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name
);
249 $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name
);
// tear down the context variables registered at the top of this method
253 $context->destroy('CurrentNesting');
254 $context->destroy('InputTokens');
255 $context->destroy('InputIndex');
256 $context->destroy('CurrentToken');
258 // we're at the end now, fix all still unclosed tags
259 // not using processToken() because at this point we don't
260 // care about current nesting
261 if (!empty($this->currentNesting
)) {
262 $size = count($this->currentNesting
);
263 for ($i = $size - 1; $i >= 0; $i--) {
264 if ($e && !isset($this->currentNesting
[$i]->armor
['MakeWellFormed_TagClosedError'])) {
265 $e->send(E_NOTICE
, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting
[$i]);
268 new HTMLPurifier_Token_End($this->currentNesting
[$i]->name
);
// drop the per-run state held on the instance
272 unset($this->outputTokens
, $this->injectors
, $this->currentInjector
,
273 $this->currentNesting
, $this->inputTokens
, $this->inputIndex
);
278 function processToken($token, $config, &$context) {
279 if (is_array($token)) {
280 // the original token was overloaded by an injector, time
281 // to do some fancy acrobatics
283 // $this->inputIndex is decremented so that the entire set gets
285 array_splice($this->inputTokens
, $this->inputIndex
--, 1, $token);
287 // adjust the injector skips based on the array substitution
288 if ($this->injectors
) {
289 $offset = count($token) +
1;
290 for ($i = 0; $i <= $this->currentInjector
; $i++
) {
291 $this->injectors
[$i]->skip +
= $offset;
296 $this->outputTokens
[] = $token;
297 if ($token->type
== 'start') {
298 $this->currentNesting
[] = $token;
299 } elseif ($token->type
== 'end') {
300 array_pop($this->currentNesting
); // not actually used