3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
6 require_once 'HTMLPurifier/TagTransform.php';
8 require_once 'HTMLPurifier/AttrValidator.php';
// Declares the Core.RemoveInvalidImg directive with the config schema.
// The values passed are the Core namespace, the directive name, true,
// the bool type string and the description text that follows.
// NOTE(review): true is presumably the default value (the description
// says setting it to false reverts to pre-1.3.0 behavior) -- confirm
// against HTMLPurifier_ConfigSchema::define().
10 HTMLPurifier_ConfigSchema
::define(
11 'Core', 'RemoveInvalidImg', true, 'bool', '
13 This directive enables pre-emptive URI checking in <code>img</code>
14 tags, as the attribute validation strategy is not authorized to
15 remove elements from the document. This directive has been available
16 since 1.3.0, revert to pre-1.3.0 behavior by setting to false.
// Declares the Core.RemoveScriptContents directive (value null, type
// string bool/null). Per its own description it is deprecated since
// 2.1.0: when it is left unset, %Core.HiddenElements takes precedence.
// NOTE(review): null is presumably the "not set" default -- confirm
// against HTMLPurifier_ConfigSchema::define().
21 HTMLPurifier_ConfigSchema
::define(
22 'Core', 'RemoveScriptContents', null, 'bool/null', '
24 This directive enables HTML Purifier to remove not only script tags
25 but all of their contents. This directive has been deprecated since 2.1.0,
26 and when not set the value of %Core.HiddenElements will take
27 precedence. This directive has been available since 2.0.0, and can be used to
28 revert to pre-2.0.0 behavior by setting it to false.
// Declares the Core.HiddenElements directive: a lookup array (passed
// here with script and style set to true) naming the elements whose
// contents are removed when the element itself is not allowed by the
// HTML definition.
// NOTE(review): the array is presumably the directive default -- confirm
// against HTMLPurifier_ConfigSchema::define().
33 HTMLPurifier_ConfigSchema
::define(
34 'Core', 'HiddenElements', array('script' => true, 'style' => true), 'lookup', '
36 This directive is a lookup array of elements which should have their
37 contents removed when they are not allowed by the HTML definition.
38 For example, the contents of a <code>script</code> tag are not
39 normally shown in a document, so if script tags are to be removed,
40 their contents should be removed too. This is opposed to a <code>b</code>
41 tag, which defines some presentational changes but does not hide its
48 * Removes all unrecognized tags from the list of tokens.
50 * This strategy iterates through all the tokens and removes unrecognized
51 * tokens. If a token is not recognized but a TagTransform is defined for
52 * that element, the element will be transformed accordingly.
55 class HTMLPurifier_Strategy_RemoveForeignElements
extends HTMLPurifier_Strategy
58 function execute($tokens, $config, &$context) {
59 $definition = $config->getHTMLDefinition();
60 $generator = new HTMLPurifier_Generator();
63 $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
64 $remove_invalid_img = $config->get('Core', 'RemoveInvalidImg');
66 $remove_script_contents = $config->get('Core', 'RemoveScriptContents');
67 $hidden_elements = $config->get('Core', 'HiddenElements');
69 // remove script contents compatibility
70 if ($remove_script_contents === true) {
71 $hidden_elements['script'] = true;
72 } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
73 unset($hidden_elements['script']);
76 $attr_validator = new HTMLPurifier_AttrValidator();
78 // removes tokens until it reaches a closing tag with its value
79 $remove_until = false;
81 // converts comments into text tokens when this is equal to a tag name
82 $textify_comments = false;
85 $context->register('CurrentToken', $token);
88 if ($config->get('Core', 'CollectErrors')) {
89 $e =& $context->get('ErrorCollector');
92 foreach($tokens as $token) {
94 if (empty($token->is_tag
) ||
$token->name
!== $remove_until) {
98 if (!empty( $token->is_tag
)) {
101 // before any processing, try to transform the element
103 isset($definition->info_tag_transform
[$token->name
])
105 $original_name = $token->name
;
106 // there is a transformation for this tag
108 $token = $definition->
109 info_tag_transform
[$token->name
]->
110 transform($token, $config, $context);
111 if ($e) $e->send(E_NOTICE
, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
114 if (isset($definition->info
[$token->name
])) {
116 // mostly everything's good, but
117 // we need to make sure required attributes are in order
119 $definition->info
[$token->name
]->required_attr
&&
120 ($token->name
!= 'img' ||
$remove_invalid_img) // ensure config option still works
122 $attr_validator->validateToken($token, $config, $context);
124 foreach ($definition->info
[$token->name
]->required_attr
as $name) {
125 if (!isset($token->attr
[$name])) {
131 if ($e) $e->send(E_ERROR
, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
134 $token->armor
['ValidateAttributes'] = true;
137 // CAN BE GENERICIZED
138 if (isset($hidden_elements[$token->name
]) && $token->type
== 'start') {
139 $textify_comments = $token->name
;
140 } elseif ($token->name
=== $textify_comments && $token->type
== 'end') {
141 $textify_comments = false;
144 } elseif ($escape_invalid_tags) {
145 // invalid tag, generate HTML representation and insert in
146 if ($e) $e->send(E_WARNING
, 'Strategy_RemoveForeignElements: Foreign element to text');
147 $token = new HTMLPurifier_Token_Text(
148 $generator->generateFromToken($token, $config, $context)
151 // check if we need to destroy all of the tag's children
152 // CAN BE GENERICIZED
153 if (isset($hidden_elements[$token->name
])) {
154 if ($token->type
== 'start') {
155 $remove_until = $token->name
;
156 } elseif ($token->type
== 'empty') {
157 // do nothing: we're still looking
159 $remove_until = false;
161 if ($e) $e->send(E_ERROR
, 'Strategy_RemoveForeignElements: Foreign meta element removed');
163 if ($e) $e->send(E_ERROR
, 'Strategy_RemoveForeignElements: Foreign element removed');
167 } elseif ($token->type
== 'comment') {
168 // textify comments in script tags when they are allowed
169 if ($textify_comments !== false) {
170 $data = $token->data
;
171 $token = new HTMLPurifier_Token_Text($data);
174 if ($e) $e->send(E_NOTICE
, 'Strategy_RemoveForeignElements: Comment removed');
177 } elseif ($token->type
== 'text') {
183 if ($remove_until && $e) {
184 // we removed tokens until the end, throw error
185 $e->send(E_ERROR
, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
188 $context->destroy('CurrentToken');