3 require_once 'HTMLPurifier/HTMLModule.php';
4 require_once 'HTMLPurifier/ElementDef.php';
5 require_once 'HTMLPurifier/Doctype.php';
6 require_once 'HTMLPurifier/DoctypeRegistry.php';
8 require_once 'HTMLPurifier/ContentSets.php';
9 require_once 'HTMLPurifier/AttrTypes.php';
10 require_once 'HTMLPurifier/AttrCollections.php';
12 require_once 'HTMLPurifier/AttrDef.php';
13 require_once 'HTMLPurifier/AttrDef/Enum.php';
16 require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
17 require_once 'HTMLPurifier/HTMLModule/Text.php';
18 require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
19 require_once 'HTMLPurifier/HTMLModule/List.php';
20 require_once 'HTMLPurifier/HTMLModule/Presentation.php';
21 require_once 'HTMLPurifier/HTMLModule/Edit.php';
22 require_once 'HTMLPurifier/HTMLModule/Bdo.php';
23 require_once 'HTMLPurifier/HTMLModule/Tables.php';
24 require_once 'HTMLPurifier/HTMLModule/Image.php';
25 require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
26 require_once 'HTMLPurifier/HTMLModule/Legacy.php';
27 require_once 'HTMLPurifier/HTMLModule/Target.php';
28 require_once 'HTMLPurifier/HTMLModule/Scripting.php';
29 require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
30 require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
31 require_once 'HTMLPurifier/HTMLModule/Ruby.php';
32 require_once 'HTMLPurifier/HTMLModule/Object.php';
35 require_once 'HTMLPurifier/HTMLModule/Tidy.php';
36 require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
37 require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
38 require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
// %HTML.Doctype: name of the doctype to filter against. The allowed values
// correspond to the doctypes registered in the constructor of
// HTMLPurifier_HTMLModuleManager.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'Doctype', '', 'string',
    'Doctype to use during filtering. '.
    'Technically speaking this is not actually a doctype (as it does '.
    'not identify a corresponding DTD), but we are using this name '.
    'for sake of simplicity. When non-blank, this will override any older directives '.
    'like %HTML.XHTML or %HTML.Strict.'
);
HTMLPurifier_ConfigSchema::defineAllowedValues('HTML', 'Doctype', array(
    '', 'HTML 4.01 Transitional', 'HTML 4.01 Strict',
    'XHTML 1.0 Transitional', 'XHTML 1.0 Strict',
    'XHTML 1.1'
));

// %HTML.CustomDoctype: only consulted when %HTML.Doctype is blank.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'CustomDoctype', null, 'string/null',
'
A custom doctype for power-users who defined there own document
type. This directive only applies when %HTML.Doctype is blank.
This directive has been available since 2.0.1.
'
);

// %HTML.Trusted: read by HTMLPurifier_HTMLModuleManager::setup() to decide
// whether unsafe elements and attributes are allowed.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'Trusted', false, 'bool',
    'Indicates whether or not the user input is trusted or not. If the '.
    'input is trusted, a more expansive set of allowed tags and attributes '.
    'will be used. This directive has been available since 2.0.0.'
);

// %HTML.AllowedModules: optional lookup used by setup() to drop doctype
// modules that are not listed (core and user modules are exempt).
// NOTE(review): the <p> wrappers are restored markup - confirm against the
// rendered configuration documentation.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedModules', null, 'lookup/null', '
<p>
    A doctype comes with a set of usual modules to use. Without having
    to mucking about with the doctypes, you can quickly activate or
    disable these modules by specifying which modules you wish to allow
    with this directive. This is most useful for unit testing specific
    modules, although end users may find it useful for their own ends.
</p>
<p>
    If you specify a module that does not exist, the manager will silently
    fail to use it, so be careful! User-defined modules are not affected
    by this directive. Modules defined in %HTML.CoreModules are not
    affected by this directive. This directive has been available since 2.0.0.
</p>
');

// %HTML.CoreModules: modules that setup() never removes, regardless of
// %HTML.AllowedModules.
// NOTE(review): 'Structure', 'Text', 'Hypertext' and 'List' (XHTML's core
// modules, as the description states) and the 'lookup' type are restored
// values - confirm against the shipped defaults.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'CoreModules', array(
        'Structure' => true,
        'Text' => true,
        'Hypertext' => true,
        'List' => true,
        'NonXMLCommonAttributes' => true,
        'XMLCommonAttributes' => true,
        'CommonAttributes' => true
    ), 'lookup', '
<p>
    Certain modularized doctypes (XHTML, namely), have certain modules
    that must be included for the doctype to be an conforming document
    type: put those modules here. By default, XHTML\'s core modules
    are used. You can set this to a blank array to disable core module
    protection, but this is not recommended. This directive has been
    available since 2.0.0.
</p>
');
107 class HTMLPurifier_HTMLModuleManager
111 * Instance of HTMLPurifier_DoctypeRegistry
117 * Instance of current doctype
123 * Instance of HTMLPurifier_AttrTypes
129 * Active instances of modules for the specified doctype are
130 * indexed, by name, in this array.
132 var $modules = array();
135 * Array of recognized HTMLPurifier_Module instances, indexed by
136 * module's class name. This array is usually lazy loaded, but a
137 * user can overload a module by pre-emptively registering it.
139 var $registeredModules = array();
/**
 * List of extra modules that were added by the user using addModule().
 * These get unconditionally merged into the current doctype, whatever
 * it may be.
 */
146 var $userModules = array();
149 * Associative array of element name to list of modules that have
150 * definitions for the element; this array is dynamically filled.
152 var $elementLookup = array();
154 /** List of prefixes we should use for registering small names */
155 var $prefixes = array('HTMLPurifier_HTMLModule_');
157 var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
158 var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
160 /** If set to true, unsafe elements and attributes will be allowed */
161 var $trusted = false;
/**
 * Constructor: creates the editable attribute-type and doctype registries
 * and registers the stock HTML 4.01, XHTML 1.0 and XHTML 1.1 doctypes.
 *
 * Every register() call passes, in order: the doctype name, an XML flag,
 * the list of modules, the list of tidy modules, an empty array, and the
 * public and system DTD identifiers.
 * NOTE(review): the empty array is taken to be the alias list expected by
 * HTMLPurifier_DoctypeRegistry::register() - confirm against that class.
 */
function HTMLPurifier_HTMLModuleManager() {

    // editable internal objects
    $this->attrTypes = new HTMLPurifier_AttrTypes();
    $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

    // setup default HTML doctypes

    // module lists reused by several doctypes
    $common = array(
        'CommonAttributes', 'Text', 'Hypertext', 'List',
        'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
        'StyleAttribute', 'Scripting', 'Object'
    );
    $transitional = array('Legacy', 'Target');
    $xml = array('XMLCommonAttributes');
    $non_xml = array('NonXMLCommonAttributes');

    $this->doctypes->register(
        'HTML 4.01 Transitional', false,
        array_merge($common, $transitional, $non_xml),
        array('Tidy_Transitional', 'Tidy_Proprietary'),
        array(),
        '-//W3C//DTD HTML 4.01 Transitional//EN',
        'http://www.w3.org/TR/html4/loose.dtd'
    );

    $this->doctypes->register(
        'HTML 4.01 Strict', false,
        array_merge($common, $non_xml),
        array('Tidy_Strict', 'Tidy_Proprietary'),
        array(),
        '-//W3C//DTD HTML 4.01//EN',
        'http://www.w3.org/TR/html4/strict.dtd'
    );

    $this->doctypes->register(
        'XHTML 1.0 Transitional', true,
        array_merge($common, $transitional, $xml, $non_xml),
        array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary'),
        array(),
        '-//W3C//DTD XHTML 1.0 Transitional//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
    );

    // 'Tidy_Strict' used to be listed twice here; setup() keys active
    // modules by name, so the duplicate only caused the module to be
    // processed and constructed a second time.
    $this->doctypes->register(
        'XHTML 1.0 Strict', true,
        array_merge($common, $xml, $non_xml),
        array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary'),
        array(),
        '-//W3C//DTD XHTML 1.0 Strict//EN',
        'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
    );

    // Same duplicate removed here; the first-occurrence order is kept.
    $this->doctypes->register(
        'XHTML 1.1', true,
        array_merge($common, $xml, array('Ruby')),
        array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary'), // Tidy_XHTML1_1
        array(),
        '-//W3C//DTD XHTML 1.1//EN',
        'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
    );

}
/**
 * Registers a module to the recognized module list, useful for
 * overloading pre-existing modules.
 * @param $module Mixed: string module name, with or without
 *                HTMLPurifier_HTMLModule prefix, or instance of
 *                subclass of HTMLPurifier_HTMLModule.
 * @note This function will not call autoload, you must instantiate
 *       (and thus invoke) autoload outside the method.
 * @note If a string is passed as a module name, different variants
 *       will be tested in this order:
 *          - Check all prefixes with $name in order they were added
 *            (HTMLPurifier_HTMLModule_ is the first prefix)
 *          - Check for literal object name
 *          - Throw fatal error
 *       If your object name collides with an internal class, specify
 *       your module manually. All modules must have been included
 *       externally: registerModule will not perform inclusions for you!
 * @warning If your module has the same name as an already loaded
 *          module, your module will overload the old one WITHOUT
 *          warning.
 */
function registerModule($module) {
    if (is_string($module)) {
        // attempt to resolve the name to a loaded class
        $original_module = $module;
        $ok = false;
        foreach ($this->prefixes as $prefix) {
            $module = $prefix . $original_module;
            if ($this->_classExists($module)) {
                // first matching prefix wins
                $ok = true;
                break;
            }
        }
        if (!$ok) {
            // no prefixed class found: fall back to the literal name
            $module = $original_module;
            if (!$this->_classExists($module)) {
                trigger_error($original_module . ' module does not exist',
                    E_USER_ERROR);
                // only reached if an error handler swallowed the error
                return;
            }
        }
        $module = new $module();
    }
    if (empty($module->name)) {
        // modules are indexed by name, so a nameless one cannot be stored
        trigger_error('Module instance of ' . get_class($module) . ' must have name');
        return;
    }
    $this->registeredModules[$module->name] = $module;
}
/**
 * Safely tests for class existence without invoking __autoload in PHP5
 * or greater.
 * @param $name String class name to test
 * @return Boolean, true if the class is already defined
 * @note If any other class needs it, we'll need to stash in a
 *       conjectured "compatibility" class
 * @private
 */
function _classExists($name) {
    // the version check is done once and cached for later calls
    static $is_php_4 = null;
    if ($is_php_4 === null) {
        $is_php_4 = version_compare(PHP_VERSION, '5', '<');
    }
    if ($is_php_4) {
        // PHP 4's class_exists() takes a single argument
        return class_exists($name);
    } else {
        // second argument false: do not trigger autoloading
        return class_exists($name, false);
    }
}
/**
 * Registers a module and queues it for inclusion in whichever doctype
 * is active when setup() runs.
 * @param $module Mixed: module name string or module instance; an
 *                instance is recorded under its name property
 */
function addModule($module) {
    $this->registerModule($module);
    // user modules are tracked by name, never by instance
    $module_name = is_object($module) ? $module->name : $module;
    array_push($this->userModules, $module_name);
}
/**
 * Appends a class-name prefix to the list registerModule() walks when
 * it resolves a short module name to a concrete class.
 * @param $prefix String prefix, tried after all previously added ones
 */
function addPrefix($prefix) {
    array_push($this->prefixes, $prefix);
}
/**
 * Performs processing on modules, after being called you may
 * use getElement() and getElements()
 * @param $config Instance of HTMLPurifier_Config
 */
function setup($config) {

    $this->trusted = $config->get('HTML', 'Trusted');

    // build the doctype object and start from its module list
    $this->doctype = $this->doctypes->make($config);
    $modules = $this->doctype->modules;

    // take out the default modules that aren't allowed
    $lookup = $config->get('HTML', 'AllowedModules');
    $special_cases = $config->get('HTML', 'CoreModules');

    if (is_array($lookup)) {
        foreach ($modules as $k => $m) {
            // core modules are never removed
            if (isset($special_cases[$m])) continue;
            if (!isset($lookup[$m])) unset($modules[$k]);
        }
    }

    // merge in custom modules
    $modules = array_merge($modules, $this->userModules);

    foreach ($modules as $module) {
        $this->processModule($module);
    }

    // tidy modules are activated too, and configured when they expose
    // a construct() method
    foreach ($this->doctype->tidyModules as $module) {
        $this->processModule($module);
        if (method_exists($this->modules[$module], 'construct')) {
            $this->modules[$module]->construct($config);
        }
    }

    // setup lookup table based on all valid modules
    foreach ($this->modules as $module) {
        foreach ($module->info as $name => $def) {
            if (!isset($this->elementLookup[$name])) {
                $this->elementLookup[$name] = array();
            }
            $this->elementLookup[$name][] = $module->name;
        }
    }

    // note the different choice
    $this->contentSets = new HTMLPurifier_ContentSets(
        // content set assembly deals with all possible modules,
        // not just ones deemed to be "safe"
        $this->modules
    );
    $this->attrCollections = new HTMLPurifier_AttrCollections(
        $this->attrTypes,
        // there is no way to directly disable a global attribute,
        // but using AllowedAttributes or simply not including
        // the module in your custom doctype should be sufficient
        $this->modules
    );
}
/**
 * Takes a module and adds it to the active module collection,
 * registering it if necessary.
 * @param $module Mixed: module name string, or module instance. An
 *                instance is always (re-)registered and is then
 *                activated under its name property.
 */
function processModule($module) {
    if (is_object($module)) {
        $this->registerModule($module);
        // registerModule() rejects nameless instances and has already
        // raised the error; there is nothing to activate
        if (empty($module->name)) return;
        // arrays are keyed by module name: an object is not a legal offset
        $module = $module->name;
    } elseif (!isset($this->registeredModules[$module])) {
        $this->registerModule($module);
    }
    $this->modules[$module] = $this->registeredModules[$module];
}
/**
 * Retrieves merged element definitions.
 * @return Array of HTMLPurifier_ElementDef, indexed by element name
 */
function getElements() {

    $elements = array();
    foreach ($this->modules as $module) {
        foreach ($module->info as $name => $v) {
            // each element is merged once, whichever module lists it first
            if (isset($elements[$name])) continue;
            // if element is not safe, don't use it
            if (!$this->trusted && ($v->safe === false)) continue;
            $elements[$name] = $this->getElement($name);
        }
    }

    // remove dud elements, this happens when an element that
    // appeared to be safe actually wasn't
    foreach ($elements as $n => $v) {
        if ($v === false) unset($elements[$n]);
    }

    return $elements;

}
417 * Retrieves a single merged element definition
418 * @param $name Name of element
419 * @param $trusted Boolean trusted overriding parameter: set to true
420 * if you want the full version of an element
421 * @return Merged HTMLPurifier_ElementDef
423 function getElement($name, $trusted = null) {
426 if ($trusted === null) $trusted = $this->trusted
;
428 $modules = $this->modules
;
430 if (!isset($this->elementLookup
[$name])) {
434 foreach($this->elementLookup
[$name] as $module_name) {
436 $module = $modules[$module_name];
438 // copy is used because, ideally speaking, the original
439 // definition should not be modified. Usually, this will
440 // make no difference, but for consistency's sake
441 $new_def = $module->info
[$name]->copy();
443 // refuse to create/merge in a definition that is deemed unsafe
444 if (!$trusted && ($new_def->safe
=== false)) {
449 if (!$def && $new_def->standalone
) {
450 // element with unknown safety is not to be trusted.
451 // however, a merge-in definition with undefined safety
453 if (!$trusted && !$new_def->safe
) continue;
456 $def->mergeIn($new_def);
458 // could "save it for another day":
459 // non-standalone definitions that don't have a standalone
460 // to merge into could be deferred to the end
464 // attribute value expansions
465 $this->attrCollections
->performInclusions($def->attr
);
466 $this->attrCollections
->expandIdentifiers($def->attr
, $this->attrTypes
);
468 // descendants_are_inline, for ChildDef_Chameleon
469 if (is_string($def->content_model
) &&
470 strpos($def->content_model
, 'Inline') !== false) {
471 if ($name != 'del' && $name != 'ins') {
472 // this is for you, ins/del
473 $def->descendants_are_inline
= true;
477 $this->contentSets
->generateChildDef($def, $module);
480 // add information on required attributes
481 foreach ($def->attr
as $attr_name => $attr_def) {
482 if ($attr_def->required
) {
483 $def->required_attr
[] = $attr_name;