MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / htmlpurifier / HTMLPurifier / HTMLDefinition.php
blobaaeb8bae38547cd1b1fc2bf904763697171b8d78
1 <?php
3 require_once 'HTMLPurifier/Definition.php';
4 require_once 'HTMLPurifier/HTMLModuleManager.php';
6 // this definition and its modules MUST NOT define configuration directives
7 // outside of the HTML or Attr namespaces
9 HTMLPurifier_ConfigSchema::define(
10 'HTML', 'DefinitionID', null, 'string/null', '
11 <p>
12 Unique identifier for a custom-built HTML definition. If you edit
13 the raw version of the HTMLDefinition, introducing changes that the
14 configuration object does not reflect, you must specify this variable.
15 If you change your custom edits, you should change this directive, or
16 clear your cache. Example:
17 </p>
18 <pre>
19 $config = HTMLPurifier_Config::createDefault();
20 $config->set(\'HTML\', \'DefinitionID\', \'1\');
21 $def = $config->getHTMLDefinition();
22 $def->addAttribute(\'a\', \'tabindex\', \'Number\');
23 </pre>
24 <p>
25 In the above example, the configuration is still at the defaults, but
26 using the advanced API, an extra attribute has been added. The
27 configuration object normally has no way of knowing that this change
28 has taken place, so it needs an extra directive: %HTML.DefinitionID.
29 If someone else attempts to use the default configuration, these two
30 pieces of code will not clobber each other in the cache, since one has
31 an extra directive attached to it.
32 </p>
33 <p>
34 This directive has been available since 2.0.0, and in that version or
35 later you <em>must</em> specify a value to this directive to use the
36 advanced API features.
37 </p>
38 ');
40 HTMLPurifier_ConfigSchema::define(
41 'HTML', 'DefinitionRev', 1, 'int', '
42 <p>
43 Revision identifier for your custom definition specified in
44 %HTML.DefinitionID. This serves the same purpose: uniquely identifying
45 your custom definition, but this one does so in a chronological
46 context: revision 3 is more up-to-date than revision 2. Thus, when
47 this gets incremented, the cache handling is smart enough to clean
48 up any older revisions of your definition as well as flush the
49 cache. This directive has been available since 2.0.0.
50 </p>
51 ');
53 HTMLPurifier_ConfigSchema::define(
54 'HTML', 'BlockWrapper', 'p', 'string', '
55 <p>
56 String name of element to wrap inline elements that are inside a block
57 context. This only occurs in the children of blockquote in strict mode.
58 </p>
59 <p>
60 Example: by default value,
61 <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
62 <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
63 The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
64 as long as it is a block level element. This directive has been available
65 since 1.3.0.
66 </p>
67 ');
69 HTMLPurifier_ConfigSchema::define(
70 'HTML', 'Parent', 'div', 'string', '
71 <p>
72 String name of element that HTML fragment passed to library will be
73 inserted in. An interesting variation would be using span as the
74 parent element, meaning that only inline tags would be allowed.
75 This directive has been available since 1.3.0.
76 </p>
77 ');
79 HTMLPurifier_ConfigSchema::define(
80 'HTML', 'AllowedElements', null, 'lookup/null', '
81 <p>
82 If HTML Purifier\'s tag set is unsatisfactory for your needs, you
83 can overload it with your own list of tags to allow. Note that this
84 method is subtractive: it does its job by taking away from HTML Purifier\'s
85 usual feature set, so you cannot add a tag that HTML Purifier never
86 supported in the first place (like embed, form or head). If you
87 change this, you probably also want to change %HTML.AllowedAttributes.
88 </p>
89 <p>
90 <strong>Warning:</strong> If another directive conflicts with the
91 elements here, <em>that</em> directive will win and override.
92 This directive has been available since 1.3.0.
93 </p>
94 ');
96 HTMLPurifier_ConfigSchema::define(
97 'HTML', 'AllowedAttributes', null, 'lookup/null', '
98 <p>
99 If HTML Purifier\'s attribute set is unsatisfactory, overload it!
100 The syntax is "tag.attr" or "*.attr" for the global attributes
101 (style, id, class, dir, lang, xml:lang).
102 </p>
104 <strong>Warning:</strong> If another directive conflicts with the
105 elements here, <em>that</em> directive will win and override. For
106 example, %HTML.EnableAttrID will take precedence over *.id in this
107 directive. You must set that directive to true before you can use
108 IDs at all. This directive has been available since 1.3.0.
109 </p>
112 HTMLPurifier_ConfigSchema::define(
113 'HTML', 'Allowed', null, 'itext/null', '
115 This is a convenience directive that rolls the functionality of
116 %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
117 Specify elements and attributes that are allowed using:
118 <code>element1[attr1|attr2],element2...</code>. You can also use
119 newlines instead of commas to separate elements.
120 </p>
122 <strong>Warning</strong>:
123 All of the constraints on the component directives are still enforced.
124 The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
125 whitelist: directly copy-pasting it here will probably result in
126 broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
127 are set, this directive has no effect.
128 This directive has been available since 2.0.0.
129 </p>
133 * Definition of the purified HTML that describes allowed children,
134 * attributes, and many other things.
136 * Conventions:
138 * All member variables that are prefixed with info
139 * (including the main $info array) are used by HTML Purifier internals
140 * and should not be directly edited when customizing the HTMLDefinition.
141 * They can usually be set via configuration directives or custom
142 * modules.
144 * On the other hand, member variables without the info prefix are used
145 * internally by the HTMLDefinition and MUST NOT be used by other HTML
146 * Purifier internals. Many of them, however, are public, and may be
147 * edited by userspace code to tweak the behavior of HTMLDefinition.
149 * @note This class is inspected by Printer_HTMLDefinition; please
150 * update that class if things here change.
152 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
// FULLY-PUBLIC VARIABLES ---------------------------------------------

/**
 * Element definitions, keyed by element name; each value is an
 * HTMLPurifier_ElementDef.
 * @public
 */
var $info = array();

/**
 * Global attribute definitions, keyed by attribute name.
 * @public
 */
var $info_global_attr = array();

/**
 * Name of the parent element that the HTML will be placed into.
 * @public
 */
var $info_parent = 'div';

/**
 * Element definition for the parent element. Stored separately so the
 * parent may be a tag that is not allowed inside the HTML fragment.
 * @public
 */
var $info_parent_def;

/**
 * Name of the element used to wrap inline elements found in a block
 * context.
 * @note This is rarely used except for BLOCKQUOTEs in strict mode
 * @public
 */
var $info_block_wrapper = 'p';

/**
 * Tag transforms for deprecated tags, keyed by tag name; each value is
 * an HTMLPurifier_TagTransform.
 * @public
 */
var $info_tag_transform = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform objects that run before
 * validation.
 * @public
 */
var $info_attr_transform_pre = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform objects that run after
 * validation.
 * @public
 */
var $info_attr_transform_post = array();

/**
 * Nested lookup: content set name (Block, Inline) => element name =>
 * whether or not the element belongs to that content set.
 * @public
 */
var $info_content_sets = array();

/**
 * Doctype object
 */
var $doctype;
221 // RAW CUSTOMIZATION STUFF --------------------------------------------
/**
 * Adds a custom attribute to a pre-existing element. The change is
 * recorded in the anonymous module rather than applied directly.
 * @param $element_name String name of the element receiving the attribute
 * @param $attr_name String name of the attribute
 * @param $def Attribute definition; may be a string or an object, see
 *        HTMLPurifier_AttrTypes for details
 */
function addAttribute($element_name, $attr_name, $def) {
    // references are kept so the edit lands on the module's own element
    // object rather than on a PHP4 by-value copy
    $anon =& $this->getAnonymousModule();
    $target =& $anon->addBlankElement($element_name);
    $target->attr[$attr_name] = $def;
}
/**
 * Adds a custom element to your HTML definition by forwarding the call
 * to the anonymous module.
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter descriptions.
 */
function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
    $anon =& $this->getAnonymousModule();
    // true is passed as the second argument on the assumption that an
    // element the user adds explicitly is safe. This may not be a good
    // idea.
    $anon->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
}
/**
 * Returns a reference to the anonymous module, creating it on first
 * use, so advanced features are available without writing a module of
 * your own.
 * @return Reference to an HTMLPurifier_HTMLModule named 'Anonymous'
 */
function &getAnonymousModule() {
    if ($this->_anonModule) {
        return $this->_anonModule;
    }
    // first request: build the module and give it its name
    $this->_anonModule = new HTMLPurifier_HTMLModule();
    $this->_anonModule->name = 'Anonymous';
    return $this->_anonModule;
}
/**
 * Anonymous module holding user customizations. Created on demand by
 * getAnonymousModule() and unset by processModules() once it has been
 * handed to the manager.
 */
var $_anonModule;


// PUBLIC BUT INTERNAL VARIABLES --------------------------------------

var $type = 'HTML';
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
 * Performs low-cost, preliminary initialization: only the module
 * manager is created here.
 */
function HTMLPurifier_HTMLDefinition() {
    $this->manager = new HTMLPurifier_HTMLModuleManager();
}
/**
 * Builds the definition: processes the modules, applies the
 * configuration, discards the manager, and then trims each element
 * definition.
 * @param $config Configuration object, passed through to the two
 *        setup steps
 */
function doSetup($config) {
    $this->processModules($config);
    $this->setupConfigStuff($config);
    unset($this->manager);

    // cleanup: drop the content model fields from every element definition
    foreach ($this->info as $name => $unused) {
        unset(
            $this->info[$name]->content_model,
            $this->info[$name]->content_model_type
        );
    }
}
/**
 * Extract out the information from the manager: doctype, tag and
 * attribute transforms, element definitions and content sets.
 * @param $config Configuration object, passed to the manager's setup()
 */
function processModules($config) {

    // User specific changes live in the anonymous module. It is
    // late-loaded here so we don't have to deal with PHP4 reference
    // wonky-ness.
    if ($this->_anonModule) {
        $this->manager->addModule($this->_anonModule);
        unset($this->_anonModule);
    }

    $this->manager->setup($config);
    $this->doctype = $this->manager->doctype;

    // the three transform lists are merged by the same rule, so they
    // are handled by property name
    $transform_lists = array(
        'info_tag_transform',
        'info_attr_transform_pre',
        'info_attr_transform_post'
    );
    foreach ($this->manager->modules as $module) {
        foreach ($transform_lists as $list) {
            foreach ($module->$list as $key => $transform) {
                if ($transform !== false) {
                    $this->{$list}[$key] = $transform;
                } else {
                    // an explicit false removes the entry under this key
                    unset($this->{$list}[$key]);
                }
            }
        }
    }

    $this->info = $this->manager->getElements();
    $this->info_content_sets = $this->manager->contentSets->lookup;
}
/**
 * Sets up stuff based on config. We need a better way of doing this.
 *
 * Reads %HTML.BlockWrapper, %HTML.Parent, %HTML.AllowedElements,
 * %HTML.AllowedAttributes and, when neither of the latter two is an
 * array, %HTML.Allowed. It then prunes $info, $info_global_attr and
 * each element's attr array down to the whitelist. Whitelist entries
 * that match nothing are reported with E_USER_WARNING.
 *
 * Uses $this->manager, so it must run before the manager is unset.
 *
 * @param $config Configuration object; only get() is called on it
 */
function setupConfigStuff($config) {

    $block_wrapper = $config->get('HTML', 'BlockWrapper');
    if (isset($this->info_content_sets['Block'][$block_wrapper])) {
        $this->info_block_wrapper = $block_wrapper;
    } else {
        // $info_block_wrapper keeps its previous value
        trigger_error('Cannot use non-block element as block wrapper.',
            E_USER_ERROR);
    }

    $parent = $config->get('HTML', 'Parent');
    $def = $this->manager->getElement($parent, true);
    if ($def) {
        $this->info_parent = $parent;
        $this->info_parent_def = $def;
    } else {
        trigger_error('Cannot use unrecognized element as parent.',
            E_USER_ERROR);
        // fall back to the definition of the current parent name
        $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
    }

    // support template text
    $support = "(for information on implementing this, see the ".
               "support forums) ";

    // setup allowed elements

    $allowed_elements = $config->get('HTML', 'AllowedElements');
    $allowed_attributes = $config->get('HTML', 'AllowedAttributes');

    // %HTML.Allowed is only consulted when neither component directive
    // has been set
    if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
        $allowed = $config->get('HTML', 'Allowed');
        if (is_string($allowed)) {
            list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
        }
    }

    if (is_array($allowed_elements)) {
        foreach ($this->info as $name => $d) {
            if (!isset($allowed_elements[$name])) unset($this->info[$name]);
            // struck off: whatever is left afterwards matched no element
            unset($allowed_elements[$name]);
        }
        // emit errors
        foreach ($allowed_elements as $element => $d) {
            $element = htmlspecialchars($element);
            trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
        }
    }

    if (is_array($allowed_attributes)) {
        // Working copy of the whitelist. Every key that matches a real
        // attribute is struck off; what remains was requested but is
        // not supported.
        $unmatched = $allowed_attributes;

        foreach ($this->info_global_attr as $attr_key => $info) {
            if (!isset($allowed_attributes["*.$attr_key"])) {
                unset($this->info_global_attr[$attr_key]);
            } else {
                unset($unmatched["*.$attr_key"]);
            }
        }

        foreach ($this->info as $tag => $info) {
            foreach ($info->attr as $attr => $attr_info) {
                $specific = "$tag.$attr";
                $wildcard = "*.$attr";
                if (!isset($allowed_attributes[$specific]) &&
                    !isset($allowed_attributes[$wildcard])) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                }
                // Both spellings refer to an attribute that exists, so
                // both are struck off. Striking only one would report
                // the other as unsupported when the user listed both.
                unset($unmatched[$specific], $unmatched[$wildcard]);
            }
        }

        // emit errors
        foreach ($unmatched as $elattr => $d) {
            $parts = explode('.', $elattr, 2);
            if (count($parts) < 2) {
                // no "tag." or "*." prefix: there is nothing to split
                $bad = htmlspecialchars($elattr);
                trigger_error("Allowed attribute '$bad' must be written as ".
                    "'tag.attr' or '*.attr'",
                    E_USER_WARNING);
                continue;
            }
            $element = htmlspecialchars($parts[0]);
            $attribute = htmlspecialchars($parts[1]);
            if ($element === '*') {
                trigger_error("Global attribute '$attribute' is not ".
                    "supported in any elements $support",
                    E_USER_WARNING);
            } else {
                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                    E_USER_WARNING);
            }
        }
    }

}
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 * Entries are separated by commas or newlines; spaces and tabs are
 * ignored.
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *          it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes): two lookup
 *         arrays, the first keyed by element name and the second by
 *         "element.attr" (or "*.attr"), every value being true
 */
function parseTinyMCEAllowedList($list) {

    // Whitespace carries no meaning in this format; without this,
    // "a[href], b" would produce an element literally named " b".
    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if ($chunk === '') continue;

        $bracket = strpos($chunk, '[');
        if ($bracket === false || $bracket === 0) {
            // No attribute list. A chunk that starts with '[' has no
            // element name at all; it is kept whole so that it ends up
            // in the element list, where it can be reported as
            // unsupported.
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }

        // '*' only introduces global attributes; it is not an element
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;

        // remove the trailing ], but only if it is actually there
        if (substr($attr, -1) === ']') {
            $attr = (string) substr($attr, 0, -1);
        }
        foreach (explode('|', $attr) as $key) {
            // "a[]" or a stray '|' must not create an "a." entry
            if ($key === '') continue;
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);

}