"MDL-12304, fix double text"
[moodle-linuxchix.git] / lib / htmlpurifier / HTMLPurifier / HTMLDefinition.php
blob51367ca4038bb20afc8c21f7d520da57892e097d
1 <?php
3 require_once 'HTMLPurifier/Definition.php';
4 require_once 'HTMLPurifier/HTMLModuleManager.php';
6 // this definition and its modules MUST NOT define configuration directives
7 // outside of the HTML or Attr namespaces
9 HTMLPurifier_ConfigSchema::define(
10 'HTML', 'DefinitionID', null, 'string/null', '
11 <p>
12 Unique identifier for a custom-built HTML definition. If you edit
13 the raw version of the HTMLDefinition, introducing changes that the
14 configuration object does not reflect, you must specify this variable.
15 If you change your custom edits, you should change this directive, or
16 clear your cache. Example:
17 </p>
18 <pre>
19 $config = HTMLPurifier_Config::createDefault();
20 $config->set(\'HTML\', \'DefinitionID\', \'1\');
21 $def = $config->getHTMLDefinition();
22 $def->addAttribute(\'a\', \'tabindex\', \'Number\');
23 </pre>
24 <p>
25 In the above example, the configuration is still at the defaults, but
26 using the advanced API, an extra attribute has been added. The
27 configuration object normally has no way of knowing that this change
28 has taken place, so it needs an extra directive: %HTML.DefinitionID.
29 If someone else attempts to use the default configuration, these two
30 pieces of code will not clobber each other in the cache, since one has
31 an extra directive attached to it.
32 </p>
33 <p>
34 This directive has been available since 2.0.0, and in that version or
35 later you <em>must</em> specify a value to this directive to use the
36 advanced API features.
37 </p>
38 ');
40 HTMLPurifier_ConfigSchema::define(
41 'HTML', 'DefinitionRev', 1, 'int', '
42 <p>
43 Revision identifier for your custom definition specified in
44 %HTML.DefinitionID. This serves the same purpose: uniquely identifying
45 your custom definition, but this one does so in a chronological
46 context: revision 3 is more up-to-date than revision 2. Thus, when
47 this gets incremented, the cache handling is smart enough to clean
48 up any older revisions of your definition as well as flush the
49 cache. This directive has been available since 2.0.0.
50 </p>
51 ');
53 HTMLPurifier_ConfigSchema::define(
54 'HTML', 'BlockWrapper', 'p', 'string', '
55 <p>
56 String name of element to wrap inline elements that are inside a block
57 context. This only occurs in the children of blockquote in strict mode.
58 </p>
59 <p>
60 Example: by default value,
61 <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
62 <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
63 The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
64 as long as it is a block level element. This directive has been available
65 since 1.3.0.
66 </p>
67 ');
69 HTMLPurifier_ConfigSchema::define(
70 'HTML', 'Parent', 'div', 'string', '
71 <p>
72 String name of element that HTML fragment passed to library will be
73 inserted in. An interesting variation would be using span as the
74 parent element, meaning that only inline tags would be allowed.
75 This directive has been available since 1.3.0.
76 </p>
77 ');
79 HTMLPurifier_ConfigSchema::define(
80 'HTML', 'AllowedElements', null, 'lookup/null', '
81 <p>
82 If HTML Purifier\'s tag set is unsatisfactory for your needs, you
83 can overload it with your own list of tags to allow. Note that this
84 method is subtractive: it does its job by taking away from HTML Purifier\'s
85 usual feature set, so you cannot add a tag that HTML Purifier never
86 supported in the first place (like embed, form or head). If you
87 change this, you probably also want to change %HTML.AllowedAttributes.
88 </p>
89 <p>
90 <strong>Warning:</strong> If another directive conflicts with the
91 elements here, <em>that</em> directive will win and override.
92 This directive has been available since 1.3.0.
93 </p>
94 ');
96 HTMLPurifier_ConfigSchema::define(
97 'HTML', 'AllowedAttributes', null, 'lookup/null', '
98 <p>
99 If HTML Purifier\'s attribute set is unsatisfactory, overload it!
100 The syntax is "tag.attr" or "*.attr" for the global attributes
101 (style, id, class, dir, lang, xml:lang).
102 </p>
104 <strong>Warning:</strong> If another directive conflicts with the
105 elements here, <em>that</em> directive will win and override. For
106 example, %HTML.EnableAttrID will take precedence over *.id in this
107 directive. You must set that directive to true before you can use
108 IDs at all. This directive has been available since 1.3.0.
109 </p>
112 HTMLPurifier_ConfigSchema::define(
113 'HTML', 'Allowed', null, 'itext/null', '
115 This is a convenience directive that rolls the functionality of
116 %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
117 Specify elements and attributes that are allowed using:
118 <code>element1[attr1|attr2],element2...</code>. You can also use
119 newlines instead of commas to separate elements.
120 </p>
122 <strong>Warning</strong>:
123 All of the constraints on the component directives are still enforced.
124 The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
125 whitelist: directly copy-pasting it here will probably result in
126 broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
127 are set, this directive has no effect.
128 This directive has been available since 2.0.0.
129 </p>
133 * Definition of the purified HTML that describes allowed children,
134 * attributes, and many other things.
136 * Conventions:
138 * All member variables that are prefixed with info
139 * (including the main $info array) are used by HTML Purifier internals
140 * and should not be directly edited when customizing the HTMLDefinition.
141 * They can usually be set via configuration directives or custom
142 * modules.
144 * On the other hand, member variables without the info prefix are used
145 * internally by the HTMLDefinition and MUST NOT be used by other HTML
146 * Purifier internals. Many of them, however, are public, and may be
147 * edited by userspace code to tweak the behavior of HTMLDefinition.
149 * @note This class is inspected by Printer_HTMLDefinition; please
150 * update that class if things here change.
152 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
155 // FULLY-PUBLIC VARIABLES ---------------------------------------------
158 * Associative array of element names to HTMLPurifier_ElementDef
159 * @public
161 var $info = array();
164 * Associative array of global attribute name to attribute definition.
165 * @public
167 var $info_global_attr = array();
170 * String name of parent element HTML will be going into.
171 * @public
173 var $info_parent = 'div';
176 * Definition for parent element, allows parent element to be a
177 * tag that's not allowed inside the HTML fragment.
178 * @public
180 var $info_parent_def;
183 * String name of element used to wrap inline elements in block context
184 * @note This is rarely used except for BLOCKQUOTEs in strict mode
185 * @public
187 var $info_block_wrapper = 'p';
190 * Associative array of deprecated tag name to HTMLPurifier_TagTransform
191 * @public
193 var $info_tag_transform = array();
196 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
197 * @public
199 var $info_attr_transform_pre = array();
202 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
203 * @public
205 var $info_attr_transform_post = array();
208 * Nested lookup array of content set name (Block, Inline) to
209 * element name to whether or not it belongs in that content set.
210 * @public
212 var $info_content_sets = array();
215 * Doctype object
217 var $doctype;
221 // RAW CUSTOMIZATION STUFF --------------------------------------------
/**
 * Attaches a custom attribute definition to an element through the
 * anonymous module.
 * @note This is strictly convenience, and does not have a corresponding
 *       method in HTMLPurifier_HTMLModule
 * @param $element_name String element name to add attribute to
 * @param $attr_name String name of attribute
 * @param $def Attribute definition, can be string or object, see
 *             HTMLPurifier_AttrTypes for details
 */
function addAttribute($element_name, $attr_name, $def) {
    $anon =& $this->getAnonymousModule();
    // reuse the anonymous module's entry for this element when it already
    // has one; otherwise register a blank entry to hang the attribute on
    if (isset($anon->info[$element_name])) {
        $target =& $anon->info[$element_name];
    } else {
        $target =& $anon->addBlankElement($element_name);
    }
    $target->attr[$attr_name] = $def;
}
/**
 * Registers a custom element with the anonymous module and hands back
 * (by reference) whatever that module's addElement() returned.
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter and return value descriptions.
 */
function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
    $anon =& $this->getAnonymousModule();
    // the literal true passed as second argument reflects the assumption
    // that an element added by the user is safe; this may not be a good idea
    $created =& $anon->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
    return $created;
}
/**
 * Registers a blank element with the anonymous module, for overriding
 * existing behavior, and returns it by reference.
 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
 *       parameter and return value descriptions.
 */
function &addBlankElement($element_name) {
    $anon =& $this->getAnonymousModule();
    $blank =& $anon->addBlankElement($element_name);
    return $blank;
}
/**
 * Returns the anonymous module by reference, creating it (with the name
 * 'Anonymous') on first use, so advanced features are available without
 * writing a dedicated module.
 */
function &getAnonymousModule() {
    if ($this->_anonModule) {
        // already created by an earlier call
        return $this->_anonModule;
    }
    $this->_anonModule = new HTMLPurifier_HTMLModule();
    $this->_anonModule->name = 'Anonymous';
    return $this->_anonModule;
}
280 var $_anonModule;
283 // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
285 var $type = 'HTML';
286 var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
 * Performs low-cost, preliminary initialization: creates the module
 * manager that processModules() and setupConfigStuff() rely on.
 *
 * Declared as __construct() because PHP 8 no longer treats a method
 * named after the class as a constructor; without this, $this->manager
 * would never be set there.
 */
function __construct() {
    $this->manager = new HTMLPurifier_HTMLModuleManager();
}

/**
 * PHP 4-style constructor, retained for PHP 4 runtimes and for code
 * that calls it explicitly; simply delegates to __construct().
 */
function HTMLPurifier_HTMLDefinition() {
    $this->__construct();
}
/**
 * Runs the setup sequence: pulls information out of the module manager,
 * applies the configuration, then discards the manager and removes the
 * content_model / content_model_type members from every element
 * definition in $this->info.
 * @param $config Configuration object, passed through to processModules()
 *                and setupConfigStuff()
 */
function doSetup($config) {
    $this->processModules($config);
    $this->setupConfigStuff($config);
    unset($this->manager);

    // cleanup some of the element definitions
    foreach (array_keys($this->info) as $name) {
        unset(
            $this->info[$name]->content_model,
            $this->info[$name]->content_model_type
        );
    }
}
/**
 * Extracts the information held by the module manager: the doctype, the
 * tag/attribute transforms of every module, the element definitions and
 * the content set lookup.
 * @param $config Configuration object handed to the manager's setup()
 */
function processModules($config) {

    if ($this->_anonModule) {
        // user-specific changes live in the anonymous module; it is
        // late-loaded so we don't have to deal with PHP4 reference
        // wonky-ness
        $this->manager->addModule($this->_anonModule);
        unset($this->_anonModule);
    }

    $this->manager->setup($config);
    $this->doctype = $this->manager->doctype;

    // the three transform arrays are all merged the same way
    $transform_props = array(
        'info_tag_transform',
        'info_attr_transform_pre',
        'info_attr_transform_post',
    );

    foreach ($this->manager->modules as $module) {
        foreach ($transform_props as $prop) {
            foreach ($module->$prop as $key => $transform) {
                if ($transform === false) {
                    // a literal false drops any existing entry under this key
                    unset($this->{$prop}[$key]);
                } else {
                    $this->{$prop}[$key] = $transform;
                }
            }
        }
    }

    $this->info = $this->manager->getElements();
    $this->info_content_sets = $this->manager->contentSets->lookup;
}
/**
 * Sets up stuff based on config. We need a better way of doing this.
 *
 * Reads %HTML.BlockWrapper, %HTML.Parent, %HTML.AllowedElements,
 * %HTML.AllowedAttributes and (only when neither of the latter two is an
 * array) %HTML.Allowed, then:
 *  - validates and stores the block wrapper and parent element,
 *  - removes elements and attributes that are not whitelisted,
 *  - raises E_USER_WARNING for every whitelisted element/attribute that
 *    did not match anything in the definition.
 * @param $config Configuration object to read the directives from
 */
function setupConfigStuff($config) {

    $block_wrapper = $config->get('HTML', 'BlockWrapper');
    if (isset($this->info_content_sets['Block'][$block_wrapper])) {
        $this->info_block_wrapper = $block_wrapper;
    } else {
        trigger_error('Cannot use non-block element as block wrapper',
            E_USER_ERROR);
    }

    $parent = $config->get('HTML', 'Parent');
    $def = $this->manager->getElement($parent, true);
    if ($def) {
        $this->info_parent = $parent;
        $this->info_parent_def = $def;
    } else {
        trigger_error('Cannot use unrecognized element as parent',
            E_USER_ERROR);
        // only reached if an error handler lets execution continue:
        // fall back to the definition of the current parent name
        $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
    }

    // support template text
    $support = "(for information on implementing this, see the ".
               "support forums) ";

    // setup allowed elements

    $allowed_elements = $config->get('HTML', 'AllowedElements');
    $allowed_attributes = $config->get('HTML', 'AllowedAttributes');

    // %HTML.Allowed is only consulted when neither component directive is set
    if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
        $allowed = $config->get('HTML', 'Allowed');
        if (is_string($allowed)) {
            list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
        }
    }

    if (is_array($allowed_elements)) {
        foreach ($this->info as $name => $d) {
            if(!isset($allowed_elements[$name])) unset($this->info[$name]);
            // whatever is left in $allowed_elements afterwards matched nothing
            unset($allowed_elements[$name]);
        }
        // emit errors
        foreach ($allowed_elements as $element => $d) {
            $element = htmlspecialchars($element);
            trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
        }
    }

    // working copy: entries are removed as they are matched, so that the
    // leftovers can be reported below
    $allowed_attributes_mutable = $allowed_attributes; // by copy!
    if (is_array($allowed_attributes)) {
        foreach ($this->info_global_attr as $attr_key => $info) {
            if (!isset($allowed_attributes["*.$attr_key"])) {
                unset($this->info_global_attr[$attr_key]);
            } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
                unset($allowed_attributes_mutable["*.$attr_key"]);
            }
        }
        foreach ($this->info as $tag => $info) {
            foreach ($info->attr as $attr => $attr_info) {
                if (!isset($allowed_attributes["$tag.$attr"]) &&
                    !isset($allowed_attributes["*.$attr"])) {
                    unset($this->info[$tag]->attr[$attr]);
                } else {
                    if (isset($allowed_attributes_mutable["$tag.$attr"])) {
                        unset($allowed_attributes_mutable["$tag.$attr"]);
                    } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
                        unset($allowed_attributes_mutable["*.$attr"]);
                    }
                }
            }
        }
        // emit errors
        foreach ($allowed_attributes_mutable as $elattr => $d) {
            // split on the first dot only, so the attribute part is kept
            // whole even if it contains further dots
            $bits = explode('.', $elattr, 2);
            if (count($bits) < 2) {
                // a key without a dot can never match "tag.attr" or
                // "*.attr"; report it as malformed instead of tripping an
                // undefined-offset notice in list()
                $spec = htmlspecialchars($elattr);
                trigger_error("Attribute specifier '$spec' is malformed, ".
                    "expected 'tag.attr' or '*.attr'",
                    E_USER_WARNING);
                continue;
            }
            list($element, $attribute) = $bits;
            $element = htmlspecialchars($element);
            $attribute = htmlspecialchars($attribute);
            if ($element == '*') {
                trigger_error("Global attribute '$attribute' is not ".
                    "supported in any elements $support",
                    E_USER_WARNING);
            } else {
                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                    E_USER_WARNING);
            }
        }
    }

}
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 * Entries may be separated by commas or newlines; spaces and tabs are
 * ignored, so lists can be indented or padded for readability.
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *          it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes), both lookup
 *         arrays (name => true); attribute keys have the form
 *         "element.attr"
 */
function parseTinyMCEAllowedList($list) {

    // drop spaces/tabs up front so that "a[href], b" does not produce an
    // element literally named " b"
    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) continue;
        // a chunk with no '[' (or one that starts with '[') is taken as a
        // bare element name without an attribute list
        $bracket = strpos($chunk, '[');
        if ($bracket === false || $bracket === 0) {
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        // '*' only carries global attributes, it is not an element itself
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;
        $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
        $attr = explode('|', $attr);
        foreach ($attr as $key) {
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);

}