4 * Defines a set of immutable value object tokens for HTML representation.
10 * Abstract base token class that all others inherit from.
12 class HTMLPurifier_Token
{
13 var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
14 var $line; /**< Line number node was on in source document. Null if unknown. @public */
17 * Lookup array of processing that this token is exempt from.
18 * Currently, valid values are "ValidateAttributes" and
19 * "MakeWellFormed_TagClosedError"
24 * Copies the tag into a new one (clone substitute).
25 * @return Copied token
// Round-trip the token through serialize()/unserialize() so the caller
// receives a newly built token carrying the same property values.
28 return unserialize(serialize($this));
33 * Abstract class of a tag token (start, end or empty), and its behavior.
35 class HTMLPurifier_Token_Tag
extends HTMLPurifier_Token
// abstract
38 * Static bool marker that indicates the class is a tag.
40 * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
41 * without having to call <tt>is_a()</tt>.
48 * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
50 * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
51 * be lower-casing them, but these tokens cater to HTML tags, which are case-insensitive.
59 * Associative array of the tag's attributes.
64 * Non-overloaded constructor, which lower-cases the passed tag name.
66 * @param $name String name.
67 * @param $attr Associative array of attributes.
// Constructor: stores the tag name in lower case and walks the attribute
// array to normalize keys that are not already lower case.
69 function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) {
// Skip the strtolower() call when the name consists solely of lower-case letters.
70 $this->name
= ctype_lower($name) ?
$name : strtolower($name);
71 foreach ($attr as $key => $value) {
72 // normalization only necessary when key is not lowercase
73 if (!ctype_lower($key)) {
74 $new_key = strtolower($key);
// Copy the value under the lower-cased key only when that key is not
// already set, so an existing lower-case attribute is not overwritten.
75 if (!isset($attr[$new_key])) {
76 $attr[$new_key] = $attr[$key];
// ctype_lower() is also false for keys containing digits or punctuation,
// which strtolower() leaves unchanged; this condition is true only when
// lower-casing actually produced a different key.
78 if ($new_key !== $key) {
89 * Concrete start token class.
91 class HTMLPurifier_Token_Start
extends HTMLPurifier_Token_Tag
97 * Concrete empty token class.
99 class HTMLPurifier_Token_Empty
extends HTMLPurifier_Token_Tag
105 * Concrete end token class.
107 * @warning This class accepts attributes even though end tags cannot. This
108 * is for optimization reasons, as under normal circumstances, the Lexers
109 * do not pass attributes.
111 class HTMLPurifier_Token_End
extends HTMLPurifier_Token_Tag
117 * Concrete text token class.
119 * Text tokens consist of regular parsed character data (PCDATA) and raw
120 * character data (from the CDATA sections). Internally, their
121 * data is parsed with all entities expanded. Surprisingly, the text token
122 * does have a "tag name" called #PCDATA, which is how the DTD represents it
123 * in permissible child nodes.
125 class HTMLPurifier_Token_Text
extends HTMLPurifier_Token
128 var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
130 var $data; /**< Parsed character data of text. @public */
131 var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
134 * Constructor, accepts data and determines if it is whitespace.
136 * @param $data String parsed character data.
138 function HTMLPurifier_Token_Text($data, $line = null) {
// ctype_space() is true only when every character of $data is whitespace.
// NOTE(review): on PHP >= 5.1 ctype_space('') returns false, so an empty
// string is not flagged as whitespace -- confirm that is intended.
140 $this->is_whitespace
= ctype_space($data);
147 * Concrete comment token class. Generally will be ignored.
149 class HTMLPurifier_Token_Comment
extends HTMLPurifier_Token
151 var $data; /**< Character data within comment. @public */
152 var $type = 'comment';
154 * Transparent constructor.
156 * @param $data String comment data.
158 function HTMLPurifier_Token_Comment($data, $line = null) {