4 * Defines a set of immutable value object tokens for HTML representation.
10 * Abstract base token class that all others inherit from.
12 class HTMLPurifier_Token
{
13 var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
16 * Copies the tag into a new one (clone substitute).
17 * @return Copied token
20 trigger_error('Cannot copy abstract class', E_USER_ERROR
);
25 * Abstract class of a tag token (start, end or empty), and its behavior.
27 class HTMLPurifier_Token_Tag
extends HTMLPurifier_Token
// abstract
30 * Static bool marker that indicates the class is a tag.
32 * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
33 * without having to use a function call <tt>is_a()</tt>.
40 * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
42 * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
43 * be lower-casing them, but these tokens cater to HTML tags, which are case-insensitive.
51 * Associative array of the tag's attributes.
56 * Non-overloaded constructor, which lower-cases the passed tag name and attribute keys.
58 * @param $name String name.
59 * @param $attr Associative array of attributes.
61 function HTMLPurifier_Token_Tag($name, $attr = array()) {
62 $this->name
= ctype_lower($name) ?
$name : strtolower($name);
63 foreach ($attr as $key => $value) {
64 // normalization only necessary when key is not lowercase
65 if (!ctype_lower($key)) {
66 $new_key = strtolower($key);
67 if (!isset($attr[$new_key])) {
68 $attr[$new_key] = $attr[$key];
70 if ($new_key !== $key) {
80 * Concrete start token class.
82 class HTMLPurifier_Token_Start
extends HTMLPurifier_Token_Tag
86 return new HTMLPurifier_Token_Start($this->name
, $this->attr
);
91 * Concrete empty token class.
93 class HTMLPurifier_Token_Empty
extends HTMLPurifier_Token_Tag
97 return new HTMLPurifier_Token_Empty($this->name
, $this->attr
);
102 * Concrete end token class.
104 * @warning This class accepts attributes even though end tags cannot have any. This
105 * is for optimization reasons, as under normal circumstances, the Lexers
106 * do not pass attributes.
108 class HTMLPurifier_Token_End
extends HTMLPurifier_Token_Tag
112 return new HTMLPurifier_Token_End($this->name
);
117 * Concrete text token class.
119 * Text tokens consist of regular parsed character data (PCDATA) and raw
120 * character data (from the CDATA sections). Internally, their
121 * data is parsed with all entities expanded. Surprisingly, the text token
122 * does have a "tag name" called #PCDATA, which is how the DTD represents it
123 * in permissible child nodes.
125 class HTMLPurifier_Token_Text
extends HTMLPurifier_Token
128 var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
130 var $data; /**< Parsed character data of text. @public */
131 var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
134 * Constructor, accepts data and determines if it is whitespace.
136 * @param $data String parsed character data.
138 function HTMLPurifier_Token_Text($data) {
140 $this->is_whitespace
= ctype_space($data);
143 return new HTMLPurifier_Token_Text($this->data
);
149 * Concrete comment token class. Generally will be ignored.
151 class HTMLPurifier_Token_Comment
extends HTMLPurifier_Token
153 var $data; /**< Character data within comment. @public */
154 var $type = 'comment';
156 * Transparent constructor.
158 * @param $data String comment data.
160 function HTMLPurifier_Token_Comment($data) {
164 return new HTMLPurifier_Token_Comment($this->data
);