lib/htmlpurifier/HTMLPurifier/Token.php

   1 <?php
   2
   3 /**
   4  * Defines a set of immutable value object tokens for HTML representation.
   5  *
   6  * @file
   7  */
   8
   9 /**
  10  * Abstract base token class that all others inherit from.
  11  */
  12 class HTMLPurifier_Token {
  13     var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
  14
  15     /**
  16      * Copies the tag into a new one (clone substitute).
  17      * @return Copied token
  18      */
  19     function copy() {
  20         trigger_error('Cannot copy abstract class', E_USER_ERROR);
  21     }
  22 }
  23
  24 /**
  25  * Abstract class of a tag token (start, end or empty), and its behavior.
  26  */
  27 class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
  28 {
  29     /**
  30      * Static bool marker that indicates the class is a tag.
  31      *
  32      * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
  33      * without having to use a function call <tt>is_a()</tt>.
  34      *
  35      * @public
  36      */
  37     var $is_tag = true;
  38
  39     /**
  40      * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
  41      *
  42      * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
  43      * be lower-casing them, but these tokens cater to HTML tags, which are
  44      * insensitive.
  45      *
  46      * @public
  47      */
  48     var $name;
  49
  50     /**
  51      * Associative array of the tag's attributes.
  52      */
  53     var $attr = array();
  54
  55     /**
  56      * Non-overloaded constructor, which lower-cases passed tag name.
  57      *
  58      * @param $name String name.
  59      * @param $attr Associative array of attributes.
  60      */
  61     function HTMLPurifier_Token_Tag($name, $attr = array()) {
  62         $this->name = ctype_lower($name) ? $name : strtolower($name);
  63         foreach ($attr as $key => $value) {
  64             // normalization only necessary when key is not lowercase
  65             if (!ctype_lower($key)) {
  66                 $new_key = strtolower($key);
  67                 if (!isset($attr[$new_key])) {
  68                     $attr[$new_key] = $attr[$key];
  69                 }
  70                 if ($new_key !== $key) {
  71                     unset($attr[$key]);
  72                 }
  73             }
  74         }
  75         $this->attr = $attr;
  76     }
  77 }
  78
  79 /**
  80  * Concrete start token class.
  81  */
  82 class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
  83 {
  84     var $type = 'start';
  85     function copy() {
  86         return new HTMLPurifier_Token_Start($this->name, $this->attr);
  87     }
  88 }
  89
  90 /**
  91  * Concrete empty token class.
  92  */
  93 class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
  94 {
  95     var $type = 'empty';
  96     function copy() {
  97         return new HTMLPurifier_Token_Empty($this->name, $this->attr);
  98     }
  99 }
 100
 101 /**
 102  * Concrete end token class.
 103  *
 104  * @warning This class accepts attributes even though end tags cannot. This
 105  * is for optimization reasons, as under normal circumstances, the Lexers
 106  * do not pass attributes.
 107  */
 108 class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
 109 {
 110     var $type = 'end';
 111     function copy() {
 112         return new HTMLPurifier_Token_End($this->name);
 113     }
 114 }
 115
 116 /**
 117  * Concrete text token class.
 118  *
  119  * Text tokens comprise regular parsed character data (PCDATA) and raw
 120  * character data (from the CDATA sections). Internally, their
 121  * data is parsed with all entities expanded. Surprisingly, the text token
 122  * does have a "tag name" called #PCDATA, which is how the DTD represents it
 123  * in permissible child nodes.
 124  */
 125 class HTMLPurifier_Token_Text extends HTMLPurifier_Token
 126 {
 127
 128     var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
 129     var $type = 'text';
 130     var $data; /**< Parsed character data of text. @public */
 131     var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
 132
 133     /**
 134      * Constructor, accepts data and determines if it is whitespace.
 135      *
 136      * @param $data String parsed character data.
 137      */
 138     function HTMLPurifier_Token_Text($data) {
 139         $this->data = $data;
 140         $this->is_whitespace = ctype_space($data);
 141     }
 142     function copy() {
 143         return new HTMLPurifier_Token_Text($this->data);
 144     }
 145
 146 }
 147
 148 /**
 149  * Concrete comment token class. Generally will be ignored.
 150  */
 151 class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
 152 {
 153     var $data; /**< Character data within comment. @public */
 154     var $type = 'comment';
 155     /**
 156      * Transparent constructor.
 157      *
 158      * @param $data String comment data.
 159      */
 160     function HTMLPurifier_Token_Comment($data) {
 161         $this->data = $data;
 162     }
 163     function copy() {
 164         return new HTMLPurifier_Token_Comment($this->data);
 165     }
 166 }
 167
 168 ?>