lib/htmlpurifier/HTMLPurifier/Token.php

   1 <?php
   2
   3 /**
   4  * Defines a set of immutable value object tokens for HTML representation.
   5  *
   6  * @file
   7  */
   8
   9 /**
  10  * Abstract base token class that all others inherit from.
  11  */
  12 class HTMLPurifier_Token {
  13     var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
  14     var $line; /**< Line number node was on in source document. Null if unknown. @public */
  15
  16     /**
  17      * Lookup array of processing that this token is exempt from.
  18      * Currently, valid values are "ValidateAttributes" and
  19      * "MakeWellFormed_TagClosedError"
  20      */
  21     var $armor = array();
  22
  23     /**
  24      * Copies the tag into a new one (clone substitute).
  25      * @return Copied token
  26      */
  27     function copy() {
  28         return unserialize(serialize($this));
  29     }
  30 }
  31
  32 /**
  33  * Abstract class of a tag token (start, end or empty), and its behavior.
  34  */
  35 class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
  36 {
  37     /**
  38      * Static bool marker that indicates the class is a tag.
  39      *
  40      * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
  41      * without having to use a function call <tt>is_a()</tt>.
  42      *
  43      * @public
  44      */
  45     var $is_tag = true;
  46
  47     /**
  48      * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
  49      *
  50      * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
  51      * be lower-casing them, but these tokens cater to HTML tags, which are
  52      * insensitive.
  53      *
  54      * @public
  55      */
  56     var $name;
  57
  58     /**
  59      * Associative array of the tag's attributes.
  60      */
  61     var $attr = array();
  62
  63     /**
  64      * Non-overloaded constructor, which lower-cases passed tag name.
  65      *
  66      * @param $name String name.
  67      * @param $attr Associative array of attributes.
  68      */
  69     function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) {
  70         $this->name = ctype_lower($name) ? $name : strtolower($name);
  71         foreach ($attr as $key => $value) {
  72             // normalization only necessary when key is not lowercase
  73             if (!ctype_lower($key)) {
  74                 $new_key = strtolower($key);
  75                 if (!isset($attr[$new_key])) {
  76                     $attr[$new_key] = $attr[$key];
  77                 }
  78                 if ($new_key !== $key) {
  79                     unset($attr[$key]);
  80                 }
  81             }
  82         }
  83         $this->attr = $attr;
  84         $this->line = $line;
  85     }
  86 }
  87
  88 /**
  89  * Concrete start token class.
  90  */
  91 class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
  92 {
  93     var $type = 'start';
  94 }
  95
  96 /**
  97  * Concrete empty token class.
  98  */
  99 class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
 100 {
 101     var $type = 'empty';
 102 }
 103
 104 /**
 105  * Concrete end token class.
 106  *
 107  * @warning This class accepts attributes even though end tags cannot. This
 108  * is for optimization reasons, as under normal circumstances, the Lexers
 109  * do not pass attributes.
 110  */
 111 class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
 112 {
 113     var $type = 'end';
 114 }
 115
/**
 * Concrete text token class.
 *
 * Text tokens consist of regular parsed character data (PCDATA) and raw
 * character data (from the CDATA sections). Internally, their
 * data is parsed with all entities expanded. Surprisingly, the text token
 * does have a "tag name" called #PCDATA, which is how the DTD represents it
 * in permissible child nodes.
 */
 125 class HTMLPurifier_Token_Text extends HTMLPurifier_Token
 126 {
 127
 128     var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
 129     var $type = 'text';
 130     var $data; /**< Parsed character data of text. @public */
 131     var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
 132
 133     /**
 134      * Constructor, accepts data and determines if it is whitespace.
 135      *
 136      * @param $data String parsed character data.
 137      */
 138     function HTMLPurifier_Token_Text($data, $line = null) {
 139         $this->data = $data;
 140         $this->is_whitespace = ctype_space($data);
 141         $this->line = $line;
 142     }
 143
 144 }
 145
 146 /**
 147  * Concrete comment token class. Generally will be ignored.
 148  */
 149 class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
 150 {
 151     var $data; /**< Character data within comment. @public */
 152     var $type = 'comment';
 153     /**
 154      * Transparent constructor.
 155      *
 156      * @param $data String comment data.
 157      */
 158     function HTMLPurifier_Token_Comment($data, $line = null) {
 159         $this->data = $data;
 160         $this->line = $line;
 161     }
 162 }
 163