/**
 * SAXY_Parser_Base is a base class for SAXY and SAXY Lite
 * @package saxy-xmlparser
 * @copyright (C) 2004 John Heinstein. All rights reserved
 * @license http://www.gnu.org/copyleft/lesser.html LGPL License
 * @author John Heinstein <johnkarl@nbnet.nb.ca>
 * @link http://www.engageinteractive.com/saxy/ SAXY Home Page
 * SAXY is Free Software
 */
/** the initial characters of a cdata section */
define('SAXY_SEARCH_CDATA', '![CDATA[');
/** the length of the initial characters of a cdata section (derived, so it cannot drift from the marker) */
define('SAXY_CDATA_LEN', strlen(SAXY_SEARCH_CDATA));
/** the initial characters of a notation */
define('SAXY_SEARCH_NOTATION', '!NOTATION');
/** the initial characters of a doctype */
define('SAXY_SEARCH_DOCTYPE', '!DOCTYPE');
/** saxy parse state, just before parsing an attribute */
define('SAXY_STATE_ATTR_NONE', 0);
/** saxy parse state, parsing an attribute key */
define('SAXY_STATE_ATTR_KEY', 1);
/** saxy parse state, parsing an attribute value */
define('SAXY_STATE_ATTR_VALUE', 2);
/**
 * The base SAX Parser class
 *
 * @package saxy-xmlparser
 * @author John Heinstein <johnkarl@nbnet.nb.ca>
 */
34 class SAXY_Parser_Base
{
// NOTE(review): a property documented as "@var int The current state of the
// parser" belongs here, but its declaration is missing from this file;
// presumably "var $state;" - confirm the name and restore it.
/** @var string A temporary container for parsed characters (set to '' by the constructor) */
var $charContainer;
/** @var mixed Callback passed to call_user_func() for the start element event */
var $startElementHandler;
/** @var mixed Callback passed to call_user_func() for the end element event */
var $endElementHandler;
/** @var mixed Callback passed to call_user_func() for the character data event */
var $characterDataHandler;
/** @var mixed Callback passed to call_user_func() for the CDATA Section event; null until one is set */
var $cDataSectionHandler = null;
/** @var boolean True if predefined entities are to be converted into characters */
var $convertEntities = true;
/** @var Array Translation table for predefined entities (entity text => replacement character) */
var $predefinedEntities = array('&amp;' => '&', '&lt;' => '<', '&gt;' => '>',
    '&quot;' => '"', '&apos;' => "'");
/** @var Array User defined translation table for entities */
var $definedEntities = array();
/**
 * Constructor for SAX parser
 *
 * PHP 8 no longer treats a method named after its class as a constructor,
 * so this unified constructor delegates to the legacy one below.
 */
function __construct() {
    $this->SAXY_Parser_Base();
} //__construct

/**
 * Legacy (PHP 4 style) constructor for SAX parser
 *
 * Kept as a regular method so that code calling $this->SAXY_Parser_Base()
 * continues to work. Resets the character container to an empty string.
 */
function SAXY_Parser_Base() {
    $this->charContainer = '';
} //SAXY_Parser_Base
/**
 * Registers the callbacks used for the element events
 * @param mixed $startHandler The handler stored for the start element event
 * @param mixed $endHandler The handler stored for the end element event
 */
function xml_set_element_handler($startHandler, $endHandler) {
    $this->endElementHandler = $endHandler;
    $this->startElementHandler = $startHandler;
} //xml_set_element_handler
/**
 * Sets the handler for the data event
 * @param mixed $handler The data handler
 */
function xml_set_character_data_handler($handler) {
    // Plain assignment: binding a reference to a by-value parameter has no
    // effect, and this matches xml_set_element_handler().
    $this->characterDataHandler = $handler;
} //xml_set_character_data_handler
/**
 * Sets the handler for the CDATA Section event
 * @param mixed $handler The CDATA Section handler
 */
function xml_set_cdata_section_handler($handler) {
    // Plain assignment: binding a reference to a by-value parameter has no
    // effect, and this matches xml_set_element_handler().
    $this->cDataSectionHandler = $handler;
} //xml_set_cdata_section_handler
/**
 * Turns the replacement of predefined entities with their equivalent characters on or off
 * @param boolean $truthVal True if entity replacement is to occur during parsing
 */
function convertEntities($truthVal) {
    $this->convertEntities = $truthVal;
} //convertEntities
/**
 * Appends an array of entity mappings to the existing translation table
 *
 * Intended mainly to facilitate the conversion of non-ASCII entities into equivalent characters
 *
 * @param array $table A list of entity mappings in the format: array('&amp;' => '&');
 */
function appendEntityTranslationTable($table) {
    // Merge instead of overwrite so that repeated calls accumulate mappings;
    // for string keys, array_merge() lets the newly supplied value win.
    $this->definedEntities = array_merge($this->definedEntities, $table);
} //appendEntityTranslationTable
/**
 * Gets the nth character from the end of the string
 * @param string $text The text to be queried
 * @param int $index The index from the end of the string (0 is the last character)
 * @return string The found character, or an empty string if the index is outside the text
 */
function getCharFromEnd($text, $index) {
    $len = strlen($text);
    $pos = $len - 1 - $index;

    // Guard the range explicitly: since PHP 7.1 a negative string offset
    // counts from the end, which would silently return the wrong character.
    if ($pos < 0 || $pos >= $len) {
        return '';
    }

    // Square brackets: the curly-brace offset syntax was removed in PHP 8.
    return $text[$pos];
} //getCharFromEnd
/**
 * Parses the attributes string into an array of key / value pairs
 * @param string The attribute text
 * @return Array An array of key / value pairs
 */
// Walks the trimmed attribute text one character at a time, tracking one of
// the SAXY_STATE_ATTR_* states in $currentState, and collects completed
// key / value pairs into $attrArray.
// NOTE(review): the switch below has no case label before its first branch
// and several closing braces are absent in this copy of the method - restore
// the missing lines from the upstream source before relying on it.
126 function parseAttributes($attrText) {
127 $attrText = trim($attrText);
128 $attrArray = array();
// Entity conversion at the closing quote only runs while this flag is true.
129 $maybeEntity = false;
131 $total = strlen($attrText);
134 $currentState = SAXY_STATE_ATTR_NONE
;
137 for ($i = 0; $i < $total; $i++
) {
138 $currentChar = $attrText{$i};
140 if ($currentState == SAXY_STATE_ATTR_NONE
) {
// NOTE(review): the closing parenthesis looks misplaced - trim() receives the
// boolean result of ($currentChar != '') instead of the character, so this
// test is true for every character, whitespace included. Presumably
// trim($currentChar) != '' was intended; confirm and fix.
141 if (trim($currentChar != '')) {
142 $currentState = SAXY_STATE_ATTR_KEY
;
146 switch ($currentChar) {
148 if ($currentState == SAXY_STATE_ATTR_VALUE
) {
149 $valueDump .= $currentChar;
156 case "\x0B": //vertical tab
163 if ($currentState == SAXY_STATE_ATTR_VALUE
) {
164 $valueDump .= $currentChar;
167 $currentState = SAXY_STATE_ATTR_VALUE
;
169 $maybeEntity = false;
174 if ($currentState == SAXY_STATE_ATTR_VALUE
) {
175 if ($quoteType == '') {
// The current character equals the recorded quote type: the value is complete.
179 if ($quoteType == $currentChar) {
180 if ($this->convertEntities
&& $maybeEntity) {
// Predefined entities are translated first, then the user defined table.
181 $valueDump = strtr($valueDump, $this->predefinedEntities
);
182 $valueDump = strtr($valueDump, $this->definedEntities
);
// Store the finished pair under the trimmed key and reset for the next attribute.
185 $attrArray[trim($keyDump)] = $valueDump;
186 $keyDump = $valueDump = $quoteType = '';
187 $currentState = SAXY_STATE_ATTR_NONE
;
190 $valueDump .= $currentChar;
// Same value-closing logic as above, repeated in a second branch of the switch.
197 if ($currentState == SAXY_STATE_ATTR_VALUE
) {
198 if ($quoteType == '') {
202 if ($quoteType == $currentChar) {
203 if ($this->convertEntities
&& $maybeEntity) {
204 $valueDump = strtr($valueDump, $this->predefinedEntities
);
205 $valueDump = strtr($valueDump, $this->definedEntities
);
208 $attrArray[trim($keyDump)] = $valueDump;
209 $keyDump = $valueDump = $quoteType = '';
210 $currentState = SAXY_STATE_ATTR_NONE
;
213 $valueDump .= $currentChar;
222 $valueDump .= $currentChar;
// While in the key state, characters accumulate into the attribute name.
226 if ($currentState == SAXY_STATE_ATTR_KEY
) {
227 $keyDump .= $currentChar;
230 $valueDump .= $currentChar;
/**
 * Handles the text found between two tags
 *
 * Text that is empty after trimming is dropped; anything else is passed on,
 * untrimmed, as a character data event.
 *
 * @param string $betweenTagText The character data
 */
function parseBetweenTags($betweenTagText) {
    $hasContent = (trim($betweenTagText) != '');

    if (!$hasContent) {
        return;
    }

    $this->fireCharacterDataEvent($betweenTagText);
} //parseBetweenTags
/**
 * Notifies the registered start element handler
 *
 * The handler is called with this parser, the tag name and the attributes.
 *
 * @param string $tagName The start element tag name
 * @param Array $attributes The start element attributes
 */
function fireStartElementEvent($tagName, $attributes) {
    $handler = $this->startElementHandler;

    call_user_func($handler, $this, $tagName, $attributes);
} //fireStartElementEvent
/**
 * Notifies the registered end element handler
 *
 * The handler is called with this parser and the tag name.
 *
 * @param string $tagName The end element tag name
 */
function fireEndElementEvent($tagName) {
    $handler = $this->endElementHandler;

    call_user_func($handler, $this, $tagName);
} //fireEndElementEvent
/**
 * Fires a character data event
 *
 * When entity conversion is enabled and the data contains an ampersand, the
 * predefined entities and then the user defined entities are translated
 * before the character data handler is called with this parser and the data.
 *
 * @param string $data The character data
 */
function fireCharacterDataEvent($data) {
    // strpos() returns false - never -1 - when the needle is absent, so the
    // strict comparison is required for this check to skip anything at all.
    if ($this->convertEntities && (strpos($data, '&') !== false)) {
        $data = strtr($data, $this->predefinedEntities);
        $data = strtr($data, $this->definedEntities);
    }

    call_user_func($this->characterDataHandler, $this, $data);
} //fireCharacterDataEvent
/**
 * Notifies the registered CDATA Section handler
 *
 * The handler is called with this parser and the section data.
 *
 * @param string $data The CDATA Section data
 */
function fireCDataSectionEvent($data) {
    $handler = $this->cDataSectionHandler;

    call_user_func($handler, $this, $data);
} //fireCDataSectionEvent