/**
 * SAXY Lite is a non-validating, but lightweight and fast SAX parser for PHP, modelled on the Expat parser
 * @package saxy-xmlparser
 * @subpackage saxy-xmlparser-lite
 * @copyright (C) 2004 John Heinstein. All rights reserved
 * @license http://www.gnu.org/copyleft/lesser.html LGPL License
 * @author John Heinstein <johnkarl@nbnet.nb.ca>
 * @link http://www.engageinteractive.com/saxy/ SAXY Home Page
 * SAXY is Free Software
 */
// Default the include path to the directory holding this file, unless the
// including application has already defined one.
defined('SAXY_INCLUDE_PATH') || define('SAXY_INCLUDE_PATH', dirname(__FILE__) . "/");

/** current version of SAXY Lite */
define('SAXY_LITE_VERSION', '0.17');

/** initial saxy lite parse state, before anything is encountered */
define('SAXY_STATE_NONE', 0);
/** saxy lite parse state, processing main document */
define('SAXY_STATE_PARSING', 1);

// Shared SAXY code; presumably where SAXY_Parser_Base is declared -- confirm.
require_once(SAXY_INCLUDE_PATH . 'xml_saxy_shared.php');
/**
 * The SAX Parser class
 *
 * @package saxy-xmlparser
 * @subpackage saxy-xmlparser-lite
 * @author John Heinstein <johnkarl@nbnet.nb.ca>
 */
35 class SAXY_Lite_Parser
extends SAXY_Parser_Base
{
/**
 * Constructor for SAX parser
 */
function __construct() {
    // PHP 8 no longer treats a method named after the class as the
    // constructor, so object creation is routed through __construct().
    // The real work stays in the named method below so that code calling
    // it directly sees exactly the same behaviour as before.
    $this->SAXY_Lite_Parser();
} //__construct

// PHP 4-style constructor, kept as an ordinary method for backward
// compatibility. Initialises the base parser and resets the parse state to
// "nothing encountered yet".
function SAXY_Lite_Parser() {
    $this->SAXY_Parser_Base();
    $this->state = SAXY_STATE_NONE;
} //SAXY_Lite_Parser
/**
 * Returns the current version of SAXY Lite
 * @return string The current version of SAXY Lite
 */
function getVersion() {
    // Hands back the SAXY_LITE_VERSION constant unchanged.
    $version = SAXY_LITE_VERSION;

    return $version;
} //getVersion
/**
 * Processes the xml prolog, doctype, and any other nodes that exist outside of the main xml document
 * @param string $xmlText The xml text to be processed
 * @return string The preprocessed xml text
 */
function preprocessXML($xmlText) {
    // Strips everything that precedes the first element tag: the text is
    // trimmed, then scanned for the first '<' that is NOT followed by '?'
    // (prolog / processing instruction) or '!' (doctype, comment, ...).
    // When such a tag is found, $this->state becomes SAXY_STATE_PARSING and
    // the text from that '<' onwards is returned. When none is found, the
    // state is left untouched and an empty string is returned, so the
    // documented string return type always holds.
    //
    // Note: the scan is purely lexical, so a '<' inside a leading comment
    // or doctype that is followed by an ordinary character is taken as the
    // start of the document.
    $xmlText = trim($xmlText);
    $total = strlen($xmlText);
    $offset = 0;

    // Jump from one '<' to the next instead of inspecting every byte.
    while (($offset < $total) && (($tagStart = strpos($xmlText, '<', $offset)) !== false)) {
        // A '<' in the very last position has no following character; use an
        // empty lookahead rather than reading past the end of the string.
        $nextChar = (($tagStart + 1) < $total) ? $xmlText[$tagStart + 1] : '';

        if (($nextChar !== '?') && ($nextChar !== '!')) {
            $this->state = SAXY_STATE_PARSING;
            return substr($xmlText, $tagStart);
        }

        $offset = $tagStart + 1;
    }

    return '';
} //preprocessXML
/**
 * The controlling method for the parsing process
 * @param string $xmlText The xml text to be processed
 * @return bool True if parsing is successful
 */
function parse ($xmlText) {
    // Drives the parse: the text is preprocessed, then walked one character
    // at a time. Characters accumulate in $this->charContainer; a '<' hands
    // the buffered text to parseBetweenTags(), a '>' hands the buffered tag
    // to parseTag(). While the buffer starts with the CDATA marker, '<' and
    // '>' are ordinary data until the buffer ends in ']]'. Always returns true.

    // The cast keeps strlen() and the offset reads below well-defined even
    // if preprocessing yields no string.
    $xmlText = (string) $this->preprocessXML($xmlText);
    $total = strlen($xmlText);

    for ($i = 0; $i < $total; $i++) {
        $currentChar = $xmlText[$i];

        switch ($this->state) {
            case SAXY_STATE_PARSING:

                switch ($currentChar) {
                    case '<':
                        if (substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) {
                            // inside a CDATA section: '<' is literal text
                            $this->charContainer .= $currentChar;
                        } else {
                            $this->parseBetweenTags($this->charContainer);
                            $this->charContainer = '';
                        }
                        break;

                    case '>':
                        if ((substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) &&
                            !(($this->getCharFromEnd($this->charContainer, 0) == ']') &&
                            ($this->getCharFromEnd($this->charContainer, 1) == ']'))) {
                            // CDATA section not yet closed by ']]': '>' is literal text
                            $this->charContainer .= $currentChar;
                        } else {
                            $this->parseTag($this->charContainer);
                            $this->charContainer = '';
                        }
                        break;

                    default:
                        $this->charContainer .= $currentChar;
                }

                break;
        }
    }

    return true;
} //parse
/**
 * Parses an element tag
 * @param string $tagText The interior text of the element tag
 */
function parseTag($tagText) {
    // Classifies the text found between '<' and '>' by its first character
    // and fires the matching SAX events:
    //   '/'   closing tag       -> end-element event
    //   '!'   CDATA section     -> cdata event (or character-data event when
    //                              no CDATA handler is set); NOTATION and
    //                              comment nodes are discarded
    //   '?'   processing instruction -> discarded
    //   other opening or self-closing tag -> start-element event, plus an
    //                              end-element event for the self-closing form
    $tagText = trim($tagText);
    // An empty tag ("<>") has no offset 0; fall through to the default branch.
    $firstChar = ($tagText === '') ? '' : $tagText[0];
    $myAttributes = array();

    switch ($firstChar) {
        case '/':
            $tagName = substr($tagText, 1);
            $this->fireEndElementEvent($tagName);
            break;

        case '!':
            $upperCaseTagText = strtoupper($tagText);

            if (strpos($upperCaseTagText, SAXY_SEARCH_CDATA) !== false) { //CDATA Section
                $total = strlen($tagText);
                $openBraceCount = 0;
                $textNodeText = '';

                for ($i = 0; $i < $total; $i++) {
                    $currentChar = $tagText[$i];

                    // The section text ends at the first ']]'. The bounds
                    // check stops the lookahead from running off the string.
                    if (($currentChar == ']') && (($i + 1) < $total) && ($tagText[$i + 1] == ']')) {
                        break;
                    } else if ($openBraceCount > 1) {
                        // past both '[' of "![CDATA[": this is section text
                        $textNodeText .= $currentChar;
                    } else if ($currentChar == '[') {
                        $openBraceCount++;
                    }
                }

                if ($this->cDataSectionHandler == null) {
                    $this->fireCharacterDataEvent($textNodeText);
                } else {
                    $this->fireCDataSectionEvent($textNodeText);
                }
            } else if (strpos($upperCaseTagText, SAXY_SEARCH_NOTATION) !== false) { //NOTATION node, discard
                return;
            } else if (substr($tagText, 0, 2) == '!-') { //comment node, discard
                return;
            }

            break;

        case '?':
            //Processing Instruction node, discard
            return;

        default:
            if ((strpos($tagText, '"') !== false) || (strpos($tagText, "'") !== false)) {
                // A quote means attributes are present. The element name runs
                // up to the first whitespace character; the remainder is
                // handed to parseAttributes().
                $nameLength = strcspn($tagText, " \t\n\r\x0B");
                $tagName = substr($tagText, 0, $nameLength);

                if ($nameLength < strlen($tagText)) {
                    $myAttributes = $this->parseAttributes(substr($tagText, $nameLength));
                }

                $this->fireStartElementEvent($tagName, $myAttributes);

                // Self-closing form: check $tagText, but send $tagName.
                if (substr($tagText, -1) === '/') {
                    $this->fireEndElementEvent($tagName);
                }
            } else {
                // No attributes. Cut the name at the last '/' by position;
                // subtracting from the string returned by strrchr() is a
                // TypeError on PHP 8.
                $slashPos = strrpos($tagText, '/');

                if ($slashPos !== false) {
                    $tagText = trim(substr($tagText, 0, $slashPos));
                    $this->fireStartElementEvent($tagText, $myAttributes);
                    $this->fireEndElementEvent($tagText);
                } else {
                    $this->fireStartElementEvent($tagText, $myAttributes);
                }
            }
    }
} //parseTag
/**
 * Returns the current error code (non-functional for SAXY Lite)
 * @return int The current error code
 */
226 function xml_get_error_code() {
228 } //xml_get_error_code
/**
 * Returns a textual description of the error code (non-functional for SAXY Lite)
 * @param int $code The error code
 * @return string The error message
 */
235 function xml_error_string($code) {