3 * XML syntax and type checker.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
/**
 * Will be set to true or false to indicate whether the file is
 * well-formed XML. Note that this doesn't check schema validity.
 */
public $wellFormed = false;

/**
 * Will be set to true if the optional element filter returned
 * a match at some point.
 */
public $filterMatch = false;

/**
 * Name of the document's root element, including any namespace
 * (as reported by the namespace-aware parser).
 */
public $rootElement = '';

/**
 * A stack of strings containing the data of each xml element as it's processed. Append
 * data to the top string of the stack, then pop off the string and process it when the
 * element is closed.
 */
protected $elementData = array();

/**
 * A stack of element names and attributes, as we process them.
 */
protected $elementDataContext = array();

/**
 * Current depth of the data stack.
 */
protected $stackDepth = 0;

/**
 * Additional parsing options
 */
private $parserOptions = array(
	'processing_instruction_handler' => '',
);

/**
 * Optional filter callback, assigned by the constructor.
 *
 * Declared explicitly so the constructor does not create it as a dynamic
 * property. Kept public because a dynamically created property is public,
 * so visibility for existing callers is unchanged.
 */
public $filterCallback = null;
/**
 * Parse the given input and record the outcome in $wellFormed,
 * $rootElement and $filterMatch.
 *
 * @param string $input a filename or string containing the XML element
 * @param callable $filterCallback (optional)
 *        Function to call to do additional custom validity checks from the
 *        SAX element handler event. This gives you access to the element
 *        namespace, name, attributes, and text contents.
 *        Filter should return 'true' to toggle on $this->filterMatch
 * @param bool $isFile (optional) indicates if the first parameter is a
 *        filename (default, true) or if it is a string (false)
 * @param array $options list of additional parsing options:
 *        processing_instruction_handler: Callback for xml_set_processing_instruction_handler
 */
public function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
	$this->filterCallback = $filterCallback;
	// Caller-supplied options override the defaults declared on the class.
	$this->parserOptions = array_merge( $this->parserOptions, $options );

	// Exactly one validator runs, selected by $isFile.
	if ( $isFile ) {
		$this->validateFromFile( $input );
	} else {
		$this->validateFromString( $input );
	}
}
/**
 * Alternative constructor: from filename
 *
 * @param string $fname the filename of an XML document
 * @param callable $filterCallback (optional)
 *        Function to call to do additional custom validity checks from the
 *        SAX element handler event. This gives you access to the element
 *        namespace, name, attributes, and text contents.
 *        Filter should return 'true' to toggle on $this->filterMatch
 * @return XmlTypeCheck
 */
public static function newFromFilename( $fname, $filterCallback = null ) {
	// Third argument true: treat $fname as a path on disk.
	return new self( $fname, $filterCallback, true );
}
/**
 * Alternative constructor: from string
 *
 * @param string $string a string containing an XML element
 * @param callable $filterCallback (optional)
 *        Function to call to do additional custom validity checks from the
 *        SAX element handler event. This gives you access to the element
 *        namespace, name, attributes, and text contents.
 *        Filter should return 'true' to toggle on $this->filterMatch
 * @return XmlTypeCheck
 */
public static function newFromString( $string, $filterCallback = null ) {
	// Third argument false: treat $string as the XML document itself.
	return new self( $string, $filterCallback, false );
}
/**
 * Get the root element. Simple accessor to $rootElement
 *
 * @return string
 */
public function getRootElement() {
	return $this->rootElement;
}
/**
 * Get an XML parser with the root element handler.
 *
 * When a 'processing_instruction_handler' option is set, the parser is
 * also wired to processingInstructionHandler().
 *
 * @see XmlTypeCheck::rootElementOpen()
 * @return resource a resource handle for the XML parser
 */
private function getParser() {
	$parser = xml_parser_create_ns( 'UTF-8' );
	// case folding violates XML standard, turn it off
	xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
	// Only an element-start handler is installed at first; no end handler.
	xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
	if ( $this->parserOptions['processing_instruction_handler'] ) {
		xml_set_processing_instruction_handler(
			$parser,
			array( $this, 'processingInstructionHandler' )
		);
	}

	return $parser;
}
/**
 * Parse a file in 32 KiB chunks and record in $wellFormed whether it
 * parsed as well-formed XML.
 *
 * A file that does not exist, cannot be opened, or cannot be read is
 * reported as not well-formed, since nothing was successfully parsed.
 *
 * @param string $fname the filename
 */
private function validateFromFile( $fname ) {
	// Pessimistic default: only a complete, error-free parse flips this to true.
	$this->wellFormed = false;

	if ( !file_exists( $fname ) ) {
		return;
	}

	$file = fopen( $fname, "rb" );
	if ( !$file ) {
		return;
	}

	$parser = $this->getParser();
	$parsedOk = true;

	do {
		$chunk = fread( $file, 32768 );
		if ( $chunk === false ) {
			// Read failure: stop rather than loop on a stream that never reaches EOF.
			$parsedOk = false;
			break;
		}

		// The third argument tells the parser whether this is the final chunk.
		$ret = xml_parse( $parser, $chunk, feof( $file ) );
		if ( $ret == 0 ) {
			$parsedOk = false;
			break;
		}
	} while ( !feof( $file ) );

	// Single exit path: the handle and the parser are always released.
	fclose( $file );
	xml_parser_free( $parser );

	$this->wellFormed = $parsedOk;
}
/**
 * Parse an in-memory XML document in a single pass and record in
 * $wellFormed whether it parsed as well-formed XML.
 *
 * @param string $string the XML-input-string to be checked.
 */
private function validateFromString( $string ) {
	$parser = $this->getParser();
	// The whole document is passed at once, so it is also the final chunk.
	$ret = xml_parse( $parser, $string, true );
	xml_parser_free( $parser );

	// xml_parse() returns 0 on failure.
	$this->wellFormed = ( $ret != 0 );
}
/**
 * Element-start handler for the first element of the document.
 *
 * Records the root element name. If a filter callback is callable, the
 * parser is switched to the per-element handlers and the root element is
 * processed as a regular element; otherwise the element handlers are
 * removed.
 *
 * @param resource $parser the XML parser handle
 * @param string $name the element name
 * @param array $attribs the element attributes
 */
private function rootElementOpen( $parser, $name, $attribs ) {
	$this->rootElement = $name;

	if ( is_callable( $this->filterCallback ) ) {
		xml_set_element_handler(
			$parser,
			array( $this, 'elementOpen' ),
			array( $this, 'elementClose' )
		);
		xml_set_character_data_handler( $parser, array( $this, 'elementData' ) );
		// The root element itself must go through the filter too.
		$this->elementOpen( $parser, $name, $attribs );
	} else {
		// We only need the first open element
		xml_set_element_handler( $parser, false, false );
	}
}
/**
 * Element-start handler used while filtering: push a new frame for the
 * element onto the context and data stacks.
 *
 * @param resource $parser the XML parser handle
 * @param string $name the element name
 * @param array $attribs the element attributes
 */
private function elementOpen( $parser, $name, $attribs ) {
	$this->elementDataContext[] = array( $name, $attribs );
	// Start with empty text; elementData() appends to this entry.
	$this->elementData[] = '';
	// Keep the depth in step with the stacks; elementData() indexes by it.
	$this->stackDepth++;
}
/**
 * Element-end handler used while filtering: pop the element's frame and
 * pass its name, attributes and collected text to the filter callback.
 * A truthy return from the callback sets $filterMatch.
 *
 * @param resource $parser the XML parser handle
 * @param string $name the element name (superseded by the name stored
 *        when the element was opened)
 */
private function elementClose( $parser, $name ) {
	list( $name, $attribs ) = array_pop( $this->elementDataContext );
	$data = array_pop( $this->elementData );
	$this->stackDepth--;

	if ( call_user_func(
		$this->filterCallback,
		$name,
		$attribs,
		$data
	) ) {
		// Filter hit!
		$this->filterMatch = true;
	}
}
/**
 * Character-data handler used while filtering: append the trimmed text
 * chunk to the entry on top of the data stack.
 *
 * @param resource $parser the XML parser handle
 * @param string $data a chunk of character data
 */
private function elementData( $parser, $data ) {
	// xml_set_character_data_handler breaks the data on & characters, so
	// we collect any data here, and we'll run the callback in elementClose
	$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
}
/**
 * Processing-instruction handler: forward the instruction to the callback
 * configured under the 'processing_instruction_handler' option.
 * A truthy return from the callback sets $filterMatch.
 *
 * @param resource $parser the XML parser handle
 * @param string $target the processing instruction target
 * @param string $data the processing instruction data
 */
private function processingInstructionHandler( $parser, $target, $data ) {
	if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) {
		// Filter hit!
		$this->filterMatch = true;
	}
}