Support offsets in prefix searching
[mediawiki.git] / includes / libs / XmlTypeCheck.php
blobaca857e9fa0e8eaa8cc360077bcc9ecae3853dea
<?php
/**
 * XML syntax and type checker.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
/**
 * Checks whether an XML document is well-formed, records the name of its
 * root element, and optionally runs a caller-supplied filter over every
 * element (and processing instruction) encountered while parsing.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Note that this doesn't check schema validity.
	 */
	public $wellFormed = false;

	/**
	 * Will be set to true if the optional element filter returned
	 * a match at some point.
	 */
	public $filterMatch = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 */
	public $rootElement = '';

	/**
	 * Optional element filter, invoked as ( $name, $attribs, $data ) each
	 * time an element is closed. Declared explicitly so it is no longer
	 * created as a dynamic property; kept public because the dynamic
	 * property it replaces was implicitly public.
	 */
	public $filterCallback = null;

	/**
	 * A stack of strings containing the data of each xml element as it's processed. Append
	 * data to the top string of the stack, then pop off the string and process it when the
	 * element is closed.
	 */
	protected $elementData = array();

	/**
	 * A stack of element names and attributes, as we process them.
	 */
	protected $elementDataContext = array();

	/**
	 * Current depth of the data stack.
	 */
	protected $stackDepth = 0;

	/**
	 * Additional parsing options
	 */
	private $parserOptions = array(
		'processing_instruction_handler' => '',
	);

	/**
	 * @param string $input a filename or string containing the XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        SAX element handler event. This gives you access to the element
	 *        namespace, name, attributes, and text contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @param bool $isFile (optional) indicates if the first parameter is a
	 *        filename (default, true) or if it is a string (false)
	 * @param array $options list of additional parsing options:
	 *        processing_instruction_handler: Callback for xml_set_processing_instruction_handler
	 */
	public function __construct( $input, $filterCallback = null, $isFile = true, $options = array() ) {
		$this->filterCallback = $filterCallback;
		$this->parserOptions = array_merge( $this->parserOptions, $options );

		if ( $isFile ) {
			$this->validateFromFile( $input );
		} else {
			$this->validateFromString( $input );
		}
	}

	/**
	 * Alternative constructor: from filename
	 *
	 * @param string $fname the filename of an XML document
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        SAX element handler event. It is called with the element name
	 *        (including namespace), its attributes, and its collected text
	 *        contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @return XmlTypeCheck
	 */
	public static function newFromFilename( $fname, $filterCallback = null ) {
		return new self( $fname, $filterCallback, true );
	}

	/**
	 * Alternative constructor: from string
	 *
	 * @param string $string a string containing an XML element
	 * @param callable $filterCallback (optional)
	 *        Function to call to do additional custom validity checks from the
	 *        SAX element handler event. It is called with the element name
	 *        (including namespace), its attributes, and its collected text
	 *        contents.
	 *        Filter should return 'true' to toggle on $this->filterMatch
	 * @return XmlTypeCheck
	 */
	public static function newFromString( $string, $filterCallback = null ) {
		return new self( $string, $filterCallback, false );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement
	 *
	 * @return string
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Get an XML parser with the root element handler.
	 * @see XmlTypeCheck::rootElementOpen()
	 * @return resource a resource handle for the XML parser
	 */
	private function getParser() {
		$parser = xml_parser_create_ns( 'UTF-8' );
		// case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
		xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
		// Only hook processing instructions when the caller asked for it.
		if ( $this->parserOptions['processing_instruction_handler'] ) {
			xml_set_processing_instruction_handler(
				$parser,
				array( $this, 'processingInstructionHandler' )
			);
		}
		return $parser;
	}

	/**
	 * Parse the given file in chunks and record the result in $this->wellFormed.
	 *
	 * A file that does not exist, cannot be opened, or cannot be read is
	 * reported as not well-formed.
	 *
	 * @param string $fname the filename
	 */
	private function validateFromFile( $fname ) {
		$parser = $this->getParser();
		$ok = false;

		$file = file_exists( $fname ) ? fopen( $fname, 'rb' ) : false;
		if ( $file ) {
			$ok = true;
			do {
				$chunk = fread( $file, 32768 );
				// A failed read or a parse error both end the check; the
				// third argument tells the parser when the last chunk arrives.
				if ( $chunk === false || !xml_parse( $parser, $chunk, feof( $file ) ) ) {
					$ok = false;
					break;
				}
			} while ( !feof( $file ) );

			fclose( $file );
		}

		xml_parser_free( $parser );
		$this->wellFormed = $ok;
	}

	/**
	 * Parse the given string in one pass and record the result in
	 * $this->wellFormed.
	 *
	 * @param string $string the XML-input-string to be checked.
	 */
	private function validateFromString( $string ) {
		$parser = $this->getParser();
		$ret = xml_parse( $parser, $string, true );
		xml_parser_free( $parser );

		$this->wellFormed = (bool)$ret;
	}

	/**
	 * Start-element handler used for the first element only: records the
	 * root element name, then either switches to the full element handlers
	 * (when a filter callback is set) or stops listening for elements.
	 *
	 * @param $parser
	 * @param $name
	 * @param $attribs
	 */
	private function rootElementOpen( $parser, $name, $attribs ) {
		$this->rootElement = $name;

		if ( is_callable( $this->filterCallback ) ) {
			xml_set_element_handler(
				$parser,
				array( $this, 'elementOpen' ),
				array( $this, 'elementClose' )
			);
			xml_set_character_data_handler( $parser, array( $this, 'elementData' ) );
			// The root element itself must go through the filter as well.
			$this->elementOpen( $parser, $name, $attribs );
		} else {
			// We only need the first open element
			xml_set_element_handler( $parser, false, false );
		}
	}

	/**
	 * Start-element handler: pushes the element's name/attributes and an
	 * empty text buffer onto the stacks.
	 *
	 * @param $parser
	 * @param $name
	 * @param $attribs
	 */
	private function elementOpen( $parser, $name, $attribs ) {
		$this->elementDataContext[] = array( $name, $attribs );
		$this->elementData[] = '';
		$this->stackDepth++;
	}

	/**
	 * End-element handler: pops the element's context and collected text,
	 * then runs the filter callback on them.
	 *
	 * @param $parser
	 * @param $name
	 */
	private function elementClose( $parser, $name ) {
		// The name and attributes come from the stack pushed by elementOpen().
		list( $name, $attribs ) = array_pop( $this->elementDataContext );
		$data = array_pop( $this->elementData );
		$this->stackDepth--;

		if ( call_user_func(
			$this->filterCallback,
			$name,
			$attribs,
			$data
		) ) {
			// Filter hit!
			$this->filterMatch = true;
		}
	}

	/**
	 * Character-data handler: appends text to the buffer of the element
	 * currently on top of the stack.
	 *
	 * @param $parser
	 * @param $data
	 */
	private function elementData( $parser, $data ) {
		// xml_set_character_data_handler breaks the data on & characters, so
		// we collect any data here, and we'll run the callback in elementClose.
		// Note that every chunk is trimmed individually before being appended.
		$this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
	}

	/**
	 * Processing-instruction handler: forwards the instruction to the
	 * callback configured in the 'processing_instruction_handler' option.
	 *
	 * @param $parser
	 * @param $target
	 * @param $data
	 */
	private function processingInstructionHandler( $parser, $target, $data ) {
		if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) {
			// Filter hit!
			$this->filterMatch = true;
		}
	}
}