"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    # An importable module without ParserCreate is of no use to this driver.
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short aliases used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import weakref
except ImportError:
    def _mkproxy(o):
        # No weak references available: hand back the object itself.
        return o
else:
    _mkproxy = weakref.proxy
    del weakref
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        """Remember `parser` through _mkproxy instead of a direct reference."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber for the referenced parser."""
        parser = self._ref
        if parser._parser is None:
            # NOTE(review): fallback used while no expat parser exists;
            # the value was lost in extraction -- confirm against upstream.
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber for the referenced parser."""
        parser = self._ref
        if parser._parser is None:
            # NOTE(review): fallback used while no expat parser exists;
            # the value was lost in extraction -- confirm against upstream.
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    "SAX driver for the Pyexpat C module."

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle reader.

        namespaceHandling -- initial value of the namespaces feature; when
            true, reset() builds a namespace-aware expat parser.
        bufsize -- forwarded unchanged to IncrementalParser.__init__.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None            # expat parser, built by reset()
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []        # suspended (parser, source) pairs
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Hand the source's system id, if it has one, to expat as base."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install `handler`; re-bind expat's callbacks if a parse is active."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature `name`.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            # These can never be switched on (see setFeature).
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature `name` to `state`.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable a feature expat cannot provide, and
        SAXNotRecognizedException for an unknown feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an interning dict that was already installed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException("expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException("expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException("expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the value of property `name`.

        The XML string property is only available while an expat parser
        exists and only if that parser offers GetInputContext.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException("This version of expat does not support getting the XML string")
            else:
                raise SAXNotSupportedException("XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property `name` to `value`.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for an unknown name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            # A running expat parser must be re-pointed at the new handler.
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass `data` to expat, starting a new document if none is active.

        An expat error is wrapped in a SAXParseException and reported
        through the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: flush expat and emit endDocument()."""
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        """Bind expat's PI and character callbacks to the content handler."""
        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Bind expat's comment and CDATA callbacks to the lexical handler."""
        lex = self._lex_handler_prop
        self._parser.CommentHandler = lex.comment
        self._parser.StartCdataSectionHandler = lex.startCDATA
        self._parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        """Create a fresh expat parser and wire all callbacks to it."""
        if self._namespaces:
            # " " is the separator expat puts between URI and local name;
            # _split_name() relies on it.
            self._parser = expat.ParserCreate(None, " ", intern = self._interning)
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
        # Not wired up: DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's ErrorColumnNumber, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's ErrorLineNumber, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current source."""
        return self._source.getSystemId()

    # event handlers

    def _split_name(self, name):
        """Turn a whitespace-separated expat name into a tuple.

        A name with a single part becomes (None, name); otherwise the
        parts are returned as a tuple.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """Report a start tag with element and attribute names split."""
        newattrs = {}
        for (aname, value) in attrs.items():
            newattrs[self._split_name(aname)] = value

        self._cont_handler.startElementNS(self._split_name(name), None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        """Report an end tag with the element name split."""
        self._cont_handler.endElementNS(self._split_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # The DTD handler takes pubid before sysid; `base` is not passed on.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        # The DTD handler takes pubid before sysid; `base` is not passed on.
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external general entity.

        Returns 1 on success (or when external general entities are
        switched off) and 0 when parsing the entity raised an exception.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Suspend the current parser/source while the entity is read.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack.pop()
        return 1
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Direct call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and echo it back as XML.
    import xml.sax
    import xml.sax.saxutils
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")