"""
SAX driver for the Pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
8 from xml
.sax
._exceptions
import *
10 # xml.parsers.expat does not raise ImportError in Jython
# Refuse to load this driver when the interpreter reports a "java" platform
# string: the expat-backed reader is declared unavailable there.
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
# Imports the expat binding and raises SAXReaderNotAvailable("expat not
# supported").
# NOTE(review): the embedded line numbers skip 16 and 18, so the statements
# that make the raise conditional (presumably a try/except ImportError around
# the import) are not visible here -- confirm against the upstream file.
17 from xml
.parsers
import expat
19 raise SAXReaderNotAvailable("expat not supported",None)
20 from xml
.sax
import xmlreader
, saxutils
, handler
# Module-level aliases for the two attribute container classes provided by
# xml.sax.xmlreader, so the element callbacks can name them directly.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
29 class ExpatParser(xmlreader
.IncrementalParser
, xmlreader
.Locator
):
30 "SAX driver for the Pyexpat C module."
# Constructor.  Passes bufsize on to xmlreader.IncrementalParser.__init__,
# creates an empty xmlreader.InputSource as the current source, stores
# namespaceHandling as the namespace flag, and starts with no lexical handler
# and an empty entity stack.
# NOTE(review): the embedded line numbers skip 35 and 38, so some attribute
# initialisations may be missing from this view -- confirm against upstream.
32 def __init__(self
, namespaceHandling
=0, bufsize
=2**16-20):
33 xmlreader
.IncrementalParser
.__init
__(self
, bufsize
)
34 self
._source
= xmlreader
.InputSource()
36 self
._namespaces
= namespaceHandling
37 self
._lex
_handler
_prop
= None
39 self
._entity
_stack
= []
# Normalises `source` with saxutils.prepare_input_source, registers this
# object as the content handler's document locator, then delegates the actual
# parsing to xmlreader.IncrementalParser.parse.
# NOTE(review): the embedded line numbers skip 46-48, so statements between
# the source normalisation and the locator registration are not visible here.
43 def parse(self
, source
):
44 "Parse an XML document from a URL or an InputSource."
45 source
= saxutils
.prepare_input_source(source
)
49 self
._cont
_handler
.setDocumentLocator(self
)
50 xmlreader
.IncrementalParser
.parse(self
, source
)
def prepareParser(self, source):
    """Give the expat parser the base URI of *source*, if it has one.

    source -- object exposing getSystemId(); when that returns something
              other than None the value is passed to self._parser.SetBase().
    """
    system_id = source.getSystemId()
    # Identity test: only a genuinely absent system id skips SetBase.
    if system_id is not None:
        self._parser.SetBase(system_id)
# Redefined setContentHandler to allow changing handlers during parsing
# Stores the new content handler through
# xmlreader.IncrementalParser.setContentHandler and then calls
# self._reset_cont_handler().
# NOTE(review): the embedded line numbers skip 60, so the re-binding call is
# presumably guarded by a condition that is not visible here -- confirm.
58 def setContentHandler(self
, handler
):
59 xmlreader
.IncrementalParser
.setContentHandler(self
, handler
)
61 self
._reset
_cont
_handler
()
def getFeature(self, name):
    """Return the state of the SAX feature called *name*.

    Only handler.feature_namespaces is known; its value is whatever is
    stored in self._namespaces.

    Raises SAXNotRecognizedException for every other feature name.
    """
    if name == handler.feature_namespaces:
        return self._namespaces
    raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
# Visible statements: raise SAXNotSupportedException("Cannot set features
# while parsing"); store `state` in self._namespaces when `name` equals
# handler.feature_namespaces; raise SAXNotRecognizedException for the name.
# NOTE(review): the embedded line numbers skip 69 and 73 and the final
# statement is cut off after the '%' operator, so the conditions guarding the
# two raises are not visible here -- confirm against the upstream file.
68 def setFeature(self
, name
, state
):
70 raise SAXNotSupportedException("Cannot set features while parsing")
71 if name
== handler
.feature_namespaces
:
72 self
._namespaces
= state
74 raise SAXNotRecognizedException("Feature '%s' not recognized" %
def getProperty(self, name):
    """Return the value of the SAX property called *name*.

    Only handler.property_lexical_handler is known; its value is whatever
    is stored in self._lex_handler_prop.

    Raises SAXNotRecognizedException for every other property name.
    """
    if name == handler.property_lexical_handler:
        return self._lex_handler_prop
    raise SAXNotRecognizedException("Property '%s' not recognized" % name)
# Visible statements: when `name` equals handler.property_lexical_handler,
# store `value` in self._lex_handler_prop and call
# self._reset_lex_handler_prop(); raise SAXNotRecognizedException for the name.
# NOTE(review): the embedded line numbers skip 85 and 87, so the conditions
# guarding the re-binding call and the raise are not visible here -- confirm.
82 def setProperty(self
, name
, value
):
83 if name
== handler
.property_lexical_handler
:
84 self
._lex
_handler
_prop
= value
86 self
._reset
_lex
_handler
_prop
()
88 raise SAXNotRecognizedException("Property '%s' not recognized" % name
)
90 # IncrementalParser methods
# Visible statements: notify the content handler with startDocument(), hand
# `data` and `isFinal` to the expat parser's Parse(), and build a
# SAXParseException from the parser's ErrorCode (using this object as the
# locator argument) that is passed to the error handler's fatalError().
# NOTE(review): the embedded line numbers skip 93-95, 97-98 and 104, so the
# guard around startDocument() and whatever makes the error path conditional
# (presumably a try/except around Parse) are not visible here -- confirm.
92 def feed(self
, data
, isFinal
= 0):
96 self
._cont
_handler
.startDocument()
99 # The isFinal parameter is internal to the expat reader.
100 # If it is set to true, expat will check validity of the entire
101 # document. When feeding chunks, they are not normally final -
102 # except when invoked from close.
103 self
._parser
.Parse(data
, isFinal
)
105 error_code
= self
._parser
.ErrorCode
106 exc
= SAXParseException(expat
.ErrorString(error_code
), None, self
)
107 # FIXME: when to invoke error()?
108 self
._err
_handler
.fatalError(exc
)
111 if self
._entity
_stack
:
112 # If we are completing an external entity, do nothing here
114 self
.feed("", isFinal
= 1)
115 self
._cont
_handler
.endDocument()
117 # break cycle created by expat handlers pointing to our methods
def _reset_cont_handler(self):
    """Point the expat callbacks at the current content handler.

    Processing instructions and character data are wired straight to the
    content handler's bound methods rather than through this object.
    """
    content = self._cont_handler
    self._parser.ProcessingInstructionHandler = content.processingInstruction
    self._parser.CharacterDataHandler = content.characters
def _reset_lex_handler_prop(self):
    """Point the expat lexical callbacks at the current lexical handler.

    Comments and CDATA section boundaries are wired to the comment,
    startCDATA and endCDATA methods of self._lex_handler_prop.
    """
    lexical = self._lex_handler_prop
    self._parser.CommentHandler = lexical.comment
    self._parser.StartCdataSectionHandler = lexical.startCDATA
    self._parser.EndCdataSectionHandler = lexical.endCDATA
132 self
._parser
= expat
.ParserCreate(None, " ")
133 self
._parser
.StartElementHandler
= self
.start_element_ns
134 self
._parser
.EndElementHandler
= self
.end_element_ns
136 self
._parser
= expat
.ParserCreate()
137 self
._parser
.StartElementHandler
= self
.start_element
138 self
._parser
.EndElementHandler
= self
.end_element
140 self
._reset
_cont
_handler
()
141 self
._parser
.UnparsedEntityDeclHandler
= self
.unparsed_entity_decl
142 self
._parser
.NotationDeclHandler
= self
.notation_decl
143 self
._parser
.StartNamespaceDeclHandler
= self
.start_namespace_decl
144 self
._parser
.EndNamespaceDeclHandler
= self
.end_namespace_decl
146 self
._decl
_handler
_prop
= None
147 if self
._lex
_handler
_prop
:
148 self
._reset
_lex
_handler
_prop
()
149 # self._parser.DefaultHandler =
150 # self._parser.DefaultHandlerExpand =
151 # self._parser.NotStandaloneHandler =
152 self
._parser
.ExternalEntityRefHandler
= self
.external_entity_ref
155 self
._entity
_stack
= []
# Locator method: reports the column via the expat parser's ErrorColumnNumber
# attribute, with a separate check for self._parser being None.
# NOTE(review): the embedded line numbers skip 161, so the statement executed
# when self._parser is None is not visible here -- confirm against upstream.
159 def getColumnNumber(self
):
160 if self
._parser
is None:
162 return self
._parser
.ErrorColumnNumber
# Locator method: reports the line via the expat parser's ErrorLineNumber
# attribute, with a separate check for self._parser being None.
# NOTE(review): the embedded line numbers skip 166, so the statement executed
# when self._parser is None is not visible here -- confirm against upstream.
164 def getLineNumber(self
):
165 if self
._parser
is None:
167 return self
._parser
.ErrorLineNumber
def getPublicId(self):
    """Return the public identifier of the source currently being read."""
    return self._source.getPublicId()
def getSystemId(self):
    """Return the system identifier of the source currently being read."""
    return self._source.getSystemId()
def start_element(self, name, attrs):
    """Report an element start to the content handler.

    name  -- element name as delivered by expat.
    attrs -- attribute mapping; it is wrapped in AttributesImpl before
             being passed to startElement().
    """
    attributes = AttributesImpl(attrs)
    self._cont_handler.startElement(name, attributes)
def end_element(self, name):
    """Report the end of element *name* to the content handler."""
    self._cont_handler.endElement(name)
# Namespace-aware element start.  Visible statements: split the element name
# with string.split, iterate over the attribute items splitting each
# attribute name the same way, use (None, aname) as a key form, collect the
# values in `newattrs`, and call startElementNS(pair, None,
# AttributesNSImpl(newattrs, {})) on the content handler.
# NOTE(review): the embedded line numbers skip 184-189, 192 and 194-196, so
# the creation of `newattrs` and the conditions choosing between the split
# result and the (None, name) form are not visible here -- confirm.
182 def start_element_ns(self
, name
, attrs
):
183 pair
= string
.split(name
)
190 for (aname
, value
) in attrs
.items():
191 apair
= string
.split(aname
)
193 apair
= (None, aname
)
197 newattrs
[apair
] = value
199 self
._cont
_handler
.startElementNS(pair
, None,
200 AttributesNSImpl(newattrs
, {}))
# Namespace-aware element end.  Visible statements: split the element name
# with string.split and call endElementNS(pair, None) on the content handler.
# NOTE(review): the embedded line numbers skip 204-208, so any adjustment of
# `pair` between the split and the handler call is not visible here.
202 def end_element_ns(self
, name
):
203 pair
= string
.split(name
)
209 self
._cont
_handler
.endElementNS(pair
, None)
211 # this is not used (call directly to ContentHandler)
def processing_instruction(self, target, data):
    """Pass a processing instruction on to the content handler.

    target -- the instruction's target name.
    data   -- the instruction's data string.
    """
    self._cont_handler.processingInstruction(target, data)
215 # this is not used (call directly to ContentHandler)
def character_data(self, data):
    """Pass a run of character data on to the content handler."""
    self._cont_handler.characters(data)
def start_namespace_decl(self, prefix, uri):
    """Report the start of a prefix-to-URI mapping to the content handler."""
    self._cont_handler.startPrefixMapping(prefix, uri)
def end_namespace_decl(self, prefix):
    """Report the end of the mapping for *prefix* to the content handler."""
    self._cont_handler.endPrefixMapping(prefix)
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
    """Relay an unparsed entity declaration to the DTD handler.

    The arguments arrive as (name, base, sysid, pubid, notation_name);
    the handler is called with (name, pubid, sysid, notation_name), so
    *base* is dropped and the public id is placed before the system id.
    """
    self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
def notation_decl(self, name, base, sysid, pubid):
    """Relay a notation declaration to the DTD handler.

    The arguments arrive as (name, base, sysid, pubid); the handler is
    called with (name, pubid, sysid), so *base* is dropped and the public
    id is placed before the system id.
    """
    self._dtd_handler.notationDecl(name, pubid, sysid)
# External entity callback.  Visible statements: resolve the entity through
# the entity resolver (pubid first, then sysid), normalise the result with
# saxutils.prepare_input_source using the current source's system id as part
# of the second argument, push the current (parser, source) pair on the
# entity stack, switch to a parser made by ExternalEntityParserCreate(context)
# and to the new source, run xmlreader.IncrementalParser.parse on it,
# return 0, and restore the (parser, source) pair from the top of the stack
# before deleting that entry.
# NOTE(review): the embedded line numbers skip 235-236, 240-241, 243 and
# 245, so the rest of the prepare_input_source call, whatever makes
# `return 0` conditional (presumably an exception handler around the nested
# parse) and the final return value are not visible here -- confirm.
231 def external_entity_ref(self
, context
, base
, sysid
, pubid
):
232 source
= self
._ent
_handler
.resolveEntity(pubid
, sysid
)
233 source
= saxutils
.prepare_input_source(source
,
234 self
._source
.getSystemId() or
237 self
._entity
_stack
.append((self
._parser
, self
._source
))
238 self
._parser
= self
._parser
.ExternalEntityParserCreate(context
)
239 self
._source
= source
242 xmlreader
.IncrementalParser
.parse(self
, source
)
244 return 0 # FIXME: save error info here?
246 (self
._parser
, self
._source
) = self
._entity
_stack
[-1]
247 del self
._entity
_stack
[-1]
def create_parser(*args, **kwargs):
    """Build and return a new ExpatParser.

    Every positional and keyword argument is handed to the ExpatParser
    constructor unchanged.
    """
    # Argument unpacking replaces apply(), which no longer exists in
    # Python 3; the call is otherwise the same.
    return ExpatParser(*args, **kwargs)
# Script entry point: installs an xml.sax.XMLGenerator as content handler and
# an xml.sax.ErrorHandler as error handler on `p`, then parses the relative
# path "../../../hamlet.xml".
# NOTE(review): the embedded line numbers skip 258-259, so the statements
# that import xml.sax and create `p` are not visible here -- confirm.
257 if __name__
== "__main__":
260 p
.setContentHandler(xml
.sax
.XMLGenerator())
261 p
.setErrorHandler(xml
.sax
.ErrorHandler())
262 p
.parse("../../../hamlet.xml")