More installation info. Bump alpha version.
[python/dscho.git] / Lib / xml / sax / expatreader.py
blob679ac21af5cf7e3d237bbafe30a878cf63024f25
1 """
2 SAX driver for the Pyexpat C module. This driver works with
3 pyexpat.__version__ == '2.22'.
4 """
6 version = "0.20"
8 from xml.sax._exceptions import *
9 from xml.sax.handler import feature_validation, feature_namespaces
10 from xml.sax.handler import feature_namespace_prefixes
11 from xml.sax.handler import feature_external_ges, feature_external_pes
12 from xml.sax.handler import feature_string_interning
13 from xml.sax.handler import property_xml_string, property_interning_dict
# Importing xml.parsers.expat does not raise ImportError in Jython, so
# that platform has to be rejected explicitly before the import is tried.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys

# A missing expat module and an expat module without ParserCreate are
# reported identically: this reader is not available.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level shortcuts used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl
# _mkproxy(o) wraps the parser before the locator stores it.  When the
# interpreter provides weak references, the wrapper is a weak proxy, which
# avoids a cycle between the parser and the content handler; otherwise the
# object itself is handed back and we just have to pretend.
try:
    import _weakref
except ImportError:
    def _mkproxy(o):
        return o
else:
    del _weakref
    import weakref
    _mkproxy = weakref.proxy
    del weakref
46 # --- ExpatLocator
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    The parser is stored through _mkproxy() rather than directly, so that
    (where weak references are available) no circular reference is created
    between the parser and the content handler that receives this locator.
    """
    def __init__(self, parser):
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None without an expat parser."
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorColumnNumber
        return None

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 without an expat parser."
        owner = self._ref
        if owner._parser is not None:
            return owner._parser.ErrorLineNumber
        return 1

    def getPublicId(self):
        "Return the public identifier of the parser's current input source."
        owner = self._ref
        if owner is not None:
            return owner._source.getPublicId()
        return None

    def getSystemId(self):
        "Return the system identifier of the parser's current input source."
        owner = self._ref
        if owner is not None:
            return owner._source.getSystemId()
        return None
82 # --- ExpatParser
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    namespaceHandling selects namespace-aware parsing (the initial value
    of feature_namespaces); bufsize is passed on to IncrementalParser.
    The object also acts as its own Locator, reporting the position and
    identifiers of the entity currently being parsed.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None             # expat parser; None unless active
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []         # (parser, source) of outer entities
        self._external_ges = 1
        self._interning = None          # dict handed to expat, or None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        "Tell expat the base (the system id of source), when there is one."
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            # expat holds bound methods of the old handler; rebind them.
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of the feature called name.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            # These can never be switched on (see setFeature).
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the feature called name to state.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable a feature this driver cannot provide, and
        SAXNotRecognizedException for an unknown feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing dictionary (possibly supplied through
                # property_interning_dict) instead of replacing it.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException("expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException("expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException("expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException("Feature '%s' not recognized" %
                                            name)

    def getProperty(self, name):
        """Return the value of the property called name.

        property_xml_string is only available while an expat parser
        exists; SAXNotSupportedException is raised otherwise.  Unknown
        names raise SAXNotRecognizedException.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException("This version of expat does not support getting the XML string")
            else:
                raise SAXNotSupportedException("XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the property called name to value.

        Raises SAXNotSupportedException for the read-only
        property_xml_string and SAXNotRecognizedException for an unknown
        property name.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                # Takes effect immediately; value may also be None, which
                # removes the lexical callbacks again.
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Hand a chunk of document text to expat.

        The first chunk after a reset creates a fresh parser and emits
        startDocument().  expat errors are wrapped in a SAXParseException
        and passed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error:
            error_code = self._parser.ErrorCode
            exc = SAXParseException(expat.ErrorString(error_code), None, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: flush expat and emit endDocument().

        Does nothing while an external entity is being completed, and
        nothing when there is no expat parser to finish (close() called
        twice, or before any parse was set up).  Without the latter check
        feed() would create a new parser and report a spurious
        "no element found" error for an empty document.
        """
        if self._entity_stack or self._parser is None:
            return
        self.feed("", isFinal = 1)
        self._cont_handler.endDocument()
        self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        # These two events go straight from expat to the content handler.
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        # Point expat's comment/CDATA callbacks at the lexical handler, or
        # clear them when the property has been set back to None (the
        # unconditional attribute lookups used to fail in that case).
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA

    def reset(self):
        "Create a new expat parser and install all callbacks on it."
        if self._namespaces:
            # With " " as separator, expat reports names that have a
            # namespace as "uri localname" (split in start_element_ns).
            self._parser = expat.ParserCreate(None, " ", intern = self._interning)
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        "Return expat's ErrorColumnNumber, or None without an expat parser."
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        "Return expat's ErrorLineNumber, or 1 without an expat parser."
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        return self._source.getPublicId()

    def getSystemId(self):
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        # A name without a separator has no namespace: (None, name).
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        else:
            pair = tuple(pair)

        # Attribute names are converted to (uri, localname) the same way.
        newattrs = {}
        for (aname, value) in attrs.items():
            apair = aname.split()
            if len(apair) == 1:
                apair = (None, aname)
            else:
                apair = tuple(apair)

            newattrs[apair] = value

        # No qualified names are reported: qname is None and the
        # attribute qname mapping is empty.
        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, {}))

    def end_element_ns(self, name):
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        # expat's argument order differs from the DTDHandler's.
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Resolve and parse an external entity with a sub-parser.

        Returns 1 on success (or when external general entities are
        disabled) and 0 when parsing the entity raised an exception.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # While the stack is non-empty, close() is a no-op, so finishing
        # the entity does not end the enclosing document.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE(review): deliberately broad; the entity's parser and
            # source stay current and the stack is not popped on this path.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1
352 # ---
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return ExpatParser(*args, **kwargs)
357 # ---
if __name__ == "__main__":
    # Manual smoke test: parse a sample document and echo it as XML.
    # XMLGenerator lives in xml.sax.saxutils, not in the xml.sax package
    # namespace, so it is imported from there explicitly.
    import xml.sax
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")