"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
import os
import types
import urllib
import urlparse
# Types that prepare_input_source() treats as "a system identifier given as
# a string".  The builtins are used directly: on Python 2 they are the very
# same objects as types.StringType / types.UnicodeType, and the lookup does
# not break on interpreters that lack those aliases.
try:
    _StringTypes = [str, unicode]
except NameError:
    # No separate unicode type is available on this interpreter.
    _StringTypes = [str]

# See whether the xmlcharrefreplace error handler is
# supported
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; do not leave the name behind.
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Every key of *d* that occurs in *s* is replaced with the corresponding
    value.  The replacements are applied one after another, in the
    dictionary's iteration order, so text produced by an earlier
    replacement can be matched by a later key.

    Returns the resulting string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string.  The shared default for *entities* is
    only read, never modified.
    """
    # must do ampersand first, otherwise the "&" of the entities produced
    # below would itself be escaped
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the unescaped string.  The shared default for *entities* is
    only read, never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, otherwise "&amp;lt;" would be unescaped twice
    return data.replace("&amp;", "&")
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped value including its surrounding quote characters.
    """
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both kinds of quote occur: wrap in double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters need no
            # escaping at all.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    The text is written to *out* (any object with a ``write`` method;
    ``sys.stdout`` when omitted).  *encoding* is the name announced in the
    XML declaration and used to encode any text that is not already a
    ``str``.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not been written as xmlns attributes yet.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output, encoding it first if it is not a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            # e.g. a Python 2 unicode object
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace uri, localname) pair.

        The local name is returned unprefixed when the pair has no
        namespace, or when the namespace is mapped to an empty prefix
        (the default namespace).  A namespace uri that was never declared
        through startPrefixMapping() raises KeyError.
        """
        if name[0]:
            prefix = self._current_context[name[0]]
            if prefix:
                return prefix + ":" + name[1]
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a snapshot of the mappings in force before this one, so
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        # Restore the original prefix from the namespace; names that are
        # not namespace-scoped use the unqualified part only.
        self._write('<' + self._qname(name))

        # Declare every mapping that came into scope since the last
        # start tag was written.
        for (prefix, uri) in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # default namespace: no prefix to attach
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        self._write(content)

    def processingInstruction(self, target, data):
        self._write('<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    # NOTE(review): the _err_handler, _cont_handler, _dtd_handler and
    # _ent_handler attributes used below are not assigned in this class;
    # presumably xmlreader.XMLReader provides them -- confirm there.

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # The reader whose events are filtered; may be set later with
        # setParent().
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register this filter as every handler of the parent reader, so
        # that all of its events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent
# --- Utility functions
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may also be a string (used as the system identifier) or an
    object with a ``read`` method (used as the byte stream); either is
    wrapped in a new InputSource first.

    When the source has no byte stream yet, its system identifier is
    resolved against *base*: an existing local file is opened in binary
    mode, anything else is opened as a URL.  The opened stream is stored
    on the returned source and is NOT closed here; closing it is left to
    whoever consumes the source.
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        # File-like object: keep a reference to it before the name
        # `source` is rebound to the wrapping InputSource.
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source