This commit was manufactured by cvs2svn to create tag 'r241c1'.
[python/dscho.git] / Lib / xml / sax / saxutils.py
blob582b0089c4bf6d98e636198d085f21ef3ba9d709
1 """\
2 A library of useful helper classes to the SAX classes, for the
3 convenience of application and driver writers.
4 """
6 import os, urlparse, urllib, types
7 import handler
8 import xmlreader
# String types accepted as a system identifier.  The Unicode string type
# is only added when the ``types`` module of this interpreter provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
# Pick the error policy used when encoding output: use the
# "xmlcharrefreplace" handler when the codecs module provides it,
# otherwise fall back to "strict".
try:
    from codecs import xmlcharrefreplace_errors as _probe
except ImportError:
    _error_handling = "strict"
else:
    # The import was only a capability probe; do not keep the name around.
    del _probe
    _error_handling = "xmlcharrefreplace"
def __dict_replace(s, d):
    """Return *s* with every key of *d* replaced by its mapped value.

    The replacements are applied one after another in the iteration
    order of *d*, each one operating on the result of the previous one.
    """
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
def escape(data, entities={}):
    """Return *data* with &, < and > replaced by XML entity references.

    Additional replacements can be requested through the optional
    *entities* dictionary: each key found in the data is replaced with
    its corresponding value.  Keys and values must all be strings.
    """
    # The ampersand has to come first; otherwise the ampersands of the
    # references produced for > and < would be escaped a second time.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
def unescape(data, entities={}):
    """Return *data* with &amp;, &lt; and &gt; turned back into characters.

    Additional replacements can be requested through the optional
    *entities* dictionary: each key found in the data is replaced with
    its corresponding value.  Keys and values must all be strings.
    """
    for reference, raw in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, raw)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand has to be handled last so that text such as
    # "&amp;lt;" ends up as "&lt;" rather than "<".
    unescaped = data.replace("&amp;", "&")
    return unescaped
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.  Newline, carriage return and tab characters are written
    as numeric character references, because an XML parser would
    otherwise normalize them to spaces when reading the attribute back.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The returned string includes the surrounding quote characters.
    """
    # Work on a copy so that neither the caller's mapping nor the shared
    # default dictionary is ever modified.
    replacements = dict(entities)
    replacements.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, replacements)
    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        # Only double quotes are present: single-quote the value instead
        # of escaping them.
        return "'%s'" % data
    # Both kinds of quote are present: use double quotes and escape the
    # embedded ones.
    return '"%s"' % data.replace('"', "&quot;")
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    Output goes to *out*, or to ``sys.stdout`` when *out* is omitted.
    Text that is not a ``str`` is encoded with *encoding* before it is
    written; the same encoding name is placed in the XML declaration.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that still
        # have to be written as xmlns attributes on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output, encoding it first unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Build the qualified name for a (namespace URI, local name) pair.

        Names without a namespace, and names whose namespace is mapped to
        the default (empty) prefix, are returned unprefixed.  A namespace
        URI that has not been declared raises KeyError.
        """
        uri, localname = name[0], name[1]
        if uri:
            # The 'xml' prefix is bound to this namespace by definition;
            # it is never declared, so it is not looked up in the context.
            if uri == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + localname
            prefix = self._current_context[uri]
            if prefix:
                return prefix + ":" + localname
        return localname

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix mapping and queue its xmlns declaration."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Make the most recently saved context current and pop it."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its quoted attributes."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        *name* and the keys of *attrs* are (namespace URI, local name)
        pairs.  Any prefix mappings queued since the previous element are
        written as xmlns attributes before the regular attributes.
        """
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                # A mapping without a prefix declares the default namespace.
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (namespace URI, local name) pair *name*."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):
    """Base class for filters placed between an XMLReader and the
    application's event handlers.

    Every event received from the parent reader is handed on unchanged
    to the registered handlers, and every configuration request is
    handed up unchanged to the parent.  Subclasses override individual
    methods to alter the event stream or the requests on their way
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward an error to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward a fatal error to the error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward a warning to the error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward a start-of-element event."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an end-of-element event."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-aware start-of-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-aware end-of-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity event."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver returns for this entity."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter as every handler of the parent, then let
        the parent parse *source*."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        """Pass the locale on to the parent."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent's state for feature *name*."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set feature *name* on the parent."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent's value for property *name*."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set property *name* on the parent."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader."""
        self._parent = parent
269 # --- Utility functions
def prepare_input_source(source, base = ""):
    """Return an InputSource for *source* that is ready for reading.

    *source* may be a system identifier string (including instances of
    string subclasses), an object with a ``read`` attribute, or an
    InputSource.  Strings and file-like objects are wrapped in a new
    InputSource; for a file-like object its ``name`` attribute, when
    present, becomes the system identifier.

    If the resulting InputSource has no byte stream, one is opened from
    its system identifier: a local file relative to the directory of
    *base* when such a file exists, otherwise the identifier is joined
    with *base* as a URL and opened with ``urllib.urlopen``.  The stream
    opened here is stored on the InputSource and is not closed by this
    function.
    """
    # isinstance (rather than an exact type match) also accepts
    # subclasses of the string types.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            # Prefer a local file when the identifier names one.
            source.setSystemId(sysidfilename)
            stream = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            stream = urllib.urlopen(source.getSystemId())

        source.setByteStream(stream)

    return source