minor changes
[worddb.git] / libs / elementtree / SimpleXMLWriter.py
blobaf3023f4cb654c143f00d9aded082aa2d0478fbc
2 # SimpleXMLWriter
3 # $Id: SimpleXMLWriter.py 2312 2005-03-02 18:13:39Z fredrik $
5 # a simple XML writer
7 # history:
8 # 2001-12-28 fl created
9 # 2002-11-25 fl fixed attribute encoding
10 # 2002-12-02 fl minor fixes for 1.5.2
11 # 2004-06-17 fl added pythondoc markup
12 # 2004-07-23 fl added flush method (from Jay Graves)
13 # 2004-10-03 fl added declaration method
15 # Copyright (c) 2001-2004 by Fredrik Lundh
17 # fredrik@pythonware.com
18 # http://www.pythonware.com
20 # --------------------------------------------------------------------
21 # The SimpleXMLWriter module is
23 # Copyright (c) 2001-2004 by Fredrik Lundh
25 # By obtaining, using, and/or copying this software and/or its
26 # associated documentation, you agree that you have read, understood,
27 # and will comply with the following terms and conditions:
29 # Permission to use, copy, modify, and distribute this software and
30 # its associated documentation for any purpose and without fee is
31 # hereby granted, provided that the above copyright notice appears in
32 # all copies, and that both that copyright notice and this permission
33 # notice appear in supporting documentation, and that the name of
34 # Secret Labs AB or the author not be used in advertising or publicity
35 # pertaining to distribution of the software without specific, written
36 # prior permission.
38 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
39 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
40 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
41 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
42 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
43 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
44 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
45 # OF THIS SOFTWARE.
46 # --------------------------------------------------------------------
49 # Tools to write XML files, without having to deal with encoding
50 # issues, well-formedness, etc.
51 # <p>
52 # The current version does not provide built-in support for
53 # namespaces. To create files using namespaces, you have to provide
54 # "xmlns" attributes and explicitly add prefixes to tags and
55 # attributes.
57 # <h3>Patterns</h3>
59 # The following example generates a small XHTML document.
60 # <pre>
62 # from elementtree.SimpleXMLWriter import XMLWriter
63 # import sys
65 # w = XMLWriter(sys.stdout)
67 # html = w.start("html")
69 # w.start("head")
70 # w.element("title", "my document")
71 # w.element("meta", name="generator", value="my application 1.0")
72 # w.end()
74 # w.start("body")
75 # w.element("h1", "this is a heading")
76 # w.element("p", "this is a paragraph")
78 # w.start("p")
79 # w.data("this is ")
80 # w.element("b", "bold")
81 # w.data(" and ")
82 # w.element("i", "italic")
83 # w.data(".")
84 # w.end("p")
86 # w.close(html)
87 # </pre>
90 import re, sys, string
try:
    unicode
except NameError:
    # Python 3: str is already Unicode text and text-mode file objects
    # expect text, so "encoding" only checks that the text can be
    # represented in the target encoding.  (This replaces the old
    # Python 1.5.2 fallback, where encode() was a no-op.)
    def encode(s, encoding):
        """Return s unchanged if it can be encoded with the given encoding.

        Raises UnicodeError if s contains characters the encoding cannot
        represent, so that callers can fall back to encode_entity().
        """
        s.encode(encoding)
        return s
    # reserved characters plus every non-ASCII code point
    _escape = re.compile("[&<>\"\u0080-\U0010ffff]+")
else:
    # Python 2: produce an 8-bit string in the requested encoding.
    def encode(s, encoding):
        """Return s encoded with the given encoding.

        Raises UnicodeError if s cannot be encoded.
        """
        return s.encode(encoding)
    # reserved characters plus every non-ASCII character
    _escape = re.compile(u"[&<>\"\u0080-\uffff]+")

def encode_entity(text, pattern=_escape):
    """Return text with reserved and non-ASCII characters replaced by
    numeric character references ("&#NNN;").

    The reserved characters handled here are &, <, > and the double
    quote, so the argument must be raw (not yet escaped) text.  The
    pattern argument is the compiled regular expression that selects
    the runs of characters to replace.
    """
    def escape_entities(m):
        # one decimal character reference per matched character
        return "".join(["&#%d;" % ord(char) for char in m.group()])
    return encode(pattern.sub(escape_entities, text), "ascii")

# the compiled pattern lives on as encode_entity's default argument only
del _escape
116 # the following functions assume an ascii-compatible encoding
117 # (or "utf-16")
def escape_cdata(s, encoding=None, replace=None):
    """Escape character data for use as XML element content.

    The characters &, < and > are replaced by the predefined entities.
    If an encoding is given, the result is passed through encode(); text
    that cannot be represented in that encoding is written with numeric
    character references instead.

    s        -- the text to escape.
    encoding -- optional target encoding.
    replace  -- optional callable (text, old, new) used for the
                substitutions; defaults to the text's own replace method.
    """
    if replace is None:
        def replace(text, old, new):
            return text.replace(old, new)
    escaped = replace(s, "&", "&amp;")
    escaped = replace(escaped, "<", "&lt;")
    escaped = replace(escaped, ">", "&gt;")
    if encoding:
        try:
            return encode(escaped, encoding)
        except UnicodeError:
            # encode_entity() escapes the reserved characters itself, so
            # it must be given the raw text; passing the already escaped
            # text would turn "&amp;" into "&#38;amp;".
            return encode_entity(s)
    return escaped
def escape_attrib(s, encoding=None, replace=None):
    """Escape text for use as an XML attribute value.

    The characters &, ', ", < and > are replaced by the predefined
    entities.  If an encoding is given, the result is passed through
    encode(); text that cannot be represented in that encoding is
    written with numeric character references instead.

    s        -- the attribute value to escape.
    encoding -- optional target encoding.
    replace  -- optional callable (text, old, new) used for the
                substitutions; defaults to the text's own replace method.
    """
    if replace is None:
        def replace(text, old, new):
            return text.replace(old, new)
    escaped = replace(s, "&", "&amp;")
    escaped = replace(escaped, "'", "&apos;")
    escaped = replace(escaped, "\"", "&quot;")
    escaped = replace(escaped, "<", "&lt;")
    escaped = replace(escaped, ">", "&gt;")
    if encoding:
        try:
            return encode(escaped, encoding)
        except UnicodeError:
            # encode_entity() escapes &, <, > and " itself, so it must be
            # given the raw text; passing the already escaped text would
            # turn "&amp;" into "&#38;amp;".  It leaves the apostrophe
            # alone, so that one is escaped afterwards.
            return replace(encode_entity(s), "'", "&apos;")
    return escaped
144 # XML writer class.
146 # @param file A file or file-like object. This object must implement
147 # a <b>write</b> method that takes an 8-bit string.
148 # @param encoding Optional encoding.
class XMLWriter:

    def __init__(self, file, encoding="us-ascii"):
        if not hasattr(file, "write"):
            # not a file-like object; treat the argument as a file name
            file = open(file, "w")
        self.__write = file.write
        if hasattr(file, "flush"):
            # shadow the do-nothing flush method defined on the class
            self.flush = file.flush
        self.__open = 0 # true if start tag is open
        self.__tags = [] # escaped tags of the currently open elements
        self.__data = [] # character data not yet written
        self.__encoding = encoding

    def __flush(self):
        # flush internal buffers: finish a pending start tag, then write
        # any buffered character data
        if self.__open:
            self.__write(">")
            self.__open = 0
        if self.__data:
            data = "".join(self.__data)
            self.__write(escape_cdata(data, self.__encoding))
            self.__data = []

    ##
    # Writes an XML declaration.  The encoding is left out of the
    # declaration when it is "us-ascii" or "utf-8".

    def declaration(self):
        encoding = self.__encoding
        if encoding == "us-ascii" or encoding == "utf-8":
            self.__write("<?xml version='1.0'?>\n")
        else:
            self.__write("<?xml version='1.0' encoding='%s'?>\n" % encoding)

    ##
    # Opens a new element.  Attributes can be given as keyword
    # arguments, or as a string/string dictionary.  You can pass in
    # 8-bit strings or Unicode strings; the former are assumed to use
    # the encoding passed to the constructor.  The method returns an
    # opaque identifier that can be passed to the <b>close</b> method,
    # to close all open elements up to and including this one.
    #
    # @param tag Element tag.
    # @param attrib Attribute dictionary.  Alternatively, attributes
    #    can be given as keyword arguments.
    # @return An element identifier.

    def start(self, tag, attrib={}, **extra):
        self.__flush()
        tag = escape_cdata(tag, self.__encoding)
        self.__data = []
        self.__tags.append(tag)
        self.__write("<%s" % tag)
        if attrib or extra:
            # work on a copy; the caller's dictionary (and the shared
            # default) is never modified
            attrib = attrib.copy()
            attrib.update(extra)
            # attributes are written in sorted order
            for k, v in sorted(attrib.items()):
                k = escape_cdata(k, self.__encoding)
                v = escape_attrib(v, self.__encoding)
                self.__write(" %s=\"%s\"" % (k, v))
        # the start tag stays open so that end() can write " />" for an
        # element without content
        self.__open = 1
        return len(self.__tags)-1

    ##
    # Adds a comment to the output stream.
    #
    # @param comment Comment text, as an 8-bit string or Unicode string.

    def comment(self, comment):
        self.__flush()
        self.__write("<!-- %s -->\n" % escape_cdata(comment, self.__encoding))

    ##
    # Adds character data to the output stream.  The data is buffered
    # and written when the buffers are next flushed.
    #
    # @param text Character data, as an 8-bit string or Unicode string.

    def data(self, text):
        self.__data.append(text)

    ##
    # Closes the current element (opened by the most recent call to
    # <b>start</b>).
    #
    # @param tag Element tag.  If given, the tag must match the start
    #    tag.  If omitted, the current element is closed.

    def end(self, tag=None):
        if tag:
            assert self.__tags, "unbalanced end(%s)" % tag
            assert escape_cdata(tag, self.__encoding) == self.__tags[-1],\
                   "expected end(%s), got %s" % (self.__tags[-1], tag)
        else:
            assert self.__tags, "unbalanced end()"
        tag = self.__tags.pop()
        if self.__data:
            self.__flush()
        elif self.__open:
            # no content was written; use the empty-element form
            self.__open = 0
            self.__write(" />")
            return
        self.__write("</%s>" % tag)

    ##
    # Closes open elements, up to (and including) the element identified
    # by the given identifier.
    #
    # @param id Element identifier, as returned by the <b>start</b> method.

    def close(self, id):
        while len(self.__tags) > id:
            self.end()

    ##
    # Adds an entire element.  This is the same as calling <b>start</b>,
    # <b>data</b>, and <b>end</b> in sequence.  The <b>text</b> argument
    # can be omitted.
    #
    # @param tag Element tag.
    # @param text Optional character data for the element.
    # @param attrib Attribute dictionary.  Alternatively, attributes
    #    can be given as keyword arguments.

    def element(self, tag, text=None, attrib={}, **extra):
        self.start(tag, attrib, **extra)
        if text:
            self.data(text)
        self.end()

    ##
    # Flushes the output stream.

    def flush(self):
        pass # replaced by the constructor