struct.pack has become picky about h (short) and H (unsigned short).
[python/dscho.git] / Lib / xml / dom / minidom.py
blob00bd4ca0831c6ef1eef84dcc1914d428f06cb7a1
1 """\
2 minidom.py -- a lightweight DOM implementation based on SAX.
4 parse( "foo.xml" )
6 parseString( "<foo><bar/></foo>" )
8 Todo:
9 =====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15 """
17 import pulldom
18 import string
19 from StringIO import StringIO
20 import types
class Node:
    """Base class shared by every node type in this module.

    Holds the DOM node-type constants, the child list and the generic
    tree operations.  Attribute access that fails normally is routed
    through __getattr__, which maps ``node.foo`` onto ``node._get_foo()``
    and ``node._get_foo`` onto a callable returning ``node.foo``.

    Class-level switches:
      _makeParentNodes -- when true, tree operations set child.parentNode
      _debug           -- when true, node creation/unlinking is logged to
                          Node.debug and tracked in Node.allnodes
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    allnodes = {}
    _debug = 0
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Start with an empty child list; log the creation in debug mode."""
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` via ``_get_foo()`` and ``_get_foo`` via ``foo``.

        Raises AttributeError (carrying the requested name) when neither
        form exists.  The "inGetAttr" entry in the instance dict is a
        re-entrancy guard: a lookup triggered from inside this method
        must fail immediately instead of recursing.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!
        if state.get("inGetAttr"):
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        state["inGetAttr"] = 1
        if prefix == "_get_":
            found = hasattr(self, attrname)
            # A failed nested lookup has already cleared the guard, so
            # only remove it if it is still there.
            if state.get("inGetAttr"):
                del state["inGetAttr"]
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))
        try:
            func = getattr(self, "_get_" + key)
        except AttributeError:
            # the nested lookup removed the guard before failing
            raise AttributeError(key)
        del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        """A node is always true, whatever its children or length."""
        return 1

    def toxml(self):
        """Return the text produced by this node's writexml()."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert newChild immediately before refChild and return it.

        Raises ValueError if refChild is not a child of this node.
        NOTE: sibling links of the neighbouring nodes are not updated.
        """
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add node as the last child, link its siblings, and return it."""
        if self.childNodes:
            last = self.childNodes[-1]
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        # keep in step with insertBefore, which also records the parent
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put newChild in oldChild's slot and return oldChild.

        Raises ValueError if oldChild is not a child of this node.
        NOTE: sibling links are not updated.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
        return oldChild

    def removeChild(self, oldChild):
        """Remove oldChild from the child list and return it.

        Raises ValueError if oldChild is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        return oldChild

    def cloneNode(self, deep):
        """Return a detached copy of this node.

        With a true `deep` the children are cloned recursively;
        otherwise the copy has no children.  The copy gets its own
        instance dict, its own attribute nodes (for nodes that carry
        the _attrs/_attrsNS indexes) and no parent or siblings.
        """
        import copy
        clone = copy.copy(self)     # new instance, shallow-copied __dict__
        state = clone.__dict__
        clone.parentNode = None
        clone.previousSibling = None
        clone.nextSibling = None

        attrs = state.get("_attrs")
        if attrs is not None:
            # do not share attribute nodes or indexes with the original
            state["_attrs"] = {}
            state["_attrsNS"] = {}
            for attr in attrs.values():
                twin = attr.cloneNode(1)
                state["_attrs"][twin.name] = twin
                state["_attrsNS"][(twin.namespaceURI, twin.localName)] = twin

        # A copied documentElement would still point into the original
        # tree; drop it so appendChild below can assign the cloned one.
        if state.get("documentElement") is not None:
            del state["documentElement"]

        clone.childNodes = []
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(deep))
        return clone

    def unlink(self):
        """Break the references held by this subtree.

        Unlinks and drops every child, clears parent and sibling links,
        removes all attribute nodes, and sets childNodes to None.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if self.attributes:
            # iterate over a copy: removeAttributeNode mutates _attrs
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
151 def _write_data(writer, data):
152 "Writes datachars to writer."
153 data = string.replace(data, "&", "&amp;")
154 data = string.replace(data, "<", "&lt;")
155 data = string.replace(data, "\"", "&quot;")
156 data = string.replace(data, ">", "&gt;")
157 writer.write(data)
159 def _getElementsByTagNameHelper(parent, name, rc):
160 for node in parent.childNodes:
161 if node.nodeType == Node.ELEMENT_NODE and \
162 (name == "*" or node.tagName == name):
163 rc.append(node)
164 _getElementsByTagNameHelper(node, name, rc)
165 return rc
167 def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
168 for node in parent.childNodes:
169 if node.nodeType == Node.ELEMENT_NODE:
170 if ((localName == "*" or node.tagName == localName) and
171 (nsURI == "*" or node.namespaceURI == nsURI)):
172 rc.append(node)
173 _getElementsByTagNameNSHelper(node, name, rc)
class Attr(Node):
    """Attribute node; `value` and `nodeValue` always hold the same object."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        """Record the attribute's names; its value is assigned later.

        localName falls back to qName when it is not given.
        """
        # skip setattr for performance
        state = self.__dict__
        state["localName"] = localName or qName
        state["nodeName"] = state["name"] = qName
        state["namespaceURI"] = namespaceURI
        state["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store attributes directly, mirroring value <-> nodeValue."""
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["value"] = state["nodeValue"] = value
            return
        state[name] = value
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary

    _attrs maps an attribute name to its node and _attrsNS maps a
    (namespaceURI, localName) pair to the same node.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self.length = len(self._attrs)

    def item(self, index):
        """Return the attribute node at position index, or None."""
        try:
            return list(self._attrs.values())[index]
        except IndexError:
            return None

    def items(self):
        """Return a list of (name, value) pairs."""
        # attribute nodes carry nodeName/name; they have no tagName
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the attribute names."""
        return self._attrs.keys()

    def keysNS(self):
        """Return the (namespaceURI, localName) keys."""
        return self._attrsNS.keys()

    def values(self):
        """Return the attribute nodes."""
        return self._attrs.values()

    def __len__(self):
        # read the dictionary directly so the answer cannot go stale
        return len(self._attrs)

    def __cmp__(self, other):
        """Two lists over the same name dictionary compare equal."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name or by (namespaceURI, localName)."""
        if type(attname_or_tuple) is types.TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value (a string or an Attr node) under attname.

        A string is wrapped in a new Attr named attname.  Raises
        TypeError for any other kind of value.
        """
        if type(value) in (types.StringType, types.UnicodeType):
            node = Attr(attname)
            node.value = value
        elif isinstance(value, Attr):
            node = value
        else:
            raise TypeError("value must be a string or Attr object")
        old = self._attrs.get(attname, None)
        # Drop the replaced node from both indexes, but never unlink the
        # node that is about to be stored.
        if old is not None and old is not node:
            del self[attname]
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        self.length = len(self._attrs)

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed node and remove it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
        self.length = len(self._attrs)
class Element(Node):
    """Element node: a tag name plus a double-indexed set of attributes."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create an element; localName falls back to tagName."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                            #    URI,localName -> Attribute
                            # in the future: consider lazy generation
                            # of attribute objects; this is too tricky
                            # for now because of headaches with
                            # namespaces.

    def getAttribute(self, attname):
        """Return the value of the named attribute (KeyError if absent)."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value stored under (namespaceURI, localName)."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute called attname holding value."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute holding value."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.

    def getAttributeNode(self, attrname):
        """Return the named attribute node, or None if absent."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the node under (namespaceURI, localName); KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Install attr, replacing any attribute with the same name."""
        old = self._attrs.get(attr.name, None)
        # Remove the replaced node from BOTH indexes; a leftover
        # _attrsNS entry would trip the assertions in unlink().  Setting
        # the node that is already installed must not unlink it.
        if old is not None and old is not attr:
            self.removeAttributeNode(old)
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        # FIXME: return old value if something changed

    def removeAttribute(self, name):
        """Remove the named attribute (KeyError if absent)."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute under (namespaceURI, localName)."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink node and delete it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of descendant elements called name ("*" = all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching the NS pair."""
        # keep our own reference to the list so the result is returned
        # whatever the helper itself hands back
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    # undocumented
    def writexml(self, writer):
        """Write this element, its attributes (sorted by name) and children."""
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList over this element's attributes."""
        return AttributeList(self._attrs, self._attrsNS)
class Comment(Node):
    """Comment node; the text is held in both `data` and `nodeValue`."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#comment"
        self.nodeValue = data
        self.data = data

    def writexml(self, writer):
        """Write the comment wrapped in <!-- and -->."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
class ProcessingInstruction(Node):
    """Processing-instruction node made of a target and its data."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        # the target doubles as the node name, the data as the node value
        self.nodeName = target
        self.target = target
        self.nodeValue = data
        self.data = data

    def writexml(self, writer):
        """Write the instruction as <?target data?>."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
class Text(Node):
    """Character-data node; the text is in both `data` and `nodeValue`."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.nodeValue = data
        self.data = data

    def __repr__(self):
        # show at most ten characters, flagging any truncation
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        preview = self.data[0:10]
        return "<DOM Text node \"%s%s\">" % (preview, suffix)

    def writexml(self, writer):
        """Write the text through _write_data."""
        _write_data(writer, self.data)
402 def _nssplit(qualifiedName):
403 import string
404 fields = string.split(qualifiedName,':', 1)
405 if len(fields) == 2:
406 return fields
407 elif len(fields) == 1:
408 return ('', fields[0])
class Document(Node):
    """Document node: the root of a tree, with at most one element child.

    The create* names are the node classes themselves, so calling them
    builds a node that is not yet attached to the tree.
    """
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append node; the first element becomes documentElement.

        Raises TypeError if an element is appended when one is
        already present.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for a namespace-qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of elements matching the namespace pair."""
        # pass our own list and return it: the helper needs a result
        # list argument, and the caller needs the matches back
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Forget the document element, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return a list of elements called name ("*" matches all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child of the document in order."""
        for node in self.childNodes:
            node.writexml(writer)
463 def _doparse(func, args, kwargs):
464 events = apply(func, args, kwargs)
465 toktype, rootNode = events.getEvent()
466 events.expandNode(rootNode)
467 return rootNode
def parse(*args, **kwargs):
    """Build a DOM by handing the arguments to pulldom.parse."""
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
def parseString(*args, **kwargs):
    """Build a DOM by handing the arguments to pulldom.parseString."""
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)