More installation info. Bump alpha version.
[python/dscho.git] / Lib / xml / dom / minidom.py
blob 967320f5aa5fc1cfb086b230a07f3f934717c98d
1 """\
2 minidom.py -- a lightweight DOM implementation.
4 parse("foo.xml")
6 parseString("<foo><bar/></foo>")
8 Todo:
9 =====
10 * convenience methods for getting elements and text.
11 * more testing
12 * bring some of the writer and linearizer code into conformance with this
13 interface
14 * SAX 2 namespaces
15 """
17 from xml.dom import HierarchyRequestErr, EMPTY_NAMESPACE
19 # localize the types, and allow support for Unicode values if available:
20 import types
21 _TupleType = types.TupleType
22 try:
23 _StringTypes = (types.StringType, types.UnicodeType)
24 except AttributeError:
25 _StringTypes = (types.StringType,)
26 del types
28 import xml.dom
if list is type([]):
    # The built-in ``list`` is a real type here, so it can be subclassed.
    class NodeList(list):
        """A list of nodes that also exposes the DOM ``item``/``length`` API."""

        def item(self, index):
            """Return the node at *index*, or None if *index* is out of range.

            Negative indexes are treated as out of range rather than counting
            from the end.
            """
            if index < 0 or index >= len(self):
                return None
            return self[index]

        length = property(lambda self: len(self),
                          doc="The number of nodes in the NodeList.")

else:
    def NodeList():
        """Fallback factory: a plain list stands in for the NodeList type."""
        return []
class Node(xml.dom.Node):
    """Base class shared by every minidom node type.

    Keeps the child list and the parent/sibling links, and implements the
    generic tree operations (insert, append, replace, remove, normalize,
    clone) together with the XML serialisation entry points.  Subclasses
    list the node types they accept as children in ``childNodeTypes``.
    """

    allnodes = {}           # debug registry: index string -> repr of __dict__
    _debug = 0              # when true, creation and unlinking are logged
    _makeParentNodes = 1    # when true, tree operations maintain parentNode
    debug = None            # log stream, created on first use when _debug is set
    childNodeTypes = ()     # node types accepted as children
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None

    def __init__(self):
        self.childNodes = NodeList()
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = _get_StringIO()
                #open("debug4.out", "w")
            Node.debug.write("create %s\n" % index)

    def __nonzero__(self):
        # A node is always true, even when it has no children.
        return 1

    def toxml(self, encoding = None):
        """Return the node serialised as XML without added whitespace."""
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialised as indented XML.

        indent   -- the indentation string to prepend, per level
        newl     -- the newline string to append
        encoding -- if given, the output is passed through that codec's
                    stream writer; for documents it is also written into
                    the XML declaration
        """
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        if self.childNodes:
            return self.childNodes[-1]

    try:
        property
    except NameError:
        # No ``property`` built-in: emulate attribute access through the
        # _get_<name> accessors (and _get_<name> through plain attributes).
        def __getattr__(self, key):
            if key[0:2] == "__":
                raise AttributeError(key)
            # getattr should never call getattr!
            if self.__dict__.has_key("inGetAttr"):
                del self.inGetAttr
                raise AttributeError(key)

            prefix, attrname = key[:5], key[5:]
            if prefix == "_get_":
                self.inGetAttr = 1
                if hasattr(self, attrname):
                    del self.inGetAttr
                    return (lambda self=self, attrname=attrname:
                                    getattr(self, attrname))
                else:
                    del self.inGetAttr
                    raise AttributeError(key)
            else:
                self.inGetAttr = 1
                try:
                    func = getattr(self, "_get_" + key)
                except AttributeError:
                    raise AttributeError(key)
                del self.inGetAttr
                return func()
    else:
        firstChild = property(_get_firstChild,
                              doc="First child node, or None.")
        lastChild = property(_get_lastChild,
                             doc="Last child node, or None.")

    def insertBefore(self, newChild, refChild):
        """Insert newChild before refChild (append if refChild is None).

        A document fragment has each of its children inserted in turn.
        Raises HierarchyRequestErr if newChild's type is not allowed here.
        Returns newChild.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended in turn.
        Raises HierarchyRequestErr if node's type is not allowed here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        if self.childNodes:
            last = self.lastChild
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        Replacing a node with itself is a no-op that returns None.
        Raises HierarchyRequestErr if newChild's type is not allowed here.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # This test must precede the detach step below; otherwise replacing
        # a child with itself would remove it from the tree.
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node, repair sibling links, return it."""
        self.childNodes.remove(oldChild)
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None

        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones, recursively.

        Child elements are normalized too.  Merged or empty text nodes are
        unlinked; the sibling links of the surviving children are rebuilt.
        """
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                data = child.data
                if data and L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + child.data
                    # keep the cached length in step with the merged data
                    node.length = len(node.data)
                    child.unlink()
                elif data:
                    if L:
                        L[-1].nextSibling = child
                        child.previousSibling = L[-1]
                    else:
                        child.previousSibling = None
                    L.append(child)
                else:
                    # empty text node; discard
                    child.unlink()
            else:
                if L:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                else:
                    child.previousSibling = None
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        if L:
            # The last surviving child must not keep pointing at a text
            # node that was merged or discarded after it.
            L[-1].nextSibling = None
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node; with deep true, copy the subtree too.

        The clone starts from a shallow copy of the instance dictionary and
        has no parent and a fresh child list.
        """
        import new
        clone = new.instance(self.__class__, self.__dict__.copy())
        if self._makeParentNodes:
            clone.parentNode = None
        clone.childNodes = NodeList()
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    # DOM Level 3 (Working Draft 2001-Jan-26)

    def isSameNode(self, other):
        """Return true if other is this very node object."""
        return self is other

    # minidom-specific API:

    def unlink(self):
        """Drop this subtree's references to parent, document and children."""
        self.parentNode = self.ownerDocument = None
        for child in self.childNodes:
            child.unlink()
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
287 def _write_data(writer, data):
288 "Writes datachars to writer."
289 data = data.replace("&", "&amp;")
290 data = data.replace("<", "&lt;")
291 data = data.replace("\"", "&quot;")
292 data = data.replace(">", "&gt;")
293 writer.write(data)
295 def _getElementsByTagNameHelper(parent, name, rc):
296 for node in parent.childNodes:
297 if node.nodeType == Node.ELEMENT_NODE and \
298 (name == "*" or node.tagName == name):
299 rc.append(node)
300 _getElementsByTagNameHelper(node, name, rc)
301 return rc
303 def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
304 for node in parent.childNodes:
305 if node.nodeType == Node.ELEMENT_NODE:
306 if ((localName == "*" or node.localName == localName) and
307 (nsURI == "*" or node.namespaceURI == nsURI)):
308 rc.append(node)
309 _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
310 return rc
class DocumentFragment(Node):
    """Lightweight container node used to move groups of nodes around."""

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None
    # Node types a fragment may hold as direct children.
    childNodeTypes = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )
class Attr(Node):
    """Attribute node.

    ``name``/``nodeName`` and ``value``/``nodeValue`` are kept as mirrored
    pairs: assigning to either member of a pair updates both.
    """

    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None):
        # Write straight into the instance dictionary to avoid the cost of
        # going through __setattr__ for every field.
        self.__dict__.update({
            "localName": localName or qName,
            "nodeName": qName,
            "name": qName,
            "namespaceURI": namespaceURI,
            "prefix": prefix,
        })
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        fields = self.__dict__
        if name == "value" or name == "nodeValue":
            fields["value"] = value
            fields["nodeValue"] = value
        elif name == "name" or name == "nodeName":
            fields["name"] = value
            fields["nodeName"] = value
        else:
            fields[name] = value

    def cloneNode(self, deep):
        """Clone the attribute; the clone is not owned by any element."""
        clone = Node.cloneNode(self, deep)
        # Drop the copied instance-level owner, if any, so the class default
        # (None) shows through.
        try:
            del clone.__dict__["ownerElement"]
        except KeyError:
            pass
        return clone
class NamedNodeMap:
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs:   name -> Attr;  attrsNS: (namespaceURI, localName) -> Attr
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    try:
        property
    except NameError:
        def __getattr__(self, name):
            if name == "length":
                return len(self._attrs)
            raise AttributeError(name)
    else:
        length = property(lambda self: len(self._attrs),
                          doc="Number of nodes in the NamedNodeMap.")

    def item(self, index):
        """Return the index-th attribute node, or None if out of range."""
        try:
            return self[self._attrs.keys()[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append((node.nodeName, node.value))
        return L

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        L = []
        for node in self._attrs.values():
            L.append(((node.namespaceURI, node.localName), node.value))
        return L

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value = None):
        return self._attrs.get(name, value)

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Maps wrapping the same underlying dictionary compare equal.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        # A tuple key is looked up as (namespaceURI, localName).
        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store value under attname; value is a string or an Attr.

        Raises TypeError for any other kind of value.
        """
        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
            node.ownerDocument = self._ownerElement.ownerDocument
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
        self.setNamedItem(node)

    def setNamedItem(self, node):
        """Store node under its name and return the node it replaced, if any.

        Raises HierarchyRequestErr if node is not an Attr.
        """
        if not isinstance(node, Attr):
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        # Only a *different* node being displaced is unlinked; unlinking the
        # node that is being (re)stored would leave a dead node in the map.
        if old is not None and old is not node:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

AttributeList = NamedNodeMap
class Element(Node):
    """Element node: a tag name, attributes and child nodes."""

    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.TEXT_NODE,
                      Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
                           # in the future: consider lazy generation
                           # of attribute objects this is too tricky
                           # for now because of headaches with
                           # namespaces.

    def cloneNode(self, deep):
        """Clone the element; attributes are always cloned with it."""
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone

    def unlink(self):
        """Unlink the attributes, then the element and its subtree."""
        for attr in self._attrs.values():
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Return the attribute's value, or "" if it is not set."""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware getAttribute; "" if the attribute is not set."""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        attr = Attr(attname)
        # for performance
        d = attr.__dict__
        d["value"] = d["nodeValue"] = value
        d["ownerDocument"] = self.ownerDocument
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        d = attr.__dict__
        d["value"] = d["nodeValue"] = value
        d["ownerDocument"] = self.ownerDocument
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Returns the different attribute node it replaced, or None.  Raises
        InuseAttributeErr if attr already belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        old = self._attrs.get(attr.name, None)
        # Never unlink attr itself: re-setting a node that is already
        # attached here must leave it intact.
        if old is not None and old is not attr:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr

        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self

        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the named attribute; KeyError if it is not set."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware removeAttribute; KeyError if it is not set."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        return self._attrs.has_key(name)

    def hasAttributeNS(self, namespaceURI, localName):
        return self._attrsNS.has_key((namespaceURI, localName))

    def getElementsByTagName(self, name):
        return _getElementsByTagNameHelper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
                                             NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the element, its attributes (sorted by name) and children.

        indent    -- current indentation
        addindent -- indentation to add to higher levels
        newl      -- newline string
        """
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">%s"%(newl))
            for node in self.childNodes:
                node.writexml(writer,indent+addindent,addindent,newl)
            writer.write("%s</%s>%s" % (indent,self.tagName,newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    try:
        property
    except NameError:
        pass
    else:
        attributes = property(_get_attributes,
                              doc="NamedNodeMap of attributes on the element.")

    def hasAttributes(self):
        """Return 1 if the element has any attribute, else 0."""
        if self._attrs or self._attrsNS:
            return 1
        else:
            return 0
class Comment(Node):
    """Comment node; its text is held in ``data`` and ``nodeValue``."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None
    childNodeTypes = ()

    def __init__(self, data):
        Node.__init__(self)
        self.nodeValue = data
        self.data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment as <!--data--> framed by indent and newl."""
        markup = "%s<!--%s-->%s" % (indent, self.data, newl)
        writer.write(markup)
class ProcessingInstruction(Node):
    """Processing-instruction node: a target plus its data string."""

    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None
    childNodeTypes = ()

    def __init__(self, target, data):
        Node.__init__(self)
        # target doubles as nodeName, data as nodeValue.
        self.nodeName = target
        self.target = target
        self.nodeValue = data
        self.data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction as <?target data?> framed by indent/newl."""
        markup = "%s<?%s %s?>%s" % (indent, self.target, self.data, newl)
        writer.write(markup)
class CharacterData(Node):
    """Common behaviour of nodes that carry character data.

    ``data`` and ``nodeValue`` hold the same string and ``length`` caches
    its length; every mutator keeps the three in step.  Offsets may range
    from 0 up to and including ``len(data)``.
    """

    def __init__(self, data):
        if type(data) not in _StringTypes:
            raise TypeError("node contents must be a string")
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.length = len(data)

    def __repr__(self):
        if len(self.data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return "<DOM %s node \"%s%s\">" % (
            self.__class__.__name__, self.data[0:10], dotdotdot)

    def _check_offset(self, offset):
        # Validate an offset into self.data.  An offset equal to the length
        # is legal: it addresses the position just past the last character.
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    def substringData(self, offset, count):
        """Return up to count characters of data starting at offset.

        Raises IndexSizeErr for a negative or too-large offset, or a
        negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append arg to the end of data."""
        self.data = self.data + arg
        self.nodeValue = self.data
        self.length = len(self.data)

    def insertData(self, offset, arg):
        """Insert arg at offset (offset == len(data) appends).

        Raises IndexSizeErr for a negative or too-large offset.
        """
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])
            self.nodeValue = self.data
            self.length = len(self.data)

    def deleteData(self, offset, count):
        """Delete up to count characters starting at offset.

        Raises IndexSizeErr for a negative or too-large offset, or a
        negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]
            self.nodeValue = self.data
            self.length = len(self.data)

    def replaceData(self, offset, count, arg):
        """Replace up to count characters starting at offset with arg.

        A count of zero inserts arg at offset.  Raises IndexSizeErr for a
        negative or too-large offset, or a negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Done even when count is 0, so that arg is never silently dropped.
        self.data = "%s%s%s" % (
            self.data[:offset], arg, self.data[offset+count:])
        self.nodeValue = self.data
        self.length = len(self.data)
class Text(CharacterData):
    """Text node."""

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None
    childNodeTypes = ()

    def splitText(self, offset):
        """Split this node at offset and return the new node.

        This node keeps data[:offset]; the returned node receives
        data[offset:], is of the same class and owner document as this one,
        and, when this node is in a tree, becomes its next sibling.
        Raises IndexSizeErr if offset is negative or beyond the data.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        # Use the node's own class so that subclasses (e.g. CDATA sections)
        # split into nodes of the same kind.
        newText = self.__class__(self.data[offset:])
        newText.ownerDocument = self.ownerDocument
        following = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if following is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, following)
        self.data = self.data[:offset]
        self.nodeValue = self.data
        self.length = len(self.data)
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the escaped text, framed by indent and newl."""
        _write_data(writer, "%s%s%s"%(indent, self.data, newl))
class CDATASection(Text):
    """CDATA section: text that is written out verbatim, unescaped."""

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data inside a CDATA marker; indent/newl are not used."""
        section = "<![CDATA[%s]]>" % self.data
        writer.write(section)
760 def _nssplit(qualifiedName):
761 fields = qualifiedName.split(':', 1)
762 if len(fields) == 2:
763 return fields
764 elif len(fields) == 1:
765 return (None, fields[0])
class DocumentType(Node):
    """Document type node; ``name`` is the local part of the qualified name."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    attributes = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None
    entities = None
    notations = None

    def __init__(self, qualifiedName):
        Node.__init__(self)
        if not qualifiedName:
            # No name given: keep the class-level default of None.
            return
        prefix, localname = _nssplit(qualifiedName)
        self.name = localname
class DOMImplementation:
    """Factory for documents and document types."""

    def hasFeature(self, feature, version):
        """Return true only for the "core" feature (any letter case) at
        version "1.0" or "2.0"."""
        if version not in ("1.0", "2.0"):
            return 0
        return feature.lower() == "core"

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document whose root element is named qualifiedName.

        A document type is created when doctype is None.  Raises
        WrongDocumentErr if doctype already belongs to a tree,
        InvalidCharacterErr if qualifiedName is empty, and NamespaceErr for
        an illegal prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        document = self._createDocument()
        if doctype is None:
            doctype = self.createDocumentType(qualifiedName, None, None)
        if not qualifiedName:
            # The spec is unclear what to raise here; SyntaxErr
            # would be the other obvious candidate. Since Xerces raises
            # InvalidCharacterErr, and since SyntaxErr is not listed
            # for createDocument, that seems to be the better choice.
            # XXX: need to check for illegal characters here and in
            # createElement.
            raise xml.dom.InvalidCharacterErr("Element with no name")
        prefix = _nssplit(qualifiedName)[0]
        xml_namespace = "http://www.w3.org/XML/1998/namespace"
        if prefix == "xml" and namespaceURI != xml_namespace:
            raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
        if prefix and not namespaceURI:
            raise xml.dom.NamespaceErr(
                "illegal use of prefix without namespaces")
        root = document.createElementNS(namespaceURI, qualifiedName)
        document.appendChild(root)
        doctype.parentNode = doctype.ownerDocument = document
        document.doctype = doctype
        document.implementation = self
        return document

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_type = DocumentType(qualifiedName)
        new_type.publicId = publicId
        new_type.systemId = systemId
        return new_type

    # internal
    def _createDocument(self):
        return Document()
class Document(Node):
    """Document node: the root of a DOM tree and factory for its nodes."""

    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    doctype = None
    parentNode = None
    previousSibling = nextSibling = None

    implementation = DOMImplementation()
    childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                      Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    def appendChild(self, node):
        """Append node, allowing at most one element child.

        Raises HierarchyRequestErr for a disallowed node type or for a
        second document element.
        """
        if node.nodeType not in self.childNodeTypes:
            raise HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        if node.parentNode is not None:
            # Detach first: node may be the current document element
            # being moved to the end.
            node.parentNode.removeChild(node)

        if node.nodeType == Node.ELEMENT_NODE \
           and self._get_documentElement():
            raise xml.dom.HierarchyRequestErr(
                "two document elements disallowed")
        return Node.appendChild(self, node)

    def removeChild(self, oldChild):
        """Detach oldChild from the document and return it."""
        self.childNodes.remove(oldChild)
        # Close the gap between the neighbours so that neither keeps a
        # link to the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        oldChild.parentNode = None
        if self.documentElement is oldChild:
            self.documentElement = None

        return oldChild

    def _get_documentElement(self):
        # The first element child, or None if there is none.
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                return node

    try:
        property
    except NameError:
        pass
    else:
        documentElement = property(_get_documentElement,
                                   doc="Top-level element of this document.")

    def unlink(self):
        """Unlink the document type, if any, then the whole tree."""
        if self.doctype is not None:
            self.doctype.unlink()
            self.doctype = None
        Node.unlink(self)

    def createDocumentFragment(self):
        """Return a new, empty fragment owned by this document."""
        d = DocumentFragment()
        d.ownerDocument = self
        return d

    def createElement(self, tagName):
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createTextNode(self, data):
        t = Text(data)
        t.ownerDocument = self
        return t

    def createCDATASection(self, data):
        c = CDATASection(data)
        c.ownerDocument = self
        return c

    def createComment(self, data):
        c = Comment(data)
        c.ownerDocument = self
        return c

    def createProcessingInstruction(self, target, data):
        p = ProcessingInstruction(target, data)
        p.ownerDocument = self
        return p

    def createAttribute(self, qName):
        """Return a new attribute node with an empty value."""
        a = Attr(qName)
        a.ownerDocument = self
        a.value = ""
        return a

    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix, localName)
        e.ownerDocument = self
        return e

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Namespace-aware createAttribute; the value starts out empty."""
        prefix, localName = _nssplit(qualifiedName)
        a = Attr(qualifiedName, namespaceURI, localName, prefix)
        a.ownerDocument = self
        a.value = ""
        return a

    def getElementsByTagName(self, name):
        return _getElementsByTagNameHelper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
                                             NodeList())

    def writexml(self, writer, indent="", addindent="", newl="",
                 encoding = None):
        """Write the XML declaration followed by every child node.

        The declaration names the encoding only when one is given.
        """
        if encoding is None:
            writer.write('<?xml version="1.0" ?>\n')
        else:
            writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)
951 def _get_StringIO():
952 # we can't use cStringIO since it doesn't support Unicode strings
953 from StringIO import StringIO
954 return StringIO()
956 def _doparse(func, args, kwargs):
957 events = apply(func, args, kwargs)
958 toktype, rootNode = events.getEvent()
959 events.expandNode(rootNode)
960 events.clear()
961 return rootNode
def parse(*args, **kwargs):
    """Build a DOM from a file, given by name or as a file object.

    All arguments are handed on unchanged to pulldom.parse.
    """
    # Imported on demand rather than at module load.
    import xml.dom.pulldom
    builder = xml.dom.pulldom.parse
    return _doparse(builder, args, kwargs)
def parseString(*args, **kwargs):
    """Build a DOM from XML held in a string.

    All arguments are handed on unchanged to pulldom.parseString.
    """
    # Imported on demand rather than at module load.
    import xml.dom.pulldom
    builder = xml.dom.pulldom.parseString
    return _doparse(builder, args, kwargs)
def getDOMImplementation():
    """Return the DOMImplementation instance shared by all documents."""
    shared = Document.implementation
    return shared