Lib/xml/dom/minidom.py

   1 """\
   2 minidom.py -- a lightweight DOM implementation.
   3
   4 parse("foo.xml")
   5
   6 parseString("<foo><bar/></foo>")
   7
   8 Todo:
   9 =====
  10  * convenience methods for getting elements and text.
  11  * more testing
  12  * bring some of the writer and linearizer code into conformance with this
  13         interface
  14  * SAX 2 namespaces
  15 """
  16
  17 from xml.dom import HierarchyRequestErr, EMPTY_NAMESPACE
  18
  19 # localize the types, and allow support for Unicode values if available:
  20 import types
  21 _TupleType = types.TupleType
  22 try:
  23     _StringTypes = (types.StringType, types.UnicodeType)
  24 except AttributeError:
  25     _StringTypes = (types.StringType,)
  26 del types
  27
  28 import xml.dom
  29
  30
  31 if list is type([]):
  32     class NodeList(list):
  33         def item(self, index):
  34             if 0 <= index < len(self):
  35                 return self[index]
  36
  37         length = property(lambda self: len(self),
  38                           doc="The number of nodes in the NodeList.")
  39
  40 else:
  41     def NodeList():
  42         return []
  43
  44
  45 class Node(xml.dom.Node):
  46     allnodes = {}
  47     _debug = 0
  48     _makeParentNodes = 1
  49     debug = None
  50     childNodeTypes = ()
  51     namespaceURI = None # this is non-null only for elements and attributes
  52     parentNode = None
  53     ownerDocument = None
  54
  55     def __init__(self):
  56         self.childNodes = NodeList()
  57         if Node._debug:
  58             index = repr(id(self)) + repr(self.__class__)
  59             Node.allnodes[index] = repr(self.__dict__)
  60             if Node.debug is None:
  61                 Node.debug = _get_StringIO()
  62                 #open("debug4.out", "w")
  63             Node.debug.write("create %s\n" % index)
  64
  65     def __nonzero__(self):
  66         return 1
  67
  68     def toxml(self, encoding = None):
  69         return self.toprettyxml("", "", encoding)
  70
  71     def toprettyxml(self, indent="\t", newl="\n", encoding = None):
  72         # indent = the indentation string to prepend, per level
  73         # newl = the newline string to append
  74         writer = _get_StringIO()
  75         if encoding is not None:
  76             import codecs
  77             # Can't use codecs.getwriter to preserve 2.0 compatibility
  78             writer = codecs.lookup(encoding)[3](writer)
  79         if self.nodeType == Node.DOCUMENT_NODE:
  80             # Can pass encoding only to document, to put it into XML header
  81             self.writexml(writer, "", indent, newl, encoding)
  82         else:
  83             self.writexml(writer, "", indent, newl)
  84         return writer.getvalue()
  85
  86     def hasChildNodes(self):
  87         if self.childNodes:
  88             return 1
  89         else:
  90             return 0
  91
  92     def _get_firstChild(self):
  93         if self.childNodes:
  94             return self.childNodes[0]
  95
  96     def _get_lastChild(self):
  97         if self.childNodes:
  98             return self.childNodes[-1]
  99
 100     try:
 101         property
 102     except NameError:
 103         def __getattr__(self, key):
 104             if key[0:2] == "__":
 105                 raise AttributeError, key
 106             # getattr should never call getattr!
 107             if self.__dict__.has_key("inGetAttr"):
 108                 del self.inGetAttr
 109                 raise AttributeError, key
 110
 111             prefix, attrname = key[:5], key[5:]
 112             if prefix == "_get_":
 113                 self.inGetAttr = 1
 114                 if hasattr(self, attrname):
 115                     del self.inGetAttr
 116                     return (lambda self=self, attrname=attrname:
 117                                     getattr(self, attrname))
 118                 else:
 119                     del self.inGetAttr
 120                     raise AttributeError, key
 121             else:
 122                 self.inGetAttr = 1
 123                 try:
 124                     func = getattr(self, "_get_" + key)
 125                 except AttributeError:
 126                     raise AttributeError, key
 127                 del self.inGetAttr
 128                 return func()
 129     else:
 130         firstChild = property(_get_firstChild,
 131                               doc="First child node, or None.")
 132         lastChild = property(_get_lastChild,
 133                              doc="Last child node, or None.")
 134
 135     def insertBefore(self, newChild, refChild):
 136         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 137             for c in tuple(newChild.childNodes):
 138                 self.insertBefore(c, refChild)
 139             ### The DOM does not clearly specify what to return in this case
 140             return newChild
 141         if newChild.nodeType not in self.childNodeTypes:
 142             raise HierarchyRequestErr, \
 143                   "%s cannot be child of %s" % (repr(newChild), repr(self))
 144         if newChild.parentNode is not None:
 145             newChild.parentNode.removeChild(newChild)
 146         if refChild is None:
 147             self.appendChild(newChild)
 148         else:
 149             index = self.childNodes.index(refChild)
 150             self.childNodes.insert(index, newChild)
 151             newChild.nextSibling = refChild
 152             refChild.previousSibling = newChild
 153             if index:
 154                 node = self.childNodes[index-1]
 155                 node.nextSibling = newChild
 156                 newChild.previousSibling = node
 157             else:
 158                 newChild.previousSibling = None
 159             if self._makeParentNodes:
 160                 newChild.parentNode = self
 161         return newChild
 162
 163     def appendChild(self, node):
 164         if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 165             for c in tuple(node.childNodes):
 166                 self.appendChild(c)
 167             ### The DOM does not clearly specify what to return in this case
 168             return node
 169         if node.nodeType not in self.childNodeTypes:
 170             raise HierarchyRequestErr, \
 171                   "%s cannot be child of %s" % (repr(node), repr(self))
 172         if node.parentNode is not None:
 173             node.parentNode.removeChild(node)
 174         if self.childNodes:
 175             last = self.lastChild
 176             node.previousSibling = last
 177             last.nextSibling = node
 178         else:
 179             node.previousSibling = None
 180         node.nextSibling = None
 181         self.childNodes.append(node)
 182         if self._makeParentNodes:
 183             node.parentNode = self
 184         return node
 185
 186     def replaceChild(self, newChild, oldChild):
 187         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 188             refChild = oldChild.nextSibling
 189             self.removeChild(oldChild)
 190             return self.insertBefore(newChild, refChild)
 191         if newChild.nodeType not in self.childNodeTypes:
 192             raise HierarchyRequestErr, \
 193                   "%s cannot be child of %s" % (repr(newChild), repr(self))
 194         if newChild.parentNode is not None:
 195             newChild.parentNode.removeChild(newChild)
 196         if newChild is oldChild:
 197             return
 198         index = self.childNodes.index(oldChild)
 199         self.childNodes[index] = newChild
 200         if self._makeParentNodes:
 201             newChild.parentNode = self
 202             oldChild.parentNode = None
 203         newChild.nextSibling = oldChild.nextSibling
 204         newChild.previousSibling = oldChild.previousSibling
 205         oldChild.nextSibling = None
 206         oldChild.previousSibling = None
 207         if newChild.previousSibling:
 208             newChild.previousSibling.nextSibling = newChild
 209         if newChild.nextSibling:
 210             newChild.nextSibling.previousSibling = newChild
 211         return oldChild
 212
 213     def removeChild(self, oldChild):
 214         self.childNodes.remove(oldChild)
 215         if oldChild.nextSibling is not None:
 216             oldChild.nextSibling.previousSibling = oldChild.previousSibling
 217         if oldChild.previousSibling is not None:
 218             oldChild.previousSibling.nextSibling = oldChild.nextSibling
 219         oldChild.nextSibling = oldChild.previousSibling = None
 220
 221         if self._makeParentNodes:
 222             oldChild.parentNode = None
 223         return oldChild
 224
 225     def normalize(self):
 226         L = []
 227         for child in self.childNodes:
 228             if child.nodeType == Node.TEXT_NODE:
 229                 data = child.data
 230                 if data and L and L[-1].nodeType == child.nodeType:
 231                     # collapse text node
 232                     node = L[-1]
 233                     node.data = node.nodeValue = node.data + child.data
 234                     node.nextSibling = child.nextSibling
 235                     child.unlink()
 236                 elif data:
 237                     if L:
 238                         L[-1].nextSibling = child
 239                         child.previousSibling = L[-1]
 240                     else:
 241                         child.previousSibling = None
 242                     L.append(child)
 243                 else:
 244                     # empty text node; discard
 245                     child.unlink()
 246             else:
 247                 if L:
 248                     L[-1].nextSibling = child
 249                     child.previousSibling = L[-1]
 250                 else:
 251                     child.previousSibling = None
 252                 L.append(child)
 253                 if child.nodeType == Node.ELEMENT_NODE:
 254                     child.normalize()
 255         self.childNodes[:] = L
 256
 257     def cloneNode(self, deep):
 258         import new
 259         clone = new.instance(self.__class__, self.__dict__.copy())
 260         if self._makeParentNodes:
 261             clone.parentNode = None
 262         clone.childNodes = NodeList()
 263         if deep:
 264             for child in self.childNodes:
 265                 clone.appendChild(child.cloneNode(1))
 266         return clone
 267
 268     # DOM Level 3 (Working Draft 2001-Jan-26)
 269
 270     def isSameNode(self, other):
 271         return self is other
 272
 273     # minidom-specific API:
 274
 275     def unlink(self):
 276         self.parentNode = self.ownerDocument = None
 277         for child in self.childNodes:
 278             child.unlink()
 279         self.childNodes = None
 280         self.previousSibling = None
 281         self.nextSibling = None
 282         if Node._debug:
 283             index = repr(id(self)) + repr(self.__class__)
 284             self.debug.write("Deleting: %s\n" % index)
 285             del Node.allnodes[index]
 286
 287 def _write_data(writer, data):
 288     "Writes datachars to writer."
 289     data = data.replace("&", "&amp;")
 290     data = data.replace("<", "&lt;")
 291     data = data.replace("\"", "&quot;")
 292     data = data.replace(">", "&gt;")
 293     writer.write(data)
 294
 295 def _getElementsByTagNameHelper(parent, name, rc):
 296     for node in parent.childNodes:
 297         if node.nodeType == Node.ELEMENT_NODE and \
 298             (name == "*" or node.tagName == name):
 299             rc.append(node)
 300         _getElementsByTagNameHelper(node, name, rc)
 301     return rc
 302
 303 def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
 304     for node in parent.childNodes:
 305         if node.nodeType == Node.ELEMENT_NODE:
 306             if ((localName == "*" or node.localName == localName) and
 307                 (nsURI == "*" or node.namespaceURI == nsURI)):
 308                 rc.append(node)
 309             _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
 310     return rc
 311
 312 class DocumentFragment(Node):
 313     nodeType = Node.DOCUMENT_FRAGMENT_NODE
 314     nodeName = "#document-fragment"
 315     nodeValue = None
 316     attributes = None
 317     parentNode = None
 318     childNodeTypes = (Node.ELEMENT_NODE,
 319                       Node.TEXT_NODE,
 320                       Node.CDATA_SECTION_NODE,
 321                       Node.ENTITY_REFERENCE_NODE,
 322                       Node.PROCESSING_INSTRUCTION_NODE,
 323                       Node.COMMENT_NODE,
 324                       Node.NOTATION_NODE)
 325
 326
 327 class Attr(Node):
 328     nodeType = Node.ATTRIBUTE_NODE
 329     attributes = None
 330     ownerElement = None
 331     childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
 332
 333     def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, prefix=None):
 334         # skip setattr for performance
 335         d = self.__dict__
 336         d["localName"] = localName or qName
 337         d["nodeName"] = d["name"] = qName
 338         d["namespaceURI"] = namespaceURI
 339         d["prefix"] = prefix
 340         Node.__init__(self)
 341         # nodeValue and value are set elsewhere
 342
 343     def __setattr__(self, name, value):
 344         d = self.__dict__
 345         if name in ("value", "nodeValue"):
 346             d["value"] = d["nodeValue"] = value
 347         elif name in ("name", "nodeName"):
 348             d["name"] = d["nodeName"] = value
 349         else:
 350             d[name] = value
 351
 352     def cloneNode(self, deep):
 353         clone = Node.cloneNode(self, deep)
 354         if clone.__dict__.has_key("ownerElement"):
 355             del clone.ownerElement
 356         return clone
 357
 358
 359 class NamedNodeMap:
 360     """The attribute list is a transient interface to the underlying
 361     dictionaries.  Mutations here will change the underlying element's
 362     dictionary.
 363
 364     Ordering is imposed artificially and does not reflect the order of
 365     attributes as found in an input document.
 366     """
 367
 368     def __init__(self, attrs, attrsNS, ownerElement):
 369         self._attrs = attrs
 370         self._attrsNS = attrsNS
 371         self._ownerElement = ownerElement
 372
 373     try:
 374         property
 375     except NameError:
 376         def __getattr__(self, name):
 377             if name == "length":
 378                 return len(self._attrs)
 379             raise AttributeError, name
 380     else:
 381         length = property(lambda self: len(self._attrs),
 382                           doc="Number of nodes in the NamedNodeMap.")
 383
 384     def item(self, index):
 385         try:
 386             return self[self._attrs.keys()[index]]
 387         except IndexError:
 388             return None
 389
 390     def items(self):
 391         L = []
 392         for node in self._attrs.values():
 393             L.append((node.nodeName, node.value))
 394         return L
 395
 396     def itemsNS(self):
 397         L = []
 398         for node in self._attrs.values():
 399             L.append(((node.namespaceURI, node.localName), node.value))
 400         return L
 401
 402     def keys(self):
 403         return self._attrs.keys()
 404
 405     def keysNS(self):
 406         return self._attrsNS.keys()
 407
 408     def values(self):
 409         return self._attrs.values()
 410
 411     def get(self, name, value = None):
 412         return self._attrs.get(name, value)
 413
 414     def __len__(self):
 415         return self.length
 416
 417     def __cmp__(self, other):
 418         if self._attrs is getattr(other, "_attrs", None):
 419             return 0
 420         else:
 421             return cmp(id(self), id(other))
 422
 423     #FIXME: is it appropriate to return .value?
 424     def __getitem__(self, attname_or_tuple):
 425         if type(attname_or_tuple) is _TupleType:
 426             return self._attrsNS[attname_or_tuple]
 427         else:
 428             return self._attrs[attname_or_tuple]
 429
 430     # same as set
 431     def __setitem__(self, attname, value):
 432         if type(value) in _StringTypes:
 433             node = Attr(attname)
 434             node.value = value
 435             node.ownerDocument = self._ownerElement.ownerDocument
 436         else:
 437             if not isinstance(value, Attr):
 438                 raise TypeError, "value must be a string or Attr object"
 439             node = value
 440         self.setNamedItem(node)
 441
 442     def setNamedItem(self, node):
 443         if not isinstance(node, Attr):
 444             raise HierarchyRequestErr, \
 445                   "%s cannot be child of %s" % (repr(node), repr(self))
 446         old = self._attrs.get(node.name)
 447         if old:
 448             old.unlink()
 449         self._attrs[node.name] = node
 450         self._attrsNS[(node.namespaceURI, node.localName)] = node
 451         node.ownerElement = self._ownerElement
 452         return old
 453
 454     def setNamedItemNS(self, node):
 455         return self.setNamedItem(node)
 456
 457     def __delitem__(self, attname_or_tuple):
 458         node = self[attname_or_tuple]
 459         node.unlink()
 460         del self._attrs[node.name]
 461         del self._attrsNS[(node.namespaceURI, node.localName)]
 462
 463 AttributeList = NamedNodeMap
 464
 465
 466 class Element(Node):
 467     nodeType = Node.ELEMENT_NODE
 468     nextSibling = None
 469     previousSibling = None
 470     childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
 471                       Node.COMMENT_NODE, Node.TEXT_NODE,
 472                       Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)
 473
 474     def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
 475                  localName=None):
 476         Node.__init__(self)
 477         self.tagName = self.nodeName = tagName
 478         self.localName = localName or tagName
 479         self.prefix = prefix
 480         self.namespaceURI = namespaceURI
 481         self.nodeValue = None
 482
 483         self._attrs = {}   # attributes are double-indexed:
 484         self._attrsNS = {} #    tagName -> Attribute
 485                            #    URI,localName -> Attribute
 486                            # in the future: consider lazy generation
 487                            # of attribute objects this is too tricky
 488                            # for now because of headaches with
 489                            # namespaces.
 490
 491     def cloneNode(self, deep):
 492         clone = Node.cloneNode(self, deep)
 493         clone._attrs = {}
 494         clone._attrsNS = {}
 495         for attr in self._attrs.values():
 496             node = attr.cloneNode(1)
 497             clone._attrs[node.name] = node
 498             clone._attrsNS[(node.namespaceURI, node.localName)] = node
 499             node.ownerElement = clone
 500         return clone
 501
 502     def unlink(self):
 503         for attr in self._attrs.values():
 504             attr.unlink()
 505         self._attrs = None
 506         self._attrsNS = None
 507         Node.unlink(self)
 508
 509     def getAttribute(self, attname):
 510         try:
 511             return self._attrs[attname].value
 512         except KeyError:
 513             return ""
 514
 515     def getAttributeNS(self, namespaceURI, localName):
 516         try:
 517             return self._attrsNS[(namespaceURI, localName)].value
 518         except KeyError:
 519             return ""
 520
 521     def setAttribute(self, attname, value):
 522         attr = Attr(attname)
 523         # for performance
 524         d = attr.__dict__
 525         d["value"] = d["nodeValue"] = value
 526         d["ownerDocument"] = self.ownerDocument
 527         self.setAttributeNode(attr)
 528
 529     def setAttributeNS(self, namespaceURI, qualifiedName, value):
 530         prefix, localname = _nssplit(qualifiedName)
 531         # for performance
 532         attr = Attr(qualifiedName, namespaceURI, localname, prefix)
 533         d = attr.__dict__
 534         d["value"] = d["nodeValue"] = value
 535         d["ownerDocument"] = self.ownerDocument
 536         self.setAttributeNode(attr)
 537
 538     def getAttributeNode(self, attrname):
 539         return self._attrs.get(attrname)
 540
 541     def getAttributeNodeNS(self, namespaceURI, localName):
 542         return self._attrsNS.get((namespaceURI, localName))
 543
 544     def setAttributeNode(self, attr):
 545         if attr.ownerElement not in (None, self):
 546             raise xml.dom.InuseAttributeErr("attribute node already owned")
 547         old = self._attrs.get(attr.name, None)
 548         if old:
 549             old.unlink()
 550         self._attrs[attr.name] = attr
 551         self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
 552
 553         # This creates a circular reference, but Element.unlink()
 554         # breaks the cycle since the references to the attribute
 555         # dictionaries are tossed.
 556         attr.ownerElement = self
 557
 558         if old is not attr:
 559             # It might have already been part of this node, in which case
 560             # it doesn't represent a change, and should not be returned.
 561             return old
 562
 563     setAttributeNodeNS = setAttributeNode
 564
 565     def removeAttribute(self, name):
 566         attr = self._attrs[name]
 567         self.removeAttributeNode(attr)
 568
 569     def removeAttributeNS(self, namespaceURI, localName):
 570         attr = self._attrsNS[(namespaceURI, localName)]
 571         self.removeAttributeNode(attr)
 572
 573     def removeAttributeNode(self, node):
 574         node.unlink()
 575         del self._attrs[node.name]
 576         del self._attrsNS[(node.namespaceURI, node.localName)]
 577
 578     removeAttributeNodeNS = removeAttributeNode
 579
 580     def hasAttribute(self, name):
 581         return self._attrs.has_key(name)
 582
 583     def hasAttributeNS(self, namespaceURI, localName):
 584         return self._attrsNS.has_key((namespaceURI, localName))
 585
 586     def getElementsByTagName(self, name):
 587         return _getElementsByTagNameHelper(self, name, NodeList())
 588
 589     def getElementsByTagNameNS(self, namespaceURI, localName):
 590         return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
 591                                              NodeList())
 592
 593     def __repr__(self):
 594         return "<DOM Element: %s at %s>" % (self.tagName, id(self))
 595
 596     def writexml(self, writer, indent="", addindent="", newl=""):
 597         # indent = current indentation
 598         # addindent = indentation to add to higher levels
 599         # newl = newline string
 600         writer.write(indent+"<" + self.tagName)
 601
 602         attrs = self._get_attributes()
 603         a_names = attrs.keys()
 604         a_names.sort()
 605
 606         for a_name in a_names:
 607             writer.write(" %s=\"" % a_name)
 608             _write_data(writer, attrs[a_name].value)
 609             writer.write("\"")
 610         if self.childNodes:
 611             writer.write(">%s"%(newl))
 612             for node in self.childNodes:
 613                 node.writexml(writer,indent+addindent,addindent,newl)
 614             writer.write("%s</%s>%s" % (indent,self.tagName,newl))
 615         else:
 616             writer.write("/>%s"%(newl))
 617
 618     def _get_attributes(self):
 619         return NamedNodeMap(self._attrs, self._attrsNS, self)
 620
 621     try:
 622         property
 623     except NameError:
 624         pass
 625     else:
 626         attributes = property(_get_attributes,
 627                               doc="NamedNodeMap of attributes on the element.")
 628
 629     def hasAttributes(self):
 630         if self._attrs or self._attrsNS:
 631             return 1
 632         else:
 633             return 0
 634
 635 class Comment(Node):
 636     nodeType = Node.COMMENT_NODE
 637     nodeName = "#comment"
 638     attributes = None
 639     childNodeTypes = ()
 640
 641     def __init__(self, data):
 642         Node.__init__(self)
 643         self.data = self.nodeValue = data
 644
 645     def writexml(self, writer, indent="", addindent="", newl=""):
 646         writer.write("%s<!--%s-->%s" % (indent,self.data,newl))
 647
 648 class ProcessingInstruction(Node):
 649     nodeType = Node.PROCESSING_INSTRUCTION_NODE
 650     attributes = None
 651     childNodeTypes = ()
 652
 653     def __init__(self, target, data):
 654         Node.__init__(self)
 655         self.target = self.nodeName = target
 656         self.data = self.nodeValue = data
 657
 658     def writexml(self, writer, indent="", addindent="", newl=""):
 659         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
 660
 661 class CharacterData(Node):
 662     def __init__(self, data):
 663         if type(data) not in _StringTypes:
 664             raise TypeError, "node contents must be a string"
 665         Node.__init__(self)
 666         self.data = self.nodeValue = data
 667         self.length = len(data)
 668
 669     def __repr__(self):
 670         if len(self.data) > 10:
 671             dotdotdot = "..."
 672         else:
 673             dotdotdot = ""
 674         return "<DOM %s node \"%s%s\">" % (
 675             self.__class__.__name__, self.data[0:10], dotdotdot)
 676
 677     def substringData(self, offset, count):
 678         if offset < 0:
 679             raise xml.dom.IndexSizeErr("offset cannot be negative")
 680         if offset >= len(self.data):
 681             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 682         if count < 0:
 683             raise xml.dom.IndexSizeErr("count cannot be negative")
 684         return self.data[offset:offset+count]
 685
 686     def appendData(self, arg):
 687         self.data = self.data + arg
 688         self.nodeValue = self.data
 689         self.length = len(self.data)
 690
 691     def insertData(self, offset, arg):
 692         if offset < 0:
 693             raise xml.dom.IndexSizeErr("offset cannot be negative")
 694         if offset >= len(self.data):
 695             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 696         if arg:
 697             self.data = "%s%s%s" % (
 698                 self.data[:offset], arg, self.data[offset:])
 699             self.nodeValue = self.data
 700             self.length = len(self.data)
 701
 702     def deleteData(self, offset, count):
 703         if offset < 0:
 704             raise xml.dom.IndexSizeErr("offset cannot be negative")
 705         if offset >= len(self.data):
 706             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 707         if count < 0:
 708             raise xml.dom.IndexSizeErr("count cannot be negative")
 709         if count:
 710             self.data = self.data[:offset] + self.data[offset+count:]
 711             self.nodeValue = self.data
 712             self.length = len(self.data)
 713
 714     def replaceData(self, offset, count, arg):
 715         if offset < 0:
 716             raise xml.dom.IndexSizeErr("offset cannot be negative")
 717         if offset >= len(self.data):
 718             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 719         if count < 0:
 720             raise xml.dom.IndexSizeErr("count cannot be negative")
 721         if count:
 722             self.data = "%s%s%s" % (
 723                 self.data[:offset], arg, self.data[offset+count:])
 724             self.nodeValue = self.data
 725             self.length = len(self.data)
 726
 727 class Text(CharacterData):
 728     nodeType = Node.TEXT_NODE
 729     nodeName = "#text"
 730     attributes = None
 731     childNodeTypes = ()
 732
 733     def splitText(self, offset):
 734         if offset < 0 or offset > len(self.data):
 735             raise xml.dom.IndexSizeErr("illegal offset value")
 736         newText = Text(self.data[offset:])
 737         next = self.nextSibling
 738         if self.parentNode and self in self.parentNode.childNodes:
 739             if next is None:
 740                 self.parentNode.appendChild(newText)
 741             else:
 742                 self.parentNode.insertBefore(newText, next)
 743         self.data = self.data[:offset]
 744         self.nodeValue = self.data
 745         self.length = len(self.data)
 746         return newText
 747
 748     def writexml(self, writer, indent="", addindent="", newl=""):
 749         _write_data(writer, "%s%s%s"%(indent, self.data, newl))
 750
 751
 752 class CDATASection(Text):
 753     nodeType = Node.CDATA_SECTION_NODE
 754     nodeName = "#cdata-section"
 755
 756     def writexml(self, writer, indent="", addindent="", newl=""):
 757         writer.write("<![CDATA[%s]]>" % self.data)
 758
 759
 760 def _nssplit(qualifiedName):
 761     fields = qualifiedName.split(':', 1)
 762     if len(fields) == 2:
 763         return fields
 764     elif len(fields) == 1:
 765         return (None, fields[0])
 766
 767
 768 class DocumentType(Node):
 769     nodeType = Node.DOCUMENT_TYPE_NODE
 770     nodeValue = None
 771     attributes = None
 772     name = None
 773     publicId = None
 774     systemId = None
 775     internalSubset = None
 776     entities = None
 777     notations = None
 778
 779     def __init__(self, qualifiedName):
 780         Node.__init__(self)
 781         if qualifiedName:
 782             prefix, localname = _nssplit(qualifiedName)
 783             self.name = localname
 784
 785
 786 class DOMImplementation:
 787     def hasFeature(self, feature, version):
 788         if version not in ("1.0", "2.0"):
 789             return 0
 790         feature = feature.lower()
 791         return feature == "core"
 792
 793     def createDocument(self, namespaceURI, qualifiedName, doctype):
 794         if doctype and doctype.parentNode is not None:
 795             raise xml.dom.WrongDocumentErr(
 796                 "doctype object owned by another DOM tree")
 797         doc = self._createDocument()
 798         if doctype is None:
 799             doctype = self.createDocumentType(qualifiedName, None, None)
 800         if not qualifiedName:
 801             # The spec is unclear what to raise here; SyntaxErr
 802             # would be the other obvious candidate. Since Xerces raises
 803             # InvalidCharacterErr, and since SyntaxErr is not listed
 804             # for createDocument, that seems to be the better choice.
 805             # XXX: need to check for illegal characters here and in
 806             # createElement.
 807             raise xml.dom.InvalidCharacterErr("Element with no name")
 808         prefix, localname = _nssplit(qualifiedName)
 809         if prefix == "xml" \
 810            and namespaceURI != "http://www.w3.org/XML/1998/namespace":
 811             raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
 812         if prefix and not namespaceURI:
 813             raise xml.dom.NamespaceErr(
 814                 "illegal use of prefix without namespaces")
 815         element = doc.createElementNS(namespaceURI, qualifiedName)
 816         doc.appendChild(element)
 817         doctype.parentNode = doctype.ownerDocument = doc
 818         doc.doctype = doctype
 819         doc.implementation = self
 820         return doc
 821
 822     def createDocumentType(self, qualifiedName, publicId, systemId):
 823         doctype = DocumentType(qualifiedName)
 824         doctype.publicId = publicId
 825         doctype.systemId = systemId
 826         return doctype
 827
 828     # internal
 829     def _createDocument(self):
 830         return Document()
 831
 832 class Document(Node):
 833     nodeType = Node.DOCUMENT_NODE
 834     nodeName = "#document"
 835     nodeValue = None
 836     attributes = None
 837     doctype = None
 838     parentNode = None
 839     previousSibling = nextSibling = None
 840
 841     implementation = DOMImplementation()
 842     childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
 843                       Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
 844
 845     def appendChild(self, node):
 846         if node.nodeType not in self.childNodeTypes:
 847             raise HierarchyRequestErr, \
 848                   "%s cannot be child of %s" % (repr(node), repr(self))
 849         if node.parentNode is not None:
 850             node.parentNode.removeChild(node)
 851
 852         if node.nodeType == Node.ELEMENT_NODE \
 853            and self._get_documentElement():
 854             raise xml.dom.HierarchyRequestErr(
 855                 "two document elements disallowed")
 856         return Node.appendChild(self, node)
 857
 858     def removeChild(self, oldChild):
 859         self.childNodes.remove(oldChild)
 860         oldChild.nextSibling = oldChild.previousSibling = None
 861         oldChild.parentNode = None
 862         if self.documentElement is oldChild:
 863             self.documentElement = None
 864
 865         return oldChild
 866
 867     def _get_documentElement(self):
 868         for node in self.childNodes:
 869             if node.nodeType == Node.ELEMENT_NODE:
 870                 return node
 871
 872     try:
 873         property
 874     except NameError:
 875         pass
 876     else:
 877         documentElement = property(_get_documentElement,
 878                                    doc="Top-level element of this document.")
 879
 880     def unlink(self):
 881         if self.doctype is not None:
 882             self.doctype.unlink()
 883             self.doctype = None
 884         Node.unlink(self)
 885
 886     def createDocumentFragment(self):
 887         d = DocumentFragment()
 888         d.ownerDoc = self
 889         return d
 890
 891     def createElement(self, tagName):
 892         e = Element(tagName)
 893         e.ownerDocument = self
 894         return e
 895
 896     def createTextNode(self, data):
 897         t = Text(data)
 898         t.ownerDocument = self
 899         return t
 900
 901     def createCDATASection(self, data):
 902         c = CDATASection(data)
 903         c.ownerDocument = self
 904         return c
 905
 906     def createComment(self, data):
 907         c = Comment(data)
 908         c.ownerDocument = self
 909         return c
 910
 911     def createProcessingInstruction(self, target, data):
 912         p = ProcessingInstruction(target, data)
 913         p.ownerDocument = self
 914         return p
 915
 916     def createAttribute(self, qName):
 917         a = Attr(qName)
 918         a.ownerDocument = self
 919         a.value = ""
 920         return a
 921
 922     def createElementNS(self, namespaceURI, qualifiedName):
 923         prefix, localName = _nssplit(qualifiedName)
 924         e = Element(qualifiedName, namespaceURI, prefix, localName)
 925         e.ownerDocument = self
 926         return e
 927
 928     def createAttributeNS(self, namespaceURI, qualifiedName):
 929         prefix, localName = _nssplit(qualifiedName)
 930         a = Attr(qualifiedName, namespaceURI, localName, prefix)
 931         a.ownerDocument = self
 932         a.value = ""
 933         return a
 934
 935     def getElementsByTagName(self, name):
 936         return _getElementsByTagNameHelper(self, name, NodeList())
 937
 938     def getElementsByTagNameNS(self, namespaceURI, localName):
 939         return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
 940                                              NodeList())
 941
 942     def writexml(self, writer, indent="", addindent="", newl="",
 943                  encoding = None):
 944         if encoding is None:
 945             writer.write('<?xml version="1.0" ?>\n')
 946         else:
 947             writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
 948         for node in self.childNodes:
 949             node.writexml(writer, indent, addindent, newl)
 950
 951 def _get_StringIO():
 952     # we can't use cStringIO since it doesn't support Unicode strings
 953     from StringIO import StringIO
 954     return StringIO()
 955
 956 def _doparse(func, args, kwargs):
 957     events = apply(func, args, kwargs)
 958     toktype, rootNode = events.getEvent()
 959     events.expandNode(rootNode)
 960     events.clear()
 961     return rootNode
 962
 963 def parse(*args, **kwargs):
 964     """Parse a file into a DOM by filename or file object."""
 965     from xml.dom import pulldom
 966     return _doparse(pulldom.parse, args, kwargs)
 967
 968 def parseString(*args, **kwargs):
 969     """Parse a file into a DOM from a string."""
 970     from xml.dom import pulldom
 971     return _doparse(pulldom.parseString, args, kwargs)
 972
 973 def getDOMImplementation():
 974     return Document.implementation