Lib/xml/dom/minidom.py

   1 """\
   2 minidom.py -- a lightweight DOM implementation.
   3
   4 parse("foo.xml")
   5
   6 parseString("<foo><bar/></foo>")
   7
   8 Todo:
   9 =====
  10  * convenience methods for getting elements and text.
  11  * more testing
  12  * bring some of the writer and linearizer code into conformance with this
  13         interface
  14  * SAX 2 namespaces
  15 """
  16
  17 import string
  18 _string = string
  19 del string
  20
  21 from xml.dom import HierarchyRequestErr
  22
  23 # localize the types, and allow support for Unicode values if available:
  24 import types
  25 _TupleType = types.TupleType
  26 try:
  27     _StringTypes = (types.StringType, types.UnicodeType)
  28 except AttributeError:
  29     _StringTypes = (types.StringType,)
  30 del types
  31
  32 import xml.dom
  33 _Node = xml.dom.Node
  34
  35 class Node(_Node):
  36     allnodes = {}
  37     _debug = 0
  38     _makeParentNodes = 1
  39     debug = None
  40     childNodeTypes = ()
  41     namespaceURI = None # this is non-null only for elements and attributes
  42
  43     def __init__(self):
  44         self.childNodes = []
  45         self.parentNode = self.ownerDocument = None
  46         if Node._debug:
  47             index = repr(id(self)) + repr(self.__class__)
  48             Node.allnodes[index] = repr(self.__dict__)
  49             if Node.debug is None:
  50                 Node.debug = _get_StringIO()
  51                 #open("debug4.out", "w")
  52             Node.debug.write("create %s\n" % index)
  53
  54     def __getattr__(self, key):
  55         if key[0:2] == "__":
  56             raise AttributeError, key
  57         # getattr should never call getattr!
  58         if self.__dict__.has_key("inGetAttr"):
  59             del self.inGetAttr
  60             raise AttributeError, key
  61
  62         prefix, attrname = key[:5], key[5:]
  63         if prefix == "_get_":
  64             self.inGetAttr = 1
  65             if hasattr(self, attrname):
  66                 del self.inGetAttr
  67                 return (lambda self=self, attrname=attrname:
  68                                 getattr(self, attrname))
  69             else:
  70                 del self.inGetAttr
  71                 raise AttributeError, key
  72         else:
  73             self.inGetAttr = 1
  74             try:
  75                 func = getattr(self, "_get_" + key)
  76             except AttributeError:
  77                 raise AttributeError, key
  78             del self.inGetAttr
  79             return func()
  80
  81     def __nonzero__(self):
  82         return 1
  83
  84     def toxml(self):
  85         writer = _get_StringIO()
  86         self.writexml(writer)
  87         return writer.getvalue()
  88
  89     def toprettyxml(self, indent="\t", newl="\n"):
  90         # indent = the indentation string to prepend, per level
  91         # newl = the newline string to append
  92         writer = _get_StringIO()
  93         self.writexml(writer, "", indent, newl)
  94         return writer.getvalue()
  95
  96     def hasChildNodes(self):
  97         if self.childNodes:
  98             return 1
  99         else:
 100             return 0
 101
 102     def _get_firstChild(self):
 103         if self.childNodes:
 104             return self.childNodes[0]
 105
 106     def _get_lastChild(self):
 107         if self.childNodes:
 108             return self.childNodes[-1]
 109
 110     def insertBefore(self, newChild, refChild):
 111         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 112             for c in newChild.childNodes:
 113                 self.insertBefore(c, refChild)
 114             ### The DOM does not clearly specify what to return in this case
 115             return newChild
 116         if newChild.nodeType not in self.childNodeTypes:
 117             raise HierarchyRequestErr, \
 118                   "%s cannot be child of %s" % (repr(newChild), repr(self))
 119         if newChild.parentNode is not None:
 120             newChild.parentNode.removeChild(newChild)
 121         if refChild is None:
 122             self.appendChild(newChild)
 123         else:
 124             index = self.childNodes.index(refChild)
 125             self.childNodes.insert(index, newChild)
 126             newChild.nextSibling = refChild
 127             refChild.previousSibling = newChild
 128             if index:
 129                 node = self.childNodes[index-1]
 130                 node.nextSibling = newChild
 131                 newChild.previousSibling = node
 132             else:
 133                 newChild.previousSibling = None
 134             if self._makeParentNodes:
 135                 newChild.parentNode = self
 136         return newChild
 137
 138     def appendChild(self, node):
 139         if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 140             for c in node.childNodes:
 141                 self.appendChild(c)
 142             ### The DOM does not clearly specify what to return in this case
 143             return node
 144         if node.nodeType not in self.childNodeTypes:
 145             raise HierarchyRequestErr, \
 146                   "%s cannot be child of %s" % (repr(node), repr(self))
 147         if node.parentNode is not None:
 148             node.parentNode.removeChild(node)
 149         if self.childNodes:
 150             last = self.lastChild
 151             node.previousSibling = last
 152             last.nextSibling = node
 153         else:
 154             node.previousSibling = None
 155         node.nextSibling = None
 156         self.childNodes.append(node)
 157         if self._makeParentNodes:
 158             node.parentNode = self
 159         return node
 160
 161     def replaceChild(self, newChild, oldChild):
 162         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
 163             refChild = oldChild.nextSibling
 164             self.removeChild(oldChild)
 165             return self.insertBefore(newChild, refChild)
 166         if newChild.nodeType not in self.childNodeTypes:
 167             raise HierarchyRequestErr, \
 168                   "%s cannot be child of %s" % (repr(newChild), repr(self))
 169         if newChild.parentNode is not None:
 170             newChild.parentNode.removeChild(newChild)
 171         if newChild is oldChild:
 172             return
 173         index = self.childNodes.index(oldChild)
 174         self.childNodes[index] = newChild
 175         if self._makeParentNodes:
 176             newChild.parentNode = self
 177             oldChild.parentNode = None
 178         newChild.nextSibling = oldChild.nextSibling
 179         newChild.previousSibling = oldChild.previousSibling
 180         oldChild.nextSibling = None
 181         oldChild.previousSibling = None
 182         if newChild.previousSibling:
 183             newChild.previousSibling.nextSibling = newChild
 184         if newChild.nextSibling:
 185             newChild.nextSibling.previousSibling = newChild
 186         return oldChild
 187
 188     def removeChild(self, oldChild):
 189         self.childNodes.remove(oldChild)
 190         if oldChild.nextSibling is not None:
 191             oldChild.nextSibling.previousSibling = oldChild.previousSibling
 192         if oldChild.previousSibling is not None:
 193             oldChild.previousSibling.nextSibling = oldChild.nextSibling
 194         oldChild.nextSibling = oldChild.previousSibling = None
 195
 196         if self._makeParentNodes:
 197             oldChild.parentNode = None
 198         return oldChild
 199
 200     def normalize(self):
 201         L = []
 202         for child in self.childNodes:
 203             if child.nodeType == Node.TEXT_NODE:
 204                 data = child.data
 205                 if data and L and L[-1].nodeType == child.nodeType:
 206                     # collapse text node
 207                     node = L[-1]
 208                     node.data = node.nodeValue = node.data + child.data
 209                     node.nextSibling = child.nextSibling
 210                     child.unlink()
 211                 elif data:
 212                     if L:
 213                         L[-1].nextSibling = child
 214                         child.previousSibling = L[-1]
 215                     else:
 216                         child.previousSibling = None
 217                     L.append(child)
 218                 else:
 219                     # empty text node; discard
 220                     child.unlink()
 221             else:
 222                 if L:
 223                     L[-1].nextSibling = child
 224                     child.previousSibling = L[-1]
 225                 else:
 226                     child.previousSibling = None
 227                 L.append(child)
 228                 if child.nodeType == Node.ELEMENT_NODE:
 229                     child.normalize()
 230         self.childNodes[:] = L
 231
 232     def cloneNode(self, deep):
 233         import new
 234         clone = new.instance(self.__class__, self.__dict__.copy())
 235         if self._makeParentNodes:
 236             clone.parentNode = None
 237         clone.childNodes = []
 238         if deep:
 239             for child in self.childNodes:
 240                 clone.appendChild(child.cloneNode(1))
 241         return clone
 242
 243     # DOM Level 3 (Working Draft 2001-Jan-26)
 244
 245     def isSameNode(self, other):
 246         return self is other
 247
 248     # minidom-specific API:
 249
 250     def unlink(self):
 251         self.parentNode = self.ownerDocument = None
 252         for child in self.childNodes:
 253             child.unlink()
 254         self.childNodes = None
 255         self.previousSibling = None
 256         self.nextSibling = None
 257         if Node._debug:
 258             index = repr(id(self)) + repr(self.__class__)
 259             self.debug.write("Deleting: %s\n" % index)
 260             del Node.allnodes[index]
 261
 262 def _write_data(writer, data):
 263     "Writes datachars to writer."
 264     replace = _string.replace
 265     data = replace(data, "&", "&amp;")
 266     data = replace(data, "<", "&lt;")
 267     data = replace(data, "\"", "&quot;")
 268     data = replace(data, ">", "&gt;")
 269     writer.write(data)
 270
 271 def _getElementsByTagNameHelper(parent, name, rc):
 272     for node in parent.childNodes:
 273         if node.nodeType == Node.ELEMENT_NODE and \
 274             (name == "*" or node.tagName == name):
 275             rc.append(node)
 276         _getElementsByTagNameHelper(node, name, rc)
 277     return rc
 278
 279 def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
 280     for node in parent.childNodes:
 281         if node.nodeType == Node.ELEMENT_NODE:
 282             if ((localName == "*" or node.localName == localName) and
 283                 (nsURI == "*" or node.namespaceURI == nsURI)):
 284                 rc.append(node)
 285             _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
 286     return rc
 287
 288 class DocumentFragment(Node):
 289     nodeType = Node.DOCUMENT_FRAGMENT_NODE
 290     nodeName = "#document-fragment"
 291     nodeValue = None
 292     attributes = None
 293     parentNode = None
 294     childNodeTypes = (Node.ELEMENT_NODE,
 295                       Node.TEXT_NODE,
 296                       Node.CDATA_SECTION_NODE,
 297                       Node.ENTITY_REFERENCE_NODE,
 298                       Node.PROCESSING_INSTRUCTION_NODE,
 299                       Node.COMMENT_NODE,
 300                       Node.NOTATION_NODE)
 301
 302
 303 class Attr(Node):
 304     nodeType = Node.ATTRIBUTE_NODE
 305     attributes = None
 306     ownerElement = None
 307     childNodeTypes = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
 308
 309     def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
 310         # skip setattr for performance
 311         d = self.__dict__
 312         d["localName"] = localName or qName
 313         d["nodeName"] = d["name"] = qName
 314         d["namespaceURI"] = namespaceURI
 315         d["prefix"] = prefix
 316         Node.__init__(self)
 317         # nodeValue and value are set elsewhere
 318
 319     def __setattr__(self, name, value):
 320         d = self.__dict__
 321         if name in ("value", "nodeValue"):
 322             d["value"] = d["nodeValue"] = value
 323         elif name in ("name", "nodeName"):
 324             d["name"] = d["nodeName"] = value
 325         else:
 326             d[name] = value
 327
 328     def cloneNode(self, deep):
 329         clone = Node.cloneNode(self, deep)
 330         if clone.__dict__.has_key("ownerElement"):
 331             del clone.ownerElement
 332         return clone
 333
 334
 335 class NamedNodeMap:
 336     """The attribute list is a transient interface to the underlying
 337     dictionaries.  Mutations here will change the underlying element's
 338     dictionary.
 339
 340     Ordering is imposed artificially and does not reflect the order of
 341     attributes as found in an input document.
 342     """
 343
 344     def __init__(self, attrs, attrsNS):
 345         self._attrs = attrs
 346         self._attrsNS = attrsNS
 347
 348     def __getattr__(self, name):
 349         if name == "length":
 350             return len(self._attrs)
 351         raise AttributeError, name
 352
 353     def item(self, index):
 354         try:
 355             return self[self._attrs.keys()[index]]
 356         except IndexError:
 357             return None
 358
 359     def items(self):
 360         L = []
 361         for node in self._attrs.values():
 362             L.append((node.nodeName, node.value))
 363         return L
 364
 365     def itemsNS(self):
 366         L = []
 367         for node in self._attrs.values():
 368             L.append(((node.URI, node.localName), node.value))
 369         return L
 370
 371     def keys(self):
 372         return self._attrs.keys()
 373
 374     def keysNS(self):
 375         return self._attrsNS.keys()
 376
 377     def values(self):
 378         return self._attrs.values()
 379
 380     def get(self, name, value = None):
 381         return self._attrs.get(name, value)
 382
 383     def __len__(self):
 384         return self.length
 385
 386     def __cmp__(self, other):
 387         if self._attrs is getattr(other, "_attrs", None):
 388             return 0
 389         else:
 390             return cmp(id(self), id(other))
 391
 392     #FIXME: is it appropriate to return .value?
 393     def __getitem__(self, attname_or_tuple):
 394         if type(attname_or_tuple) is _TupleType:
 395             return self._attrsNS[attname_or_tuple]
 396         else:
 397             return self._attrs[attname_or_tuple]
 398
 399     # same as set
 400     def __setitem__(self, attname, value):
 401         if type(value) in _StringTypes:
 402             node = Attr(attname)
 403             node.value = value
 404         else:
 405             if not isinstance(value, Attr):
 406                 raise TypeError, "value must be a string or Attr object"
 407             node = value
 408         self.setNamedItem(node)
 409
 410     def setNamedItem(self, node):
 411         if not isinstance(node, Attr):
 412             raise HierarchyRequestErr, \
 413                   "%s cannot be child of %s" % (repr(node), repr(self))
 414         old = self._attrs.get(node.name)
 415         if old:
 416             old.unlink()
 417         self._attrs[node.name] = node
 418         self._attrsNS[(node.namespaceURI, node.localName)] = node
 419         return old
 420
 421     def setNamedItemNS(self, node):
 422         return self.setNamedItem(node)
 423
 424     def __delitem__(self, attname_or_tuple):
 425         node = self[attname_or_tuple]
 426         node.unlink()
 427         del self._attrs[node.name]
 428         del self._attrsNS[(node.namespaceURI, node.localName)]
 429         self.length = len(self._attrs)
 430
 431 AttributeList = NamedNodeMap
 432
 433
 434 class Element(Node):
 435     nodeType = Node.ELEMENT_NODE
 436     nextSibling = None
 437     previousSibling = None
 438     childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
 439                       Node.COMMENT_NODE, Node.TEXT_NODE,
 440                       Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE)
 441
 442     def __init__(self, tagName, namespaceURI=None, prefix="",
 443                  localName=None):
 444         Node.__init__(self)
 445         self.tagName = self.nodeName = tagName
 446         self.localName = localName or tagName
 447         self.prefix = prefix
 448         self.namespaceURI = namespaceURI
 449         self.nodeValue = None
 450
 451         self._attrs = {}   # attributes are double-indexed:
 452         self._attrsNS = {} #    tagName -> Attribute
 453                            #    URI,localName -> Attribute
 454                            # in the future: consider lazy generation
 455                            # of attribute objects this is too tricky
 456                            # for now because of headaches with
 457                            # namespaces.
 458
 459     def cloneNode(self, deep):
 460         clone = Node.cloneNode(self, deep)
 461         clone._attrs = {}
 462         clone._attrsNS = {}
 463         for attr in self._attrs.values():
 464             node = attr.cloneNode(1)
 465             clone._attrs[node.name] = node
 466             clone._attrsNS[(node.namespaceURI, node.localName)] = node
 467             node.ownerElement = clone
 468         return clone
 469
 470     def unlink(self):
 471         for attr in self._attrs.values():
 472             attr.unlink()
 473         self._attrs = None
 474         self._attrsNS = None
 475         Node.unlink(self)
 476
 477     def getAttribute(self, attname):
 478         try:
 479             return self._attrs[attname].value
 480         except KeyError:
 481             return ""
 482
 483     def getAttributeNS(self, namespaceURI, localName):
 484         try:
 485             return self._attrsNS[(namespaceURI, localName)].value
 486         except KeyError:
 487             return ""
 488
 489     def setAttribute(self, attname, value):
 490         attr = Attr(attname)
 491         # for performance
 492         attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
 493         self.setAttributeNode(attr)
 494
 495     def setAttributeNS(self, namespaceURI, qualifiedName, value):
 496         prefix, localname = _nssplit(qualifiedName)
 497         # for performance
 498         attr = Attr(qualifiedName, namespaceURI, localname, prefix)
 499         attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
 500         self.setAttributeNode(attr)
 501
 502     def getAttributeNode(self, attrname):
 503         return self._attrs.get(attrname)
 504
 505     def getAttributeNodeNS(self, namespaceURI, localName):
 506         return self._attrsNS.get((namespaceURI, localName))
 507
 508     def setAttributeNode(self, attr):
 509         if attr.ownerElement not in (None, self):
 510             raise xml.dom.InuseAttributeErr("attribute node already owned")
 511         old = self._attrs.get(attr.name, None)
 512         if old:
 513             old.unlink()
 514         self._attrs[attr.name] = attr
 515         self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
 516
 517         # This creates a circular reference, but Element.unlink()
 518         # breaks the cycle since the references to the attribute
 519         # dictionaries are tossed.
 520         attr.ownerElement = self
 521
 522         if old is not attr:
 523             # It might have already been part of this node, in which case
 524             # it doesn't represent a change, and should not be returned.
 525             return old
 526
 527     setAttributeNodeNS = setAttributeNode
 528
 529     def removeAttribute(self, name):
 530         attr = self._attrs[name]
 531         self.removeAttributeNode(attr)
 532
 533     def removeAttributeNS(self, namespaceURI, localName):
 534         attr = self._attrsNS[(namespaceURI, localName)]
 535         self.removeAttributeNode(attr)
 536
 537     def removeAttributeNode(self, node):
 538         node.unlink()
 539         del self._attrs[node.name]
 540         del self._attrsNS[(node.namespaceURI, node.localName)]
 541
 542     removeAttributeNodeNS = removeAttributeNode
 543
 544     def hasAttribute(self, name):
 545         return self._attrs.has_key(name)
 546
 547     def hasAttributeNS(self, namespaceURI, localName):
 548         return self._attrsNS.has_key((namespaceURI, localName))
 549
 550     def getElementsByTagName(self, name):
 551         return _getElementsByTagNameHelper(self, name, [])
 552
 553     def getElementsByTagNameNS(self, namespaceURI, localName):
 554         return _getElementsByTagNameNSHelper(self, namespaceURI, localName, [])
 555
 556     def __repr__(self):
 557         return "<DOM Element: %s at %s>" % (self.tagName, id(self))
 558
 559     def writexml(self, writer, indent="", addindent="", newl=""):
 560         # indent = current indentation
 561         # addindent = indentation to add to higher levels
 562         # newl = newline string
 563         writer.write(indent+"<" + self.tagName)
 564
 565         attrs = self._get_attributes()
 566         a_names = attrs.keys()
 567         a_names.sort()
 568
 569         for a_name in a_names:
 570             writer.write(" %s=\"" % a_name)
 571             _write_data(writer, attrs[a_name].value)
 572             writer.write("\"")
 573         if self.childNodes:
 574             writer.write(">%s"%(newl))
 575             for node in self.childNodes:
 576                 node.writexml(writer,indent+addindent,addindent,newl)
 577             writer.write("%s</%s>%s" % (indent,self.tagName,newl))
 578         else:
 579             writer.write("/>%s"%(newl))
 580
 581     def _get_attributes(self):
 582         return AttributeList(self._attrs, self._attrsNS)
 583
 584     def hasAttributes(self):
 585         if self._attrs or self._attrsNS:
 586             return 1
 587         else:
 588             return 0
 589
 590 class Comment(Node):
 591     nodeType = Node.COMMENT_NODE
 592     nodeName = "#comment"
 593     attributes = None
 594     childNodeTypes = ()
 595
 596     def __init__(self, data):
 597         Node.__init__(self)
 598         self.data = self.nodeValue = data
 599
 600     def writexml(self, writer, indent="", addindent="", newl=""):
 601         writer.write("%s<!--%s-->%s" % (indent,self.data,newl))
 602
 603 class ProcessingInstruction(Node):
 604     nodeType = Node.PROCESSING_INSTRUCTION_NODE
 605     attributes = None
 606     childNodeTypes = ()
 607
 608     def __init__(self, target, data):
 609         Node.__init__(self)
 610         self.target = self.nodeName = target
 611         self.data = self.nodeValue = data
 612
 613     def writexml(self, writer, indent="", addindent="", newl=""):
 614         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
 615
 616 class CharacterData(Node):
 617     def __init__(self, data):
 618         if type(data) not in _StringTypes:
 619             raise TypeError, "node contents must be a string"
 620         Node.__init__(self)
 621         self.data = self.nodeValue = data
 622         self.length = len(data)
 623
 624     def __repr__(self):
 625         if len(self.data) > 10:
 626             dotdotdot = "..."
 627         else:
 628             dotdotdot = ""
 629         return "<DOM %s node \"%s%s\">" % (
 630             self.__class__.__name__, self.data[0:10], dotdotdot)
 631
 632     def substringData(self, offset, count):
 633         if offset < 0:
 634             raise xml.dom.IndexSizeErr("offset cannot be negative")
 635         if offset >= len(self.data):
 636             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 637         if count < 0:
 638             raise xml.dom.IndexSizeErr("count cannot be negative")
 639         return self.data[offset:offset+count]
 640
 641     def appendData(self, arg):
 642         self.data = self.data + arg
 643         self.nodeValue = self.data
 644         self.length = len(self.data)
 645
 646     def insertData(self, offset, arg):
 647         if offset < 0:
 648             raise xml.dom.IndexSizeErr("offset cannot be negative")
 649         if offset >= len(self.data):
 650             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 651         if arg:
 652             self.data = "%s%s%s" % (
 653                 self.data[:offset], arg, self.data[offset:])
 654             self.nodeValue = self.data
 655             self.length = len(self.data)
 656
 657     def deleteData(self, offset, count):
 658         if offset < 0:
 659             raise xml.dom.IndexSizeErr("offset cannot be negative")
 660         if offset >= len(self.data):
 661             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 662         if count < 0:
 663             raise xml.dom.IndexSizeErr("count cannot be negative")
 664         if count:
 665             self.data = self.data[:offset] + self.data[offset+count:]
 666             self.nodeValue = self.data
 667             self.length = len(self.data)
 668
 669     def replaceData(self, offset, count, arg):
 670         if offset < 0:
 671             raise xml.dom.IndexSizeErr("offset cannot be negative")
 672         if offset >= len(self.data):
 673             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
 674         if count < 0:
 675             raise xml.dom.IndexSizeErr("count cannot be negative")
 676         if count:
 677             self.data = "%s%s%s" % (
 678                 self.data[:offset], arg, self.data[offset+count:])
 679             self.nodeValue = self.data
 680             self.length = len(self.data)
 681
 682 class Text(CharacterData):
 683     nodeType = Node.TEXT_NODE
 684     nodeName = "#text"
 685     attributes = None
 686     childNodeTypes = ()
 687
 688     def splitText(self, offset):
 689         if offset < 0 or offset > len(self.data):
 690             raise xml.dom.IndexSizeErr("illegal offset value")
 691         newText = Text(self.data[offset:])
 692         next = self.nextSibling
 693         if self.parentNode and self in self.parentNode.childNodes:
 694             if next is None:
 695                 self.parentNode.appendChild(newText)
 696             else:
 697                 self.parentNode.insertBefore(newText, next)
 698         self.data = self.data[:offset]
 699         self.nodeValue = self.data
 700         self.length = len(self.data)
 701         return newText
 702
 703     def writexml(self, writer, indent="", addindent="", newl=""):
 704         _write_data(writer, "%s%s%s"%(indent, self.data, newl))
 705
 706
 707 class CDATASection(Text):
 708     nodeType = Node.CDATA_SECTION_NODE
 709     nodeName = "#cdata-section"
 710
 711     def writexml(self, writer, indent="", addindent="", newl=""):
 712         _write_data(writer, "<![CDATA[%s]]>" % self.data)
 713
 714
 715 def _nssplit(qualifiedName):
 716     fields = _string.split(qualifiedName, ':', 1)
 717     if len(fields) == 2:
 718         return fields
 719     elif len(fields) == 1:
 720         return ('', fields[0])
 721
 722
 723 class DocumentType(Node):
 724     nodeType = Node.DOCUMENT_TYPE_NODE
 725     nodeValue = None
 726     attributes = None
 727     name = None
 728     publicId = None
 729     systemId = None
 730     internalSubset = None
 731     entities = None
 732     notations = None
 733
 734     def __init__(self, qualifiedName):
 735         Node.__init__(self)
 736         if qualifiedName:
 737             prefix, localname = _nssplit(qualifiedName)
 738             self.name = localname
 739
 740
 741 class DOMImplementation:
 742     def hasFeature(self, feature, version):
 743         if version not in ("1.0", "2.0"):
 744             return 0
 745         feature = _string.lower(feature)
 746         return feature == "core"
 747
 748     def createDocument(self, namespaceURI, qualifiedName, doctype):
 749         if doctype and doctype.parentNode is not None:
 750             raise xml.dom.WrongDocumentErr(
 751                 "doctype object owned by another DOM tree")
 752         doc = self._createDocument()
 753         if doctype is None:
 754             doctype = self.createDocumentType(qualifiedName, None, None)
 755         if not qualifiedName:
 756             # The spec is unclear what to raise here; SyntaxErr
 757             # would be the other obvious candidate. Since Xerces raises
 758             # InvalidCharacterErr, and since SyntaxErr is not listed
 759             # for createDocument, that seems to be the better choice.
 760             # XXX: need to check for illegal characters here and in
 761             # createElement.
 762             raise xml.dom.InvalidCharacterErr("Element with no name")
 763         prefix, localname = _nssplit(qualifiedName)
 764         if prefix == "xml" \
 765            and namespaceURI != "http://www.w3.org/XML/1998/namespace":
 766             raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
 767         if prefix and not namespaceURI:
 768             raise xml.dom.NamespaceErr(
 769                 "illegal use of prefix without namespaces")
 770         element = doc.createElementNS(namespaceURI, qualifiedName)
 771         doc.appendChild(element)
 772         doctype.parentNode = doctype.ownerDocument = doc
 773         doc.doctype = doctype
 774         doc.implementation = self
 775         return doc
 776
 777     def createDocumentType(self, qualifiedName, publicId, systemId):
 778         doctype = DocumentType(qualifiedName)
 779         doctype.publicId = publicId
 780         doctype.systemId = systemId
 781         return doctype
 782
 783     # internal
 784     def _createDocument(self):
 785         return Document()
 786
 787 class Document(Node):
 788     nodeType = Node.DOCUMENT_NODE
 789     nodeName = "#document"
 790     nodeValue = None
 791     attributes = None
 792     doctype = None
 793     parentNode = None
 794     previousSibling = nextSibling = None
 795
 796     implementation = DOMImplementation()
 797     childNodeTypes = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
 798                       Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
 799
 800     def appendChild(self, node):
 801         if node.nodeType not in self.childNodeTypes:
 802             raise HierarchyRequestErr, \
 803                   "%s cannot be child of %s" % (repr(node), repr(self))
 804         if node.parentNode is not None:
 805             node.parentNode.removeChild(node)
 806
 807         if node.nodeType == Node.ELEMENT_NODE \
 808            and self._get_documentElement():
 809             raise xml.dom.HierarchyRequestErr(
 810                 "two document elements disallowed")
 811         return Node.appendChild(self, node)
 812
 813     def removeChild(self, oldChild):
 814         self.childNodes.remove(oldChild)
 815         oldChild.nextSibling = oldChild.previousSibling = None
 816         oldChild.parentNode = None
 817         if self.documentElement is oldChild:
 818             self.documentElement = None
 819
 820         return oldChild
 821
 822     def _get_documentElement(self):
 823         for node in self.childNodes:
 824             if node.nodeType == Node.ELEMENT_NODE:
 825                 return node
 826
 827     def unlink(self):
 828         if self.doctype is not None:
 829             self.doctype.unlink()
 830             self.doctype = None
 831         Node.unlink(self)
 832
 833     def createDocumentFragment(self):
 834         d = DocumentFragment()
 835         d.ownerDoc = self
 836         return d
 837
 838     def createElement(self, tagName):
 839         e = Element(tagName)
 840         e.ownerDocument = self
 841         return e
 842
 843     def createTextNode(self, data):
 844         t = Text(data)
 845         t.ownerDocument = self
 846         return t
 847
 848     def createCDATASection(self, data):
 849         c = CDATASection(data)
 850         c.ownerDocument = self
 851         return c
 852
 853     def createComment(self, data):
 854         c = Comment(data)
 855         c.ownerDocument = self
 856         return c
 857
 858     def createProcessingInstruction(self, target, data):
 859         p = ProcessingInstruction(target, data)
 860         p.ownerDocument = self
 861         return p
 862
 863     def createAttribute(self, qName):
 864         a = Attr(qName)
 865         a.ownerDocument = self
 866         a.value = ""
 867         return a
 868
 869     def createElementNS(self, namespaceURI, qualifiedName):
 870         prefix, localName = _nssplit(qualifiedName)
 871         e = Element(qualifiedName, namespaceURI, prefix, localName)
 872         e.ownerDocument = self
 873         return e
 874
 875     def createAttributeNS(self, namespaceURI, qualifiedName):
 876         prefix, localName = _nssplit(qualifiedName)
 877         a = Attr(qualifiedName, namespaceURI, localName, prefix)
 878         a.ownerDocument = self
 879         a.value = ""
 880         return a
 881
 882     def getElementsByTagName(self, name):
 883         return _getElementsByTagNameHelper(self, name, [])
 884
 885     def getElementsByTagNameNS(self, namespaceURI, localName):
 886         return _getElementsByTagNameNSHelper(self, namespaceURI, localName, [])
 887
 888     def writexml(self, writer, indent="", addindent="", newl=""):
 889         writer.write('<?xml version="1.0" ?>\n')
 890         for node in self.childNodes:
 891             node.writexml(writer, indent, addindent, newl)
 892
 893 def _get_StringIO():
 894     # we can't use cStringIO since it doesn't support Unicode strings
 895     from StringIO import StringIO
 896     return StringIO()
 897
 898 def _doparse(func, args, kwargs):
 899     events = apply(func, args, kwargs)
 900     toktype, rootNode = events.getEvent()
 901     events.expandNode(rootNode)
 902     events.clear()
 903     return rootNode
 904
 905 def parse(*args, **kwargs):
 906     """Parse a file into a DOM by filename or file object."""
 907     from xml.dom import pulldom
 908     return _doparse(pulldom.parse, args, kwargs)
 909
 910 def parseString(*args, **kwargs):
 911     """Parse a file into a DOM from a string."""
 912     from xml.dom import pulldom
 913     return _doparse(pulldom.parseString, args, kwargs)
 914
 915 def getDOMImplementation():
 916     return Document.implementation