2 minidom.py -- a lightweight DOM implementation based on SAX.
6 parseString( "<foo><bar/></foo>" )
10 * convenience methods for getting elements and text.
12 * bring some of the writer and linearizer code into conformance with this
19 from StringIO
import StringIO
26 CDATA_SECTION_NODE
= 4
27 ENTITY_REFERENCE_NODE
= 5
29 PROCESSING_INSTRUCTION_NODE
= 7
32 DOCUMENT_TYPE_NODE
= 10
33 DOCUMENT_FRAGMENT_NODE
= 11
44 index
= repr(id(self
)) + repr(self
.__class
__)
45 Node
.allnodes
[index
] = repr(self
.__dict
__)
46 if Node
.debug
is None:
47 Node
.debug
= StringIO()
48 #open( "debug4.out", "w" )
49 Node
.debug
.write("create %s\n" % index
)
51 def __getattr__(self
, key
):
54 # getattr should never call getattr!
55 if self
.__dict
__.has_key("inGetAttr"):
57 raise AttributeError, key
59 prefix
, attrname
= key
[:5], key
[5:]
62 if hasattr(self
, attrname
):
64 return (lambda self
=self
, attrname
=attrname
:
65 getattr(self
, attrname
))
68 raise AttributeError, key
72 func
= getattr(self
, "_get_" + key
)
73 except AttributeError:
74 raise AttributeError, key
78 def __nonzero__(self
):
84 return writer
.getvalue()
86 def hasChildNodes(self
):
def _get_firstChild(self):
    """Return the first child of this node, or None if there is none.

    The DOM specifies that ``firstChild`` is null for a node without
    children; indexing an empty ``childNodes`` list would raise
    IndexError instead.
    """
    children = self.childNodes
    if not children:
        return None
    return children[0]
def _get_lastChild(self):
    """Return the last child of this node, or None if there is none.

    The DOM specifies that ``lastChild`` is null for a node without
    children; indexing an empty ``childNodes`` list would raise
    IndexError instead.
    """
    children = self.childNodes
    if not children:
        return None
    return children[-1]
def insertBefore(self, newChild, refChild):
    """Insert *newChild* into ``childNodes`` immediately before *refChild*.

    If *refChild* is None the new node is placed at the end of the child
    list, as the DOM specifies.  When ``self._makeParentNodes`` is true
    the new child's ``parentNode`` is set to this node.

    Returns the inserted node.  Raises ValueError if *refChild* is not
    None and is not one of this node's children.
    """
    children = self.childNodes
    if refChild is None:
        position = len(children)
    else:
        position = children.index(refChild)
    children.insert(position, newChild)
    # NOTE(review): previousSibling/nextSibling are not updated here,
    # although appendChild does maintain them -- confirm whether callers
    # rely on sibling links after an insertBefore.
    if self._makeParentNodes:
        newChild.parentNode = self
    return newChild
104 def appendChild(self
, node
):
106 last
= self
.lastChild
107 node
.previousSibling
= last
108 last
.nextSibling
= node
110 node
.previousSibling
= None
111 node
.nextSibling
= None
112 self
.childNodes
.append(node
)
def replaceChild(self, newChild, oldChild):
    """Replace *oldChild* with *newChild* in this node's child list.

    Returns the node that was replaced, as the DOM specifies.  Raises
    ValueError if *oldChild* is not one of this node's children.
    """
    position = self.childNodes.index(oldChild)
    # The slot must receive the *new* node; storing oldChild back into
    # it would leave the tree unchanged.
    self.childNodes[position] = newChild
    return oldChild
def removeChild(self, oldChild):
    """Remove *oldChild* from this node's child list.

    Returns the removed node, as the DOM specifies.  Raises ValueError
    if *oldChild* is not one of this node's children.
    """
    position = self.childNodes.index(oldChild)
    del self.childNodes[position]
    return oldChild
123 def cloneNode(self
, deep
):
125 clone
= new
.instance(self
.__class
__, self
.__dict
__)
126 clone
.attributes
= self
.attributes
.copy()
128 clone
.childNodes
= []
130 clone
.childNodes
= map(lambda x
: x
.cloneNode
, self
.childNodes
)
134 self
.parentNode
= None
135 while self
.childNodes
:
136 self
.childNodes
[-1].unlink()
137 del self
.childNodes
[-1] # probably not most efficient!
138 self
.childNodes
= None
139 self
.previousSibling
= None
140 self
.nextSibling
= None
142 for attr
in self
._attrs
.values():
143 self
.removeAttributeNode(attr
)
144 assert not len(self
._attrs
)
145 assert not len(self
._attrsNS
)
147 index
= repr(id(self
)) + repr(self
.__class
__)
148 self
.debug
.write("Deleting: %s\n" % index
)
149 del Node
.allnodes
[index
]
151 def _write_data(writer
, data
):
152 "Writes datachars to writer."
153 data
= string
.replace(data
, "&", "&")
154 data
= string
.replace(data
, "<", "<")
155 data
= string
.replace(data
, "\"", """)
156 data
= string
.replace(data
, ">", ">")
159 def _getElementsByTagNameHelper(parent
, name
, rc
):
160 for node
in parent
.childNodes
:
161 if node
.nodeType
== Node
.ELEMENT_NODE
and \
162 (name
== "*" or node
.tagName
== name
):
164 _getElementsByTagNameHelper(node
, name
, rc
)
167 def _getElementsByTagNameNSHelper(parent
, nsURI
, localName
, rc
):
168 for node
in parent
.childNodes
:
169 if node
.nodeType
== Node
.ELEMENT_NODE
:
170 if ((localName
== "*" or node
.tagName
== localName
) and
171 (nsURI
== "*" or node
.namespaceURI
== nsURI
)):
173 _getElementsByTagNameNSHelper(node
, name
, rc
)
176 nodeType
= Node
.ATTRIBUTE_NODE
178 def __init__(self
, qName
, namespaceURI
="", localName
=None, prefix
=None):
179 # skip setattr for performance
180 self
.__dict
__["localName"] = localName
or qName
181 self
.__dict
__["nodeName"] = self
.__dict
__["name"] = qName
182 self
.__dict
__["namespaceURI"] = namespaceURI
183 self
.__dict
__["prefix"] = prefix
184 self
.attributes
= None
186 # nodeValue and value are set elsewhere
188 def __setattr__(self
, name
, value
):
189 if name
in ("value", "nodeValue"):
190 self
.__dict
__["value"] = self
.__dict
__["nodeValue"] = value
192 self
.__dict
__[name
] = value
195 """the attribute list is a transient interface to the underlying
196 dictionaries. mutations here will change the underlying element's
198 def __init__(self
, attrs
, attrsNS
):
200 self
._attrsNS
= attrsNS
201 self
.length
= len(self
._attrs
.keys())
203 def item(self
, index
):
205 return self
[self
.keys()[index
]]
210 return map(lambda node
: (node
.tagName
, node
.value
),
211 self
._attrs
.values())
214 return map(lambda node
: ((node
.URI
, node
.localName
), node
.value
),
215 self
._attrs
.values())
218 return self
._attrs
.keys()
221 return self
._attrsNS
.keys()
224 return self
._attrs
.values()
229 def __cmp__(self
, other
):
230 if self
._attrs
is getattr(other
, "_attrs", None):
233 return cmp(id(self
), id(other
))
235 #FIXME: is it appropriate to return .value?
236 def __getitem__(self
, attname_or_tuple
):
237 if type(attname_or_tuple
) is types
.TupleType
:
238 return self
._attrsNS
[attname_or_tuple
]
240 return self
._attrs
[attname_or_tuple
]
243 def __setitem__(self
, attname
, value
):
244 if type(value
) is types
.StringType
:
248 assert isinstance(value
, Attr
) or type(value
) is types
.StringType
250 old
= self
._attrs
.get(attname
, None)
253 self
._attrs
[node
.name
] = node
254 self
._attrsNS
[(node
.namespaceURI
, node
.localName
)] = node
256 def __delitem__(self
, attname_or_tuple
):
257 node
= self
[attname_or_tuple
]
259 del self
._attrs
[node
.name
]
260 del self
._attrsNS
[(node
.namespaceURI
, node
.localName
)]
263 nodeType
= Node
.ELEMENT_NODE
265 def __init__(self
, tagName
, namespaceURI
="", prefix
="",
268 self
.tagName
= self
.nodeName
= tagName
269 self
.localName
= localName
or tagName
271 self
.namespaceURI
= namespaceURI
272 self
.nodeValue
= None
274 self
._attrs
={} # attributes are double-indexed:
275 self
._attrsNS
={}# tagName -> Attribute
276 # URI,localName -> Attribute
277 # in the future: consider lazy generation of attribute objects
278 # this is too tricky for now because of headaches
def getAttribute(self, attname):
    """Return the string value of the attribute named *attname*.

    Returns "" when the element has no such attribute, as the DOM
    specifies, rather than letting a KeyError escape from the internal
    name -> attribute dictionary.
    """
    attr = self._attrs.get(attname)
    if attr is None:
        return ""
    return attr.value
def getAttributeNS(self, namespaceURI, localName):
    """Return the value of the attribute keyed by (namespaceURI, localName).

    Returns "" when the element has no such attribute, as the DOM
    specifies, rather than letting a KeyError escape from the internal
    (URI, local name) -> attribute dictionary.
    """
    attr = self._attrsNS.get((namespaceURI, localName))
    if attr is None:
        return ""
    return attr.value
287 def setAttribute(self
, attname
, value
):
290 attr
.__dict
__["value"] = attr
.__dict
__["nodeValue"] = value
291 self
.setAttributeNode(attr
)
293 def setAttributeNS(self
, namespaceURI
, qualifiedName
, value
):
294 prefix
, localname
= _nssplit(qualifiedName
)
296 attr
= Attr(qualifiedName
, namespaceURI
, localname
, prefix
)
297 attr
.__dict
__["value"] = attr
.__dict
__["nodeValue"] = value
298 self
.setAttributeNode(attr
)
299 # FIXME: return original node if something changed.
def getAttributeNode(self, attrname):
    """Look up the attribute node stored under *attrname*.

    Returns the node from the name-indexed attribute dictionary, or
    None when no attribute with that name is present.
    """
    by_name = self._attrs
    return by_name.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
    """Look up the attribute node keyed by (namespaceURI, localName).

    Returns None when no such attribute exists, matching what
    getAttributeNode does for plain names, instead of raising KeyError.
    """
    return self._attrsNS.get((namespaceURI, localName))
307 def setAttributeNode(self
, attr
):
308 old
= self
._attrs
.get(attr
.name
, None)
311 self
._attrs
[attr
.name
] = attr
312 self
._attrsNS
[(attr
.namespaceURI
, attr
.localName
)] = attr
313 # FIXME: return old value if something changed
def removeAttribute(self, name):
    """Remove the attribute called *name* from this element.

    The attribute node is fetched from the name-indexed dictionary and
    passed to removeAttributeNode.  Raises KeyError if no attribute
    with that name exists.
    """
    target = self._attrs[name]
    self.removeAttributeNode(target)
def removeAttributeNS(self, namespaceURI, localName):
    """Remove the attribute keyed by (namespaceURI, localName).

    The attribute node is fetched from the namespace-indexed dictionary
    and passed to removeAttributeNode.  Raises KeyError if no such
    attribute exists.
    """
    key = (namespaceURI, localName)
    target = self._attrsNS[key]
    self.removeAttributeNode(target)
323 def removeAttributeNode(self
, node
):
325 del self
._attrs
[node
.name
]
326 del self
._attrsNS
[(node
.namespaceURI
, node
.localName
)]
def getElementsByTagName(self, name):
    """Search the subtree below this element for elements with tag *name*.

    The search is delegated to _getElementsByTagNameHelper, starting
    from a fresh empty result list; a *name* of "*" matches every tag.
    Returns whatever the helper returns.
    """
    matches = []
    return _getElementsByTagNameHelper(self, name, matches)
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Search the subtree below this element by namespace URI and local name.

    "*" acts as a wildcard for either argument.  The matches are
    accumulated by _getElementsByTagNameNSHelper into a list that is
    returned to the caller.
    """
    # Keep our own reference to the accumulator: passing an anonymous
    # list and dropping the result made this method always return None.
    rc = []
    _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
    return rc
335 return "<DOM Element: %s at %s>" % (self
.tagName
, id(self
))
338 def writexml(self
, writer
):
339 writer
.write("<" + self
.tagName
)
341 a_names
= self
._get
_attributes
().keys()
344 for a_name
in a_names
:
345 writer
.write(" %s=\"" % a_name
)
346 _write_data(writer
, self
._get
_attributes
()[a_name
].value
)
350 for node
in self
.childNodes
:
351 node
.writexml(writer
)
352 writer
.write("</%s>" % self
.tagName
)
def _get_attributes(self):
    """Build an AttributeList over this element's attribute dictionaries.

    A new AttributeList wrapping both the name-indexed and the
    namespace-indexed dictionaries is created on every call.
    """
    by_name = self._attrs
    by_namespace = self._attrsNS
    return AttributeList(by_name, by_namespace)
360 nodeType
= Node
.COMMENT_NODE
362 def __init__(self
, data
):
364 self
.data
= self
.nodeValue
= data
365 self
.nodeName
= "#comment"
366 self
.attributes
= None
def writexml(self, writer):
    """Write this comment to *writer* in ``<!--...-->`` form.

    ``self.data`` is placed between the delimiters as-is; no escaping
    is applied.
    """
    markup = "<!--%s-->" % self.data
    writer.write(markup)
371 class ProcessingInstruction(Node
):
372 nodeType
= Node
.PROCESSING_INSTRUCTION_NODE
374 def __init__(self
, target
, data
):
376 self
.target
= self
.nodeName
= target
377 self
.data
= self
.nodeValue
= data
378 self
.attributes
= None
380 def writexml(self
, writer
):
381 writer
.write("<?%s %s?>" % (self
.target
, self
.data
))
384 nodeType
= Node
.TEXT_NODE
387 def __init__(self
, data
):
389 self
.data
= self
.nodeValue
= data
390 self
.attributes
= None
393 if len(self
.data
) > 10:
397 return "<DOM Text node \"%s%s\">" % (self
.data
[0:10], dotdotdot
)
def writexml(self, writer):
    """Write this text node's character data to *writer*.

    Output goes through _write_data, which performs the entity
    replacement on the text.
    """
    text = self.data
    _write_data(writer, text)
402 def _nssplit(qualifiedName
):
404 fields
= string
.split(qualifiedName
,':', 1)
407 elif len(fields
) == 1:
408 return ('', fields
[0])
410 class Document(Node
):
411 nodeType
= Node
.DOCUMENT_NODE
412 documentElement
= None
416 self
.attributes
= None
417 self
.nodeName
= "#document"
418 self
.nodeValue
= None
420 def appendChild(self
, node
):
421 if node
.nodeType
== Node
.ELEMENT_NODE
:
422 if self
.documentElement
:
423 raise TypeError, "Two document elements disallowed"
425 self
.documentElement
= node
426 Node
.appendChild(self
, node
)
429 createElement
= Element
431 createTextNode
= Text
433 createComment
= Comment
435 createProcessingInstruction
= ProcessingInstruction
437 createAttribute
= Attr
def createElementNS(self, namespaceURI, qualifiedName):
    """Create an Element for *qualifiedName* in namespace *namespaceURI*.

    _nssplit separates the qualified name into a prefix and a local
    name, both of which are handed to the Element constructor together
    with the full qualified name and the namespace URI.
    """
    name_parts = _nssplit(qualifiedName)
    prefix, localName = name_parts
    return Element(qualifiedName, namespaceURI, prefix, localName)
def createAttributeNS(self, namespaceURI, qualifiedName):
    """Create an Attr for *qualifiedName* in namespace *namespaceURI*.

    _nssplit separates the qualified name into a prefix and a local
    name.  Attr takes them in the order (qName, namespaceURI,
    localName, prefix), which differs from Element's argument order.
    """
    name_parts = _nssplit(qualifiedName)
    prefix, localName = name_parts
    return Attr(qualifiedName, namespaceURI, localName, prefix)
def getElementsByTagNameNS(self, namespaceURI, localName):
    """Search the whole document by namespace URI and local name.

    "*" acts as a wildcard for either argument.  The matches are
    accumulated by _getElementsByTagNameNSHelper into a list that is
    returned to the caller.
    """
    # The helper takes the result list as its fourth argument; calling
    # it with only three raised TypeError, and nothing was returned.
    rc = []
    _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
    return rc
451 self
.documentElement
= None
454 def getElementsByTagName(self
, name
):
456 _getElementsByTagNameHelper(self
, name
, rc
)
def writexml(self, writer):
    """Serialize the document to *writer*.

    Each entry of ``childNodes`` is asked, in order, to write itself
    to the same writer.
    """
    children = self.childNodes
    for child in children:
        child.writexml(writer)
463 def _doparse(func
, args
, kwargs
):
464 events
= apply(func
, args
, kwargs
)
465 toktype
, rootNode
= events
.getEvent()
466 events
.expandNode(rootNode
)
def parse(*args, **kwargs):
    """Parse a file, given by filename or file object, into a DOM.

    All positional and keyword arguments are passed through unchanged
    to ``pulldom.parse`` by way of ``_doparse``.
    """
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
def parseString(*args, **kwargs):
    """Parse a document supplied as a string into a DOM.

    All positional and keyword arguments are passed through unchanged
    to ``pulldom.parseString`` by way of ``_doparse``.
    """
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)