6 _StringTypes
= [types
.StringType
, types
.UnicodeType
]
8 _StringTypes
= [types
.StringType
]
# Event tokens.  Each event queued by PullDOM is a (token, node) pair whose
# first item is one of these strings.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
# COMMENT is used by PullDOM.comment(), so it must be defined here too.
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that turns SAX callbacks into a queue of DOM events.

    Events are kept in a singly linked list of two-item lists,
    ``[(token, node), next]``.  ``firstEvent`` is a dummy head whose second
    slot points at the oldest queued event; ``lastEvent`` is the tail cell
    that the next event gets linked onto.

    NOTE(review): the branch conditions, ``else`` arms, ``push`` calls and
    class-level defaults below were restored around the surviving
    statements -- confirm against the upstream module.
    """

    # Class-level defaults so the attributes can be tested before a locator
    # or a document has been supplied.
    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, element stack and namespace context.

        documentFactory -- object providing createDocument(uri, tagname,
        doctype); when None, startDocument() picks a default.
        """
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]

    def pop(self):
        """Remove and return the top of the element stack.

        Only used when the list type has no bound pop method to install in
        __init__.
        """
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def setDocumentLocator(self, locator):
        """Remember the locator object handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Save the current uri -> prefix mapping and record a new binding.

        A missing prefix (default namespace) is stored as ''.
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or ''

    def endPrefixMapping(self, prefix):
        """Restore the mapping saved by the matching startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create an element node (namespace mode) and queue START_ELEMENT.

        name    -- (uri, localname) pair
        tagName -- qualified name, or None if the reader did not supply it
        attrs   -- mapping of (uri, localname) -> value

        The first element seen also creates the document via buildDocument().
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                prefix = self._current_context[a_uri]
                if prefix:
                    qname = prefix + ":" + a_localname
                else:
                    qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self.lastEvent[1] = [(START_ELEMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)

    def endElementNS(self, name, tagName):
        """Pop the current element and queue END_ELEMENT for it."""
        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
        self.lastEvent = self.lastEvent[1]

    def startElement(self, name, attrs):
        """Create an element node (non-namespace mode) and queue START_ELEMENT.

        name  -- tag name
        attrs -- mapping of attribute name -> value
        """
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self.lastEvent[1] = [(START_ELEMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)

    def endElement(self, name):
        """Pop the current element and queue END_ELEMENT for it."""
        self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
        self.lastEvent = self.lastEvent[1]

    def comment(self, s):
        """Create a comment node for *s* and queue a COMMENT event."""
        node = self.document.createComment(s)
        self.lastEvent[1] = [(COMMENT, node), None]
        self.lastEvent = self.lastEvent[1]

    def processingInstruction(self, target, data):
        """Create a PI node and queue a PROCESSING_INSTRUCTION event."""
        node = self.document.createProcessingInstruction(target, data)

        self.lastEvent[1] = [(PROCESSING_INSTRUCTION, node), None]
        self.lastEvent = self.lastEvent[1]

    def ignorableWhitespace(self, chars):
        """Create a text node and queue an IGNORABLE_WHITESPACE event."""
        node = self.document.createTextNode(chars)
        self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
        self.lastEvent = self.lastEvent[1]

    def characters(self, chars):
        """Create a text node and queue a CHARACTERS event."""
        node = self.document.createTextNode(chars)
        self.lastEvent[1] = [(CHARACTERS, node), None]
        self.lastEvent = self.lastEvent[1]

    def startDocument(self):
        """Choose the minidom DOM implementation if no factory was given."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT, return the root element.

        Called for the first element because the document cannot be created
        before its root tag name is known.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self.lastEvent[1] = [(START_DOCUMENT, node), None]
        self.lastEvent = self.lastEvent[1]
        self.push(node)
        return node.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the document off the element stack."""
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
# NOTE(review): only the three method signatures are visible for this block.
# The enclosing class header and the method bodies are reconstructed on the
# assumption that this is a minimal SAX error handler (warnings reported,
# errors raised) -- confirm against the upstream module.
class ErrorHandler:
    """Minimal SAX-style error handler."""

    def warning(self, exception):
        """Report a recoverable warning without interrupting the parse."""
        print(exception)

    def error(self, exception):
        """Propagate a recoverable parse error to the caller."""
        raise exception

    def fatalError(self, exception):
        """Propagate a non-recoverable parse error to the caller."""
        raise exception
class DOMEventStream:
    """Pull-style iterator over the events a PullDOM handler produces.

    Input is read from *stream* in *bufsize* chunks and fed to *parser*
    only when the event queue runs dry.

    NOTE(review): the ``reset``/``getEvent``/``clear`` headers, the loop and
    guard lines and the return statements were restored around the surviving
    statements -- confirm against the upstream module.
    """

    def __init__(self, stream, parser, bufsize):
        """Store the input stream, parser and chunk size, then reset()."""
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event, ignoring *pos*; raise IndexError at the end.

        This is what lets the stream be consumed by a ``for`` loop.
        """
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def expandNode(self, node):
        """Consume events until *node* ends, attaching its subtree to it."""
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                # END_ELEMENT for the node being expanded: subtree complete.
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next (token, node) event, or None when input is exhausted."""
        if not self.pulldom.firstEvent[1]:
            # Queue is empty: new events must be linked onto the head again.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        # Unlink the event just taken from the front of the queue.
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def clear(self):
        "clear(): Explicitly release parsing objects"
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
class SAX2DOM(PullDOM):
    """PullDOM variant that also links every created node into the tree.

    Each override delegates to PullDOM to create the node and queue the
    event, then appends the node to its parent taken from the element stack.
    """

    def startElementNS(self, name, tagName, attrs):
        """Queue the element as PullDOM does, then append it to its parent."""
        PullDOM.startElementNS(self, name, tagName, attrs)
        # The new element is on top of the stack; its parent is just below.
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def startElement(self, name, attrs):
        """Queue the element as PullDOM does, then append it to its parent."""
        PullDOM.startElement(self, name, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def processingInstruction(self, target, data):
        """Queue the PI as PullDOM does, then append it to the open element."""
        PullDOM.processingInstruction(self, target, data)
        # lastEvent is [(token, node), next]; [0][1] is the node just queued.
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        """Queue the whitespace text node, then append it to the open element."""
        PullDOM.ignorableWhitespace(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def characters(self, chars):
        """Queue the text node, then append it to the open element."""
        PullDOM.characters(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)
# Default number of characters read from the input stream per parser feed.
default_bufsize = (2 ** 14) - 20
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a file name (any type in _StringTypes), which is
                        opened here, or an object with a read() method
    parser           -- SAX parser to use; a default one is created if omitted
    bufsize          -- read chunk size; defaults to default_bufsize

    NOTE(review): the ``if bufsize is None``, ``else`` and ``if not parser``
    guards were restored around the surviving assignments -- confirm against
    the upstream module.
    """
    if bufsize is None:
        bufsize = default_bufsize
    if type(stream_or_string) in _StringTypes:
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
    """Return a DOMEventStream over an in-memory document.

    string -- the document text; it is read in a single chunk
    parser -- SAX parser to use; a default one is created if omitted

    NOTE(review): the ``try``/``except ImportError`` around the two imports
    and the ``if not parser`` guard were restored around the surviving
    statements -- confirm against the upstream module.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    # The whole string is available already, so use it as the buffer size.
    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)