Updated for 2.1a3
[python/dscho.git] / Lib / xml / dom / pulldom.py
blob b573ba0d18df96700e3cd0d520758be81d90ad55
1 import xml.sax
2 import xml.sax.handler
3 import types
5 try:
6 _StringTypes = [types.StringType, types.UnicodeType]
7 except AttributeError:
8 _StringTypes = [types.StringType]
10 START_ELEMENT = "START_ELEMENT"
11 END_ELEMENT = "END_ELEMENT"
12 COMMENT = "COMMENT"
13 START_DOCUMENT = "START_DOCUMENT"
14 END_DOCUMENT = "END_DOCUMENT"
15 PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
16 IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
17 CHARACTERS = "CHARACTERS"
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that records parse callbacks as queued DOM events.

    Events live in a singly linked list of two-item cells of the form
    ``[(token, node), next_cell]``.  ``firstEvent`` is a dummy head cell
    whose second item refers to the oldest queued event, and ``lastEvent``
    is the cell that the next event gets linked after.

    The document itself is only created when the first element arrives
    (see buildDocument), so processing instructions and comments reported
    before that point are buffered and replayed once the document exists.
    """

    _locator = None
    document = None

    # The "xml" prefix is bound to this URI by definition, so a parser is
    # not expected to report it through startPrefixMapping.  It is seeded
    # into the namespace context so that names such as xml:lang resolve.
    _XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, element stack and namespace context.

        documentFactory -- object whose createDocument(uri, tagname, doctype)
            builds the document; when None, startDocument() selects the
            minidom implementation.
        """
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        # contains uri -> prefix dicts
        self._ns_contexts = [{self._XML_NAMESPACE: 'xml'}]
        self._current_context = self._ns_contexts[-1]
        # (token, ...) tuples seen before the document was created
        self._pending_events = []

    def pop(self):
        """Remove and return the top of the element stack."""
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def _emit(self, token, node):
        # Link a new (token, node) cell after the tail and make it the tail.
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _qualified_name(self, uri, localname):
        # Build *a* valid qualified name for (uri, localname) from the
        # prefixes currently in scope; an empty prefix means the default
        # namespace, so the local name is used unchanged.
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    def setDocumentLocator(self, locator):
        """Remember the locator supplied by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Bring a uri -> prefix binding into scope.

        A snapshot of the context as it was before the binding is saved so
        that endPrefixMapping can restore it.
        """
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or ''

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by the matching start call."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create an element for a namespace-aware start tag and queue it.

        name    -- (uri, localname) pair
        tagName -- qualified name as written, or None if the reader does
                   not provide it
        attrs   -- mapping of (uri, localname) pairs to attribute values

        The first element seen also creates the document.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        self._emit(START_ELEMENT, node)
        self.push(node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the element on top of the stack and pop it."""
        self._emit(END_ELEMENT, self.pop())

    def startElement(self, name, attrs):
        """Create an element for a non-namespace start tag and queue it.

        The first element seen also creates the document.
        """
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._emit(START_ELEMENT, node)
        self.push(node)

    def endElement(self, name):
        """Queue END_ELEMENT for the element on top of the stack and pop it."""
        self._emit(END_ELEMENT, self.pop())

    def comment(self, s):
        """Queue a COMMENT event, or buffer it if no document exists yet."""
        if self.document:
            self._emit(COMMENT, self.document.createComment(s))
        else:
            self._pending_events.append((COMMENT, s))

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, or buffer it if no
        document exists yet."""
        if self.document:
            node = self.document.createProcessingInstruction(target, data)
            self._emit(PROCESSING_INSTRUCTION, node)
        else:
            self._pending_events.append(
                (PROCESSING_INSTRUCTION, target, data))

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._emit(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._emit(CHARACTERS, node)

    def startDocument(self):
        """Select the minidom implementation if no factory was supplied."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT and return its first
        child (used by the callers as the root element node).

        Processing instructions and comments buffered before this point
        are turned into nodes and queued right after START_DOCUMENT.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._emit(START_DOCUMENT, node)
        self.push(node)
        # Swap the buffer out before replaying so it is empty afterwards.
        pending = self._pending_events
        self._pending_events = []
        for item in pending:
            if item[0] == PROCESSING_INSTRUCTION:
                replayed = node.createProcessingInstruction(item[1], item[2])
            else:
                replayed = node.createComment(item[1])
            self._emit(item[0], replayed)
        return node.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the document off the stack."""
        # Unlike the other callbacks this does not advance lastEvent.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
class ErrorHandler:
    """Error handler that prints warnings and re-raises everything else."""

    def warning(self, exception):
        """Print the warning and carry on."""
        # Parenthesised form: prints exactly what the statement form does
        # and is also accepted where print is a function.
        print(exception)

    def error(self, exception):
        """Raise the exception passed in."""
        raise exception

    def fatalError(self, exception):
        """Raise the exception passed in."""
        raise exception
class DOMEventStream:
    """Pull-style reader over a stream.

    Data is read from the stream and fed to the parser only when the event
    queue of the attached PullDOM handler is empty, and queued events are
    handed out one at a time.
    """

    def __init__(self, stream, parser, bufsize):
        """Store the stream, parser and read size, then attach a handler."""
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        handler = PullDOM()
        self.pulldom = handler
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event regardless of pos; raise IndexError when
        getEvent() has nothing to return."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events, appending each new node under its parent, until
        an event for node itself arrives or the events run out."""
        ancestors = [node]
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, current = event
            if current is node:
                return
            if token == END_ELEMENT:
                # Closing an element: step back up, nothing to attach.
                del ancestors[-1]
                continue
            ancestors[-1].appendChild(current)
            if token == START_ELEMENT:
                ancestors.append(current)

    def getEvent(self):
        """Return the oldest queued (token, node) pair, reading and parsing
        more input as needed; return None once the stream is exhausted."""
        handler = self.pulldom
        head = handler.firstEvent
        if not head[1]:
            # Queue is empty: new events must be linked after the head.
            handler.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                self.parser.close()
                return None
            self.parser.feed(chunk)
        cell = head[1]
        head[1] = cell[1]
        return cell[0]

    def clear(self):
        "clear(): Explicitly release parsing objects"
        self.pulldom.clear()
        del self.pulldom
        self.parser = self.stream = None
class SAX2DOM(PullDOM):
    """PullDOM variant that also appends each node it creates to the node
    that is its parent on the element stack."""

    def _link_top_of_stack(self):
        # The element just pushed becomes a child of the entry below it.
        stack = self.elementStack
        child = stack[-1]
        stack[-2].appendChild(child)

    def _link_last_event(self):
        # The node of the most recently queued event becomes a child of
        # the element currently on top of the stack.
        node = self.lastEvent[0][1]
        self.elementStack[-1].appendChild(node)

    def startElementNS(self, name, tagName, attrs):
        """Queue the element as PullDOM does, then attach it to its parent."""
        PullDOM.startElementNS(self, name, tagName, attrs)
        self._link_top_of_stack()

    def startElement(self, name, attrs):
        """Queue the element as PullDOM does, then attach it to its parent."""
        PullDOM.startElement(self, name, attrs)
        self._link_top_of_stack()

    def processingInstruction(self, target, data):
        """Queue the processing instruction, then attach its node."""
        PullDOM.processingInstruction(self, target, data)
        self._link_last_event()

    def ignorableWhitespace(self, chars):
        """Queue the whitespace text node, then attach it."""
        PullDOM.ignorableWhitespace(self, chars)
        self._link_last_event()

    def characters(self, chars):
        """Queue the text node, then attach it."""
        PullDOM.characters(self, chars)
        self._link_last_event()
# Number of characters requested from the stream per read when the caller
# does not pass a bufsize.
default_bufsize = 2 ** 14 - 20


def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string is opened as a file name; anything else
                        is used as the stream directly
    parser           -- SAX parser to drive; one is created when omitted
    bufsize          -- read size; default_bufsize when None
    """
    if bufsize is None:
        bufsize = default_bufsize
    stream = stream_or_string
    if type(stream_or_string) in _StringTypes:
        stream = open(stream_or_string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
    """Return a DOMEventStream over XML text held in memory.

    string -- the document text
    parser -- SAX parser to drive; one is created when omitted

    The read size is the length of the text, so a single read hands the
    whole document to the parser.
    """
    # Prefer the fast C implementation, then the pure-Python one, and
    # finally io.StringIO where neither of those modules can be imported.
    try:
        from cStringIO import StringIO
    except ImportError:
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)