A number of methods in UnitChecker were called very, very frequently
[translate_toolkit.git] / misc / ourdom.py
blobeaa137d5d9554a4a6115210a59dc4166e6a29728
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Copyright 2004-2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 """module that provides modified DOM functionality for our needs
25 Note that users of ourdom should ensure that no code might still use classes
26 directly from minidom, like minidom.Element, minidom.Document or methods such
27 as minidom.parseString, since the functionality provided here will not be in
28 those objects.
29 """
31 from xml.dom import minidom
32 from xml.dom import expatbuilder
34 # helper functions we use to do xml the way we want, used by modified classes below
def writexml_helper(self, writer, indent="", addindent="", newl=""):
    """A replacement for writexml that formats output like typical XML files.

    Elements are indented, but an element that directly contains
    non-whitespace text is written without added indentation or newlines
    around its children, since whitespace can be significant there.

    self      -- the element to write; must provide tagName,
                 _get_attributes(), childNodes and TEXT_NODE
    writer    -- object with a write() method receiving the output
    indent    -- current indentation
    addindent -- indentation to add for each deeper level
    newl      -- newline string
    """
    writer.write(indent + "<" + self.tagName)

    attrs = self._get_attributes()
    # sorted() gives a deterministic attribute order and does not depend on
    # keys() returning a list that can be sorted in place.
    for a_name in sorted(attrs.keys()):
        writer.write(" %s=\"" % a_name)
        # NOTE(review): _write_data is a private minidom helper - confirm its
        # signature on the Python version in use.
        minidom._write_data(writer, attrs[a_name].value)
        writer.write("\"")

    if not self.childNodes:
        writer.write("/>%s" % (newl))
        return

    # We need to write text nodes without newline and indentation, so
    # we handle them differently. Note that we here assume that "empty"
    # text nodes can be done away with (see the strip()). Note also that
    # nested tags in a text node (like ph tags in xliff) should also not
    # have newlines and indentation or an extra newline, since that will
    # alter the text node.
    haveText = False
    for childNode in self.childNodes:
        if childNode.nodeType == self.TEXT_NODE and childNode.data.strip():
            haveText = True
            break

    if haveText:
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer, "", "", "")
        writer.write("</%s>%s" % (self.tagName, newl))
    else:
        # This is the normal case that we do with pretty layout;
        # (whitespace-only) text children are dropped here.
        writer.write(">%s" % (newl))
        for node in self.childNodes:
            if node.nodeType != self.TEXT_NODE:
                node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
def getElementsByTagName_helper(parent, name, dummy=None):
    """A reimplementation of getElementsByTagName as an iterator.

    Yields, in document order, every element below parent whose tagName
    equals name (or every element when name is "*").

    Note that this is not compatible with getElementsByTagName that returns a
    list, therefore, the classes in this module expose this through
    yieldElementsByTagName.

    parent -- node whose descendants are searched
    name   -- tag name to match, or "*" for any element
    dummy  -- unused; kept for signature compatibility
    """
    for node in parent.childNodes:
        if node.nodeType == minidom.Node.ELEMENT_NODE and \
                (name == "*" or node.tagName == name):
            yield node
        if node.hasChildNodes():
            # Recurse through this generator rather than the list-building
            # node.getElementsByTagName(), so the search stays lazy and does
            # not require that method on the child.
            for othernode in getElementsByTagName_helper(node, name):
                yield othernode
def searchElementsByTagName_helper(parent, name, onlysearch):
    """Yield elements named name, descending only into tags in onlysearch.

    Every element child of parent is tested against name ("*" matches any
    element); the search continues below a child only if that child's
    tagName occurs in onlysearch.

    parent     -- node whose children are searched
    name       -- tag name to match, or "*" for any element
    onlysearch -- container of tag names whose contents may be searched
    """
    for node in parent.childNodes:
        if node.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        if name == "*" or node.tagName == name:
            yield node
        if node.tagName in onlysearch:
            # Recurse through the helper itself (with a distinct loop
            # variable) so the outer loop's node is not rebound and children
            # need not provide a searchElementsByTagName method.
            for descendant in searchElementsByTagName_helper(node, name, onlysearch):
                yield descendant
def getFirstElementByTagName(node, name):
    """Return the first element produced by node.yieldElementsByTagName(name).

    Returns None when the search produces no element.

    node -- object providing yieldElementsByTagName()
    name -- tag name passed through to yieldElementsByTagName()
    """
    # Returning from inside the loop takes just the first item; this works
    # with any iterator, without relying on a .next() method.
    for result in node.yieldElementsByTagName(name):
        return result
    return None
def getnodetext(node):
    """Return the concatenated data of node's direct text-node children.

    An empty string is returned when node is None."""
    if node is None:
        return ""
    pieces = []
    for child in node.childNodes:
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
121 # various modifications to minidom classes to add functionality we like
class DOMImplementation(minidom.DOMImplementation):
    """A minidom DOMImplementation that produces this module's Document."""

    def _create_document(self):
        # Hand back our own Document subclass instead of minidom's.
        document = Document()
        return document
class Element(minidom.Element):
    """minidom Element extended with iterator searches and our XML layout."""

    def yieldElementsByTagName(self, name):
        """Return an iterator over descendant elements named name."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over elements named name, descending only
        into tags listed in onlysearch."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element to writer using writexml_helper.

        The layout arguments default to empty strings, matching both
        minidom.Element.writexml and writexml_helper, so that
        element.writexml(writer) keeps working.
        """
        return writexml_helper(self, writer, indent, addindent, newl)
class Document(minidom.Document):
    """minidom Document that creates this module's Element objects."""

    # Use our DOMImplementation rather than the one minidom.Document carries.
    implementation = DOMImplementation()

    def yieldElementsByTagName(self, name):
        """Return an iterator over descendant elements named name."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over elements named name, descending only
        into tags listed in onlysearch."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def createElement(self, tagName):
        """Create one of our Element objects owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced Element owned by this document."""
        # _nssplit lives in minidom and is not imported into this module, so
        # it has to be reached through the minidom module object.
        prefix, localName = minidom._nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e
# Module-level DOMImplementation instance; ExpatBuilderNS.reset() uses it to
# create the document that is being built.
theDOMImplementation = DOMImplementation()
153 # an ExpatBuilder that allows us to use the above modifications
class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
    """Namespace-aware expat builder that builds trees from this module's
    Document and Element classes.

    NOTE(review): the handlers below reach into private minidom/expatbuilder
    attributes and instance __dict__s; presumably they mirror the stdlib
    implementation they were copied from - verify against the Python version
    in use before changing anything here.
    """

    def reset(self):
        """Free all data structures used during DOM construction."""
        # Create the document through our own DOMImplementation instance.
        self.document = theDOMImplementation.createDocument(
            expatbuilder.EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False
        self._initNamespaces()

    def start_element_handler(self, name, attributes):
        """Create an Element for a start tag and make it the current node.

        name       -- element name; when it contains a space it is split by
                      expatbuilder._parse_ns_name into namespace parts
        attributes -- flat sequence of alternating attribute names and values
        """
        # all we want to do is construct our own Element instead of minidom.Element
        # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
        if ' ' in name:
            uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        else:
            uri = expatbuilder.EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = expatbuilder.EMPTY_PREFIX
        node = Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        expatbuilder._append_child(self.curNode, node)
        self.curNode = node

        # Turn each pending (prefix, uri) pair into an xmlns attribute on the
        # new element, then empty the pending list.
        if self._ns_ordered_prefixes:
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                                     expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                                     "xmlns", expatbuilder.EMPTY_PREFIX)
                # Values are stored straight into the instance dicts of the
                # attribute and of its first child node.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
                d['value'] = d['nodeValue'] = uri
                d['ownerDocument'] = self.document
                expatbuilder._set_attribute_node(node, a)
            del self._ns_ordered_prefixes[:]

        if attributes:
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # attributes is walked two items at a time: name, then value.
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                                     aname, expatbuilder.EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
                # As above, values are written directly into the instance dicts.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['ownerDocument'] = self.document
                d['value'] = d['nodeValue'] = value
                d['ownerElement'] = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used. If changing one, be sure to check the other to see if
        # it needs to be changed as well.

        def end_element_handler(self, name):
            """Check that the end tag matches the current node, then make
            its parent the current node."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)
241 # parser methods that use our modified xml classes
def parse(file, parser=None, bufsize=None):
    """Parse a file, given as a filename or a file object, into a DOM.

    The parser and bufsize arguments are accepted but not used."""
    builder = ExpatBuilderNS()
    if not isinstance(file, basestring):
        # Already a file object: the caller keeps responsibility for it.
        return builder.parseFile(file)
    # A filename: open it ourselves and always close it again.
    handle = open(file, 'rb')
    try:
        return builder.parseFile(handle)
    finally:
        handle.close()
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    The parser argument is accepted but not used."""
    return ExpatBuilderNS().parseString(string)