# -*- coding: utf-8 -*-
#
# Copyright 2004-2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
"""Module that provides modified DOM functionality for our needs.

Note that users of ourdom should ensure that no code might still use classes
directly from minidom, like minidom.Element, minidom.Document or methods such
as minidom.parseString, since the functionality provided here will not be in
those objects.
"""

from xml.dom import expatbuilder
from xml.dom import minidom
from xml.sax import saxutils

# Helper functions that write and search XML the way we want; they are used
# by the modified classes below.

def writexml_helper(self, writer, indent="", addindent="", newl=""):
    """A replacement for writexml that formats it like typical XML files.

    Nodes are indented, but text nodes, where whitespace can be significant,
    are not indented.

    :param self: the element node being written (this function is used as
        the body of a ``writexml`` method).
    :param writer: any object with a ``write(text)`` method.
    :param indent: current indentation.
    :param addindent: indentation to add to higher (more deeply nested) levels.
    :param newl: newline string.
    """
    writer.write(indent + "<" + self.tagName)

    attrs = self._get_attributes()
    # sorted() instead of keys().sort(): works whether keys() is a list or a
    # view, and keeps the attribute order deterministic.
    for a_name in sorted(attrs.keys()):
        writer.write(" %s=\"" % a_name)
        # Escape &, <, > and the double quote that delimits the value.
        writer.write(saxutils.escape(attrs[a_name].value, {'"': "&quot;"}))
        writer.write("\"")

    if not self.childNodes:
        writer.write("/>%s" % (newl))
        return

    # We need to write text nodes without newline and indentation, so
    # we handle them differently. Note that we here assume that "empty"
    # text nodes can be done away with (see the strip()). Note also that
    # nested tags in a text node (like ph tags in xliff) should also not
    # have newlines and indentation or an extra newline, since that will
    # alter the text node.
    has_text = any(child.nodeType == self.TEXT_NODE and child.data.strip()
                   for child in self.childNodes)

    if has_text:
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer, "", "", "")
        writer.write("</%s>%s" % (self.tagName, newl))
    else:
        # This is the normal case that we do with pretty layout
        writer.write(">%s" % (newl))
        for node in self.childNodes:
            if node.nodeType != self.TEXT_NODE:
                node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
def getElementsByTagName_helper(parent, name, dummy=None):
    """A reimplementation of getElementsByTagName as an iterator.

    Note that this is not compatible with getElementsByTagName that returns a
    list, therefore, the class below exposes this through yieldElementsByTagName.

    :param parent: node whose ``childNodes`` are searched.
    :param name: tag name to match, or ``"*"`` to match every element.
    :param dummy: accepted but not used.
    :return: a generator yielding each matching direct child, followed by
        whatever that child's own ``getElementsByTagName(name)`` returns.
    """
    for node in parent.childNodes:
        is_element = node.nodeType == minidom.Node.ELEMENT_NODE
        if is_element and (name == "*" or node.tagName == name):
            yield node
        # Only nodes that actually have children are asked for descendants.
        if node.hasChildNodes():
            for othernode in node.getElementsByTagName(name):
                yield othernode
def searchElementsByTagName_helper(parent, name, onlysearch):
    """Yield elements called *name*, descending only into tags in *onlysearch*.

    :param parent: node whose ``childNodes`` are searched.
    :param name: tag name to match, or ``"*"`` to match every element.
    :param onlysearch: container of tag names; the search only continues
        below a child element whose tag name occurs in it.
    :return: a generator over the matching elements.
    """
    for node in parent.childNodes:
        if node.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        if name == "*" or node.tagName == name:
            yield node
        if node.tagName in onlysearch:
            # The child must provide searchElementsByTagName itself.
            for found in node.searchElementsByTagName(name, onlysearch):
                yield found
def getFirstElementByTagName(node, name):
    """Return the first element that ``node.yieldElementsByTagName(name)``
    produces, or ``None`` when it produces nothing.

    :param node: object providing ``yieldElementsByTagName(name)``.
    :param name: tag name passed through to ``yieldElementsByTagName``.
    """
    results = node.yieldElementsByTagName(name)
    # The next() builtin replaces the Python 2-only ``results.next()``;
    # iter() lets a plain list result work as well.
    return next(iter(results), None)
def getnodetext(node):
    """Return the node's text by iterating through the child nodes.

    Only direct children whose ``nodeType`` equals ``TEXT_NODE`` contribute;
    their ``data`` is concatenated in order.

    :param node: a DOM node, or ``None``.
    :return: the concatenated text, or ``""`` when *node* is ``None``.
    """
    if node is None:
        return ""
    return "".join([t.data for t in node.childNodes if t.nodeType == t.TEXT_NODE])
# Various modifications to minidom classes to add functionality we like.

class DOMImplementation(minidom.DOMImplementation):
    """A ``minidom.DOMImplementation`` that creates this module's documents."""

    def _create_document(self):
        """Return a new, empty instance of this module's ``Document``."""
        return Document()
class Element(minidom.Element):
    """A ``minidom.Element`` with generator-based searches and custom output."""

    def yieldElementsByTagName(self, name):
        """Return a generator over descendant elements called *name*."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return a generator over elements called *name*, descending only
        into elements whose tag name occurs in *onlysearch*."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element to *writer* using ``writexml_helper``.

        The three layout arguments default to ``""`` so that the method can
        be called with just a writer, as ``minidom.Element.writexml`` can.
        """
        return writexml_helper(self, writer, indent, addindent, newl)
class Document(minidom.Document):
    """A ``minidom.Document`` that creates this module's ``Element`` nodes."""

    # Instance of this module's DOMImplementation rather than minidom's.
    implementation = DOMImplementation()

    def yieldElementsByTagName(self, name):
        """Return a generator over descendant elements called *name*."""
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return a generator over elements called *name*, descending only
        into elements whose tag name occurs in *onlysearch*."""
        return searchElementsByTagName_helper(self, name, onlysearch)

    def createElement(self, tagName):
        """Create an ``Element`` called *tagName* that is owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced ``Element`` that is owned by this document."""
        # _nssplit lives in minidom; the bare name is not defined in this
        # module, so it has to be reached through the minidom module.
        prefix, _localname = minidom._nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e
# Module-level DOMImplementation instance, used by ExpatBuilderNS.reset()
# to create each new document.
theDOMImplementation = DOMImplementation()

# An ExpatBuilder that allows us to use the above modifications.

class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
    """Namespace-aware expat builder that builds this module's node classes.

    ``reset`` creates the document through ``theDOMImplementation`` and
    ``start_element_handler`` instantiates this module's ``Element``.
    """
    # NOTE(review): the handlers below are a copy of the stdlib expatbuilder
    # code and use private minidom/expatbuilder internals (``Attr.__dict__``,
    # ``node._attrs``, ``_parse_ns_name``, ...). Those internals presumably
    # match the Python 2 stdlib this was copied from -- verify against the
    # stdlib version actually in use.

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            expatbuilder.EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False
        self._initNamespaces()

    def start_element_handler(self, name, attributes):
        """Create an ``Element`` for an opening tag and attach its attributes.

        :param name: element name as reported by expat; it contains a space
            when it carries namespace information.
        :param attributes: flat sequence of alternating attribute names and
            values.
        """
        # all we want to do is construct our own Element instead of minidom.Element
        # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
        if ' ' in name:
            uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        else:
            uri = expatbuilder.EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = expatbuilder.EMPTY_PREFIX
        node = Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        expatbuilder._append_child(self.curNode, node)
        self.curNode = node

        # Turn the namespace declarations collected for this element into
        # xmlns attributes, then empty the list in place.
        if self._ns_ordered_prefixes:
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                                     expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                                     "xmlns", expatbuilder.EMPTY_PREFIX)
                # Values are written straight into the instance dictionaries
                # of the attribute and of its text child.
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
                d['value'] = d['nodeValue'] = uri
                d['ownerDocument'] = self.document
                expatbuilder._set_attribute_node(node, a)
            del self._ns_ordered_prefixes[:]

        if attributes:
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # attributes is flat: name at even indexes, value right after it.
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                                     aname, expatbuilder.EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['ownerDocument'] = self.document
                d['value'] = d['nodeValue'] = value
                d['ownerElement'] = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used. If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        def end_element_handler(self, name):
            """Check that the closing tag matches the current node, then
            make the parent node current again."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                    "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                    "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
                    "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)
# Parser functions that use our modified XML classes.

def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    :param file: a filename (string) or an object that can be handed to the
        builder's ``parseFile``.
    :param parser: accepted but not used by this implementation.
    :param bufsize: accepted but not used by this implementation.
    :return: whatever the builder's ``parseFile`` returns.
    """
    builder = ExpatBuilderNS()
    # (str, type(u"")) covers byte and unicode strings on Python 2 and plain
    # str on Python 3, without needing the Python 2-only name ``basestring``.
    if isinstance(file, (str, type(u""))):
        # The with-block closes the file even when parsing raises.
        with open(file, 'rb') as fp:
            return builder.parseFile(fp)
    return builder.parseFile(file)
def parseString(string, parser=None):
    """Parse a string into a DOM.

    :param string: the XML text handed to the builder's ``parseString``.
    :param parser: accepted but not used by this implementation.
    :return: whatever the builder's ``parseString`` returns.
    """
    builder = ExpatBuilderNS()
    return builder.parseString(string)