misc/ourdom.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2004-2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22
  23 """module that provides modified DOM functionality for our needs
  24
  25 Note that users of ourdom should ensure that no code might still use classes
  26 directly from minidom, like minidom.Element, minidom.Document or methods such
  27 as minidom.parseString, since the functionality provided here will not be in
  28 those objects.
  29 """
  30
  31 from xml.dom import minidom
  32 from xml.dom import expatbuilder
  33
  34 # helper functions we use to do xml the way we want, used by modified classes below
  35
  36 def writexml_helper(self, writer, indent="", addindent="", newl=""):
  37     """A replacement for writexml that formats it like typical XML files.
  38     Nodes are intendented but text nodes, where whitespace can be significant, are not indented."""
  39     # indent = current indentation
  40     # addindent = indentation to add to higher levels
  41     # newl = newline string
  42     writer.write(indent+"<" + self.tagName)
  43
  44     attrs = self._get_attributes()
  45     a_names = attrs.keys()
  46     a_names.sort()
  47
  48     for a_name in a_names:
  49         writer.write(" %s=\"" % a_name)
  50         minidom._write_data(writer, attrs[a_name].value)
  51         writer.write("\"")
  52     if self.childNodes:
  53         # We need to write text nodes without newline and indentation, so
  54         # we handle them differently. Note that we here assume that "empty"
  55         # text nodes can be done away with (see the strip()). Note also that
  56         # nested tags in a text node (like ph tags in xliff) should also not
  57         # have newlines and indentation or an extra newline, since that will
  58         # alter the text node.
  59         haveText = False
  60         for childNode in self.childNodes:
  61             if childNode.nodeType == self.TEXT_NODE and childNode.data.strip():
  62                 haveText = True
  63                 break
  64         if haveText:
  65           writer.write(">")
  66           for node in self.childNodes:
  67               node.writexml(writer,"","","")
  68           writer.write("</%s>%s" % (self.tagName,newl))
  69         else:
  70           # This is the normal case that we do with pretty layout
  71           writer.write(">%s"%(newl))
  72           for node in self.childNodes:
  73               if node.nodeType != self.TEXT_NODE:
  74                   node.writexml(writer,indent+addindent,addindent,newl)
  75           writer.write("%s</%s>%s" % (indent,self.tagName,newl))
  76     else:
  77         writer.write("/>%s"%(newl))
  78
  79 def getElementsByTagName_helper(parent, name, dummy=None):
  80     """A reimplementation of getElementsByTagName as an iterator.
  81
  82     Note that this is not compatible with getElementsByTagName that returns a
  83     list, therefore, the class below exposes this through yieldElementsByTagName"""
  84
  85     for node in parent.childNodes:
  86         if node.nodeType == minidom.Node.ELEMENT_NODE and \
  87             (name == "*" or node.tagName == name):
  88             yield node
  89         if node.hasChildNodes():
  90             for othernode in node.getElementsByTagName(name):
  91                 yield othernode
  92
  93 def searchElementsByTagName_helper(parent, name, onlysearch):
  94     """limits the search to within tags occuring in onlysearch"""
  95     for node in parent.childNodes:
  96         if node.nodeType == minidom.Node.ELEMENT_NODE and \
  97             (name == "*" or node.tagName == name):
  98             yield node
  99         if node.nodeType == minidom.Node.ELEMENT_NODE and node.tagName in onlysearch:
 100             for node in node.searchElementsByTagName(name, onlysearch):
 101                 yield node
 102
 103 def getFirstElementByTagName(node, name):
 104   results = node.yieldElementsByTagName(name)
 105 #  if isinstance(results, list):
 106 #    if len(results) == 0:
 107 #      return None
 108 #    else:
 109 #      return results[0]
 110   try:
 111     result = results.next()
 112     return result
 113   except StopIteration:
 114     return None
 115
 116 def getnodetext(node):
 117   """returns the node's text by iterating through the child nodes"""
 118   if node is None: return ""
 119   return "".join([t.data for t in node.childNodes if t.nodeType == t.TEXT_NODE])
 120
 121 # various modifications to minidom classes to add functionality we like
 122
 123 class DOMImplementation(minidom.DOMImplementation):
 124   def _create_document(self):
 125     return Document()
 126
 127 class Element(minidom.Element):
 128   def yieldElementsByTagName(self, name):
 129     return getElementsByTagName_helper(self, name)
 130   def searchElementsByTagName(self, name, onlysearch):
 131     return searchElementsByTagName_helper(self, name, onlysearch)
 132   def writexml(self, writer, indent, addindent, newl):
 133     return writexml_helper(self, writer, indent, addindent, newl)
 134
 135 class Document(minidom.Document):
 136   implementation = DOMImplementation()
 137   def yieldElementsByTagName(self, name):
 138     return getElementsByTagName_helper(self, name)
 139   def searchElementsByTagName(self, name, onlysearch):
 140     return searchElementsByTagName_helper(self, name, onlysearch)
 141   def createElement(self, tagName):
 142     e = Element(tagName)
 143     e.ownerDocument = self
 144     return e
 145   def createElementNS(self, namespaceURI, qualifiedName):
 146     prefix, localName = _nssplit(qualifiedName)
 147     e = Element(qualifiedName, namespaceURI, prefix)
 148     e.ownerDocument = self
 149     return e
 150
# module-level DOMImplementation instance; ExpatBuilderNS.reset() uses it to
# create the document that parsing builds on
theDOMImplementation = DOMImplementation()
 152
 153 # an ExpatBuilder that allows us to use the above modifications
 154
class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
  """Namespace-aware expat builder that builds ourdom Document and Element nodes."""

  def reset(self):
    """Reset the builder state and start a new document.

    The document is created through theDOMImplementation, so it is an ourdom
    document rather than a plain minidom one."""
    self.document = theDOMImplementation.createDocument(
      expatbuilder.EMPTY_NAMESPACE, None, None)
    # building starts with the document itself as the current node
    self.curNode = self.document
    self._elem_info = self.document._elem_info
    self._cdata = False
    self._initNamespaces()

  def start_element_handler(self, name, attributes):
    """Create an ourdom Element for an opening tag and attach its attributes.

    name       -- the element name; a name containing a space is split into
                  its namespace parts by expatbuilder._parse_ns_name
    attributes -- flat sequence alternating attribute names and values
    """
    # all we want to do is construct our own Element instead of minidom.Element
    # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
    if ' ' in name:
      uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
    else:
      uri = expatbuilder.EMPTY_NAMESPACE
      qname = name
      localname = None
      prefix = expatbuilder.EMPTY_PREFIX
    node = Element(qname, uri, prefix, localname)
    node.ownerDocument = self.document
    expatbuilder._append_child(self.curNode, node)
    # the new element becomes the current node until its end tag is handled
    self.curNode = node

    # namespace declarations collected for this element are turned into
    # xmlns / xmlns:prefix attribute nodes
    if self._ns_ordered_prefixes:
      for prefix, uri in self._ns_ordered_prefixes:
        if prefix:
          a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                   expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
        else:
          a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                   "xmlns", expatbuilder.EMPTY_PREFIX)
        # NOTE(review): the value is written straight into the instance
        # __dict__ of the attribute and of its first child node; this relies
        # on how minidom stores these values internally -- confirm against
        # the minidom version in use
        d = a.childNodes[0].__dict__
        d['data'] = d['nodeValue'] = uri
        d = a.__dict__
        d['value'] = d['nodeValue'] = uri
        d['ownerDocument'] = self.document
        expatbuilder._set_attribute_node(node, a)
      # empty the list in place once the declarations have been attached
      del self._ns_ordered_prefixes[:]

    if attributes:
      _attrs = node._attrs
      _attrsNS = node._attrsNS
      # attributes holds name, value, name, value, ... so step through in pairs
      for i in range(0, len(attributes), 2):
        aname = attributes[i]
        value = attributes[i+1]
        if ' ' in aname:
          uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
          a = minidom.Attr(qname, uri, localname, prefix)
          _attrs[qname] = a
          _attrsNS[(uri, localname)] = a
        else:
          a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                   aname, expatbuilder.EMPTY_PREFIX)
          _attrs[aname] = a
          _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
        # same direct __dict__ writes as for the namespace attributes above
        d = a.childNodes[0].__dict__
        d['data'] = d['nodeValue'] = value
        d = a.__dict__
        d['ownerDocument'] = self.document
        d['value'] = d['nodeValue'] = value
        d['ownerElement'] = node

  if __debug__:
    # This only adds some asserts to the original
    # end_element_handler(), so we only define this when -O is not
    # used.  If changing one, be sure to check the other to see if
    # it needs to be changed as well.
    #
    def end_element_handler(self, name):
      """Check that the closing tag matches the current node, then make its
      parent the current node and finish the element."""
      curNode = self.curNode
      if ' ' in name:
        uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        assert (curNode.namespaceURI == uri
            and curNode.localName == localname
            and curNode.prefix == prefix), \
            "element stack messed up! (namespace)"
      else:
        assert curNode.nodeName == name, \
             "element stack messed up - bad nodeName"
        assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
             "element stack messed up - bad namespaceURI"
      self.curNode = curNode.parentNode
      self._finish_end_element(curNode)
 240
 241 # parser methods that use our modified xml classes
 242
 243 def parse(file, parser=None, bufsize=None):
 244   """Parse a file into a DOM by filename or file object."""
 245   builder = ExpatBuilderNS()
 246   if isinstance(file, basestring):
 247     fp = open(file, 'rb')
 248     try:
 249       result = builder.parseFile(fp)
 250     finally:
 251       fp.close()
 252   else:
 253     result = builder.parseFile(file)
 254   return result
 255
 256 def parseString(string, parser=None):
 257   """Parse a file into a DOM from a string."""
 258   builder = ExpatBuilderNS()
 259   return builder.parseString(string)
 260