storage/poxliff.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2006-2007 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22
  23 """An xliff file specifically suited for handling the po representation of
  24 xliff. """
  25
  26 from translate.storage import xliff
  27 from translate.storage import lisa
  28 from translate.storage import poheader
  29 from translate.misc.multistring import multistring
  30 from lxml import etree
  31 import re
  32
  33 def hasplurals(thing):
  34     if not isinstance(thing, multistring):
  35         return False
  36     return len(thing.strings) > 1
  37
  38 class PoXliffUnit(xliff.xliffunit):
  39     """A class to specifically handle the plural units created from a po file."""
  40     def __init__(self, source, empty=False):
  41         self.units = []
  42
  43         if empty:
  44             return
  45
  46         if not hasplurals(source):
  47             super(PoXliffUnit, self).__init__(source)
  48             return
  49
  50         self.xmlelement = etree.Element(self.namespaced("group"))
  51         self.xmlelement.set("restype", "x-gettext-plurals")
  52         self.setsource(source)
  53
  54     def __eq__(self, other):
  55         if isinstance(other, PoXliffUnit):
  56             if len(self.units) != len(other.units):
  57                 return False
  58             if len(self.units) == 0:
  59                 return True
  60             if not super(PoXliffUnit, self).__eq__(other):
  61                 return False
  62             for i in range(len(self.units)-1):
  63                 if not self.units[i+1] == other.units[i+1]:
  64                     return False
  65             return True
  66         if len(self.units) <= 1:
  67             if isinstance(other, lisa.LISAunit):
  68                 return super(PoXliffUnit, self).__eq__(other)
  69             else:
  70                 return self.source == other.source and self.target == other.target
  71         return False
  72
  73     def setsource(self, source, sourcelang="en"):
  74 #        TODO: consider changing from plural to singular, etc.
  75         if not hasplurals(source):
  76             super(PoXliffUnit, self).setsource(source, sourcelang)
  77         else:
  78             target = self.target
  79             for unit in self.units:
  80                 try:
  81                     self.xmlelement.remove(unit.xmlelement)
  82                 except xml.dom.NotFoundErr:
  83                     pass
  84             self.units = []
  85             for s in source.strings:
  86                 newunit = xliff.xliffunit(s)
  87 #                newunit.namespace = self.namespace #XXX?necessary?
  88                 self.units.append(newunit)
  89                 self.xmlelement.append(newunit.xmlelement)
  90             self.target = target
  91
  92     def getsource(self):
  93         strings = [super(PoXliffUnit, self).getsource()]
  94         strings.extend([unit.source for unit in self.units[1:]])
  95         return multistring(strings)
  96     source = property(getsource, setsource)
  97
  98     def settarget(self, text, lang='xx', append=False):
  99         if self.gettarget() == text:
 100             return
 101         if not self.hasplural():
 102             super(PoXliffUnit, self).settarget(text, lang, append)
 103             return
 104         if not isinstance(text, multistring):
 105             text = multistring(text)
 106         source = self.source
 107         sourcel = len(source.strings)
 108         targetl = len(text.strings)
 109         if sourcel < targetl:
 110             sources = source.strings + [source.strings[-1]] * (targetl - sourcel)
 111             targets = text.strings
 112             id = self.getid()
 113             self.source = multistring(sources)
 114             self.setid(id)
 115         elif targetl < sourcel:
 116             targets = text.strings + [""] * (sourcel - targetl)
 117         else:
 118             targets = text.strings
 119
 120         for i in range(len(self.units)):
 121             self.units[i].target = targets[i]
 122
 123     def gettarget(self):
 124         if self.hasplural():
 125             strings = [unit.target for unit in self.units]
 126             if strings:
 127                 return multistring(strings)
 128             else:
 129                 return None
 130         else:
 131             return super(PoXliffUnit, self).gettarget()
 132
 133     target = property(gettarget, settarget)
 134
 135     def addnote(self, text, origin=None):
 136         """Add a note specifically in a "note" tag"""
 137         if isinstance(text, str):
 138             text = text.decode("utf-8")
 139         note = etree.SubElement(self.xmlelement, self.namespaced("note"))
 140         note.text = text
 141         if origin:
 142             note.set("from", origin)
 143         for unit in self.units[1:]:
 144             unit.addnote(text, origin)
 145
 146     def getnotes(self, origin=None):
 147         #NOTE: We support both <context> and <note> tags in xliff files for comments
 148         if origin == "translator":
 149             notes = super(PoXliffUnit, self).getnotes("translator")
 150             trancomments = self.gettranslatorcomments()
 151             if notes == trancomments or trancomments.find(notes) >= 0:
 152                 notes = ""
 153             elif notes.find(trancomments) >= 0:
 154                 trancomments = notes
 155                 notes = ""
 156             trancomments = trancomments + notes
 157             return trancomments
 158         elif origin in ["programmer", "developer", "source code"]:
 159             devcomments = super(PoXliffUnit, self).getnotes("developer")
 160             autocomments = self.getautomaticcomments()
 161             if devcomments == autocomments or autocomments.find(devcomments) >= 0:
 162                 devcomments = ""
 163             elif devcomments.find(autocomments) >= 0:
 164                 autocomments = devcomments
 165                 devcomments = ""
 166             return autocomments
 167         else:
 168             return super(PoXliffUnit, self).getnotes(origin)
 169
 170     def markfuzzy(self, value=True):
 171         super(PoXliffUnit, self).markfuzzy(value)
 172         for unit in self.units[1:]:
 173             unit.markfuzzy(value)
 174
 175     def marktranslated(self):
 176         super(PoXliffUnit, self).marktranslated()
 177         for unit in self.units[1:]:
 178             unit.marktranslated()
 179
 180     def setid(self, id):
 181         self.xmlelement.set("id", id)
 182         if len(self.units) > 1:
 183             for i in range(len(self.units)):
 184                 self.units[i].setid("%s[%d]" % (id, i))
 185
 186     def getlocations(self):
 187         """Returns all the references (source locations)"""
 188         groups = self.getcontextgroups("po-reference")
 189         references = []
 190         for group in groups:
 191             sourcefile = ""
 192             linenumber = ""
 193             for (type, text) in group:
 194                 if type == "sourcefile":
 195                     sourcefile = text
 196                 elif type == "linenumber":
 197                     linenumber = text
 198             assert sourcefile
 199             if linenumber:
 200                 sourcefile = sourcefile + ":" + linenumber
 201             references.append(sourcefile)
 202         return references
 203
 204     def getautomaticcomments(self):
 205         """Returns the automatic comments (x-po-autocomment), which corresponds
 206         to the #. style po comments."""
 207         def hasautocomment((type, text)):
 208             return type == "x-po-autocomment"
 209         groups = self.getcontextgroups("po-entry")
 210         comments = []
 211         for group in groups:
 212             commentpairs = filter(hasautocomment, group)
 213             for (type, text) in commentpairs:
 214                 comments.append(text)
 215         return "\n".join(comments)
 216
 217     def gettranslatorcomments(self):
 218         """Returns the translator comments (x-po-trancomment), which corresponds
 219         to the # style po comments."""
 220         def hastrancomment((type, text)):
 221             return type == "x-po-trancomment"
 222         groups = self.getcontextgroups("po-entry")
 223         comments = []
 224         for group in groups:
 225             commentpairs = filter(hastrancomment, group)
 226             for (type, text) in commentpairs:
 227                 comments.append(text)
 228         return "\n".join(comments)
 229
 230     def isheader(self):
 231         return "gettext-domain-header" in (self.getrestype() or "")
 232
 233     def createfromxmlElement(cls, element, namespace=None):
 234         if element.tag.endswith("trans-unit"):
 235             object = cls(None, empty=True)
 236             object.xmlelement = element
 237             object.namespace = namespace
 238             return object
 239         assert element.tag.endswith("group")
 240         group = cls(None, empty=True)
 241         group.xmlelement = element
 242         group.namespace = namespace
 243         units = element.findall('.//%s' % group.namespaced('trans-unit'))
 244         for unit in units:
 245             subunit = xliff.xliffunit.createfromxmlElement(unit)
 246             subunit.namespace = namespace
 247             group.units.append(subunit)
 248         return group
 249     createfromxmlElement = classmethod(createfromxmlElement)
 250
 251     def hasplural(self):
 252         return self.xmlelement.tag == self.namespaced("group")
 253
 254
 255 class PoXliffFile(xliff.xlifffile, poheader.poheader):
 256     """a file for the po variant of Xliff files"""
 257     UnitClass = PoXliffUnit
 258     def __init__(self, *args, **kwargs):
 259         if not "sourcelanguage" in kwargs:
 260             kwargs["sourcelanguage"] = "en-US"
 261         xliff.xlifffile.__init__(self, *args, **kwargs)
 262
 263     def createfilenode(self, filename, sourcelanguage="en-US", datatype="po"):
 264         # Let's ignore the sourcelanguage parameter opting for the internal
 265         # one. PO files will probably be one language
 266         return super(PoXliffFile, self).createfilenode(filename, sourcelanguage=self.sourcelanguage, datatype="po")
 267
 268     def addheaderunit(self, target, filename):
 269         unit = self.addsourceunit(target, filename, True)
 270         unit.target = target
 271         unit.xmlelement.set("restype", "x-gettext-domain-header")
 272         unit.xmlelement.set("approved", "no")
 273         lisa.setXMLspace(unit.xmlelement, "preserve")
 274         return unit
 275
 276     def addplural(self, source, target, filename, createifmissing=False):
 277         """This method should now be unnecessary, but is left for reference"""
 278         assert isinstance(source, multistring)
 279         if not isinstance(target, multistring):
 280             target = multistring(target)
 281         sourcel = len(source.strings)
 282         targetl = len(target.strings)
 283         if sourcel < targetl:
 284             sources = source.strings + [source.strings[-1]] * targetl - sourcel
 285             targets = target.strings
 286         else:
 287             sources = source.strings
 288             targets = target.strings
 289         self._messagenum += 1
 290         pluralnum = 0
 291         group = self.creategroup(filename, True, restype="x-gettext-plural")
 292         for (src, tgt) in zip(sources, targets):
 293             unit = self.UnitClass(src)
 294             unit.target = tgt
 295             unit.setid("%d[%d]" % (self._messagenum, pluralnum))
 296             pluralnum += 1
 297             group.append(unit.xmlelement)
 298             self.units.append(unit)
 299
 300         if pluralnum < sourcel:
 301             for string in sources[pluralnum:]:
 302                 unit = self.UnitClass(src)
 303                 unit.xmlelement.set("translate", "no")
 304                 unit.setid("%d[%d]" % (self._messagenum, pluralnum))
 305                 pluralnum += 1
 306                 group.append(unit.xmlelement)
 307                 self.units.append(unit)
 308
 309         return self.units[-pluralnum]
 310
 311     def parse(self, xml):
 312         """Populates this object from the given xml string"""
 313         #TODO: Make more robust
 314         def ispluralgroup(node):
 315             """determines whether the xml node refers to a getttext plural"""
 316             return node.get("restype") == "x-gettext-plurals"
 317
 318         def isnonpluralunit(node):
 319             """determindes whether the xml node contains a plural like id.
 320
 321             We want to filter out all the plural nodes, except the very first
 322             one in each group.
 323             """
 324             return re.match(r"\d+\[[123456]\]$", node.get("id") or "") is None
 325
 326         def pluralunits(pluralgroups):
 327             for pluralgroup in pluralgroups:
 328                 yield self.UnitClass.createfromxmlElement(pluralgroup, namespace=self.namespace)
 329
 330         self.filename = getattr(xml, 'name', '')
 331         if hasattr(xml, "read"):
 332             xml.seek(0)
 333             xmlsrc = xml.read()
 334             xml = xmlsrc
 335         self.document = etree.fromstring(xml).getroottree()
 336         self.initbody()
 337         assert self.document.getroot().tag == self.namespaced(self.rootNode)
 338         groups = self.document.findall(".//%s" % self.namespaced("group"))
 339         pluralgroups = filter(ispluralgroup, groups)
 340         termEntries = self.body.findall('.//%s' % self.namespaced(self.UnitClass.rootNode))
 341         if termEntries is None:
 342             return
 343
 344         singularunits = filter(isnonpluralunit, termEntries)
 345         pluralunit_iter = pluralunits(pluralgroups)
 346         try:
 347             nextplural = pluralunit_iter.next()
 348         except StopIteration:
 349             nextplural = None
 350
 351         for entry in singularunits:
 352             term = self.UnitClass.createfromxmlElement(entry, namespace=self.namespace)
 353             if nextplural and unicode(term.source) in nextplural.source.strings:
 354                 self.units.append(nextplural)
 355                 try:
 356                     nextplural = pluralunit_iter.next()
 357                 except StopIteration, i:
 358                     nextplural = None
 359             else:
 360                 self.units.append(term)
 361