convert/xliff2oo.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2004-2006 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21 #
  22
  23 """convert XLIFF localization files to an OpenOffice.org (SDF) localization file"""
  24
  25 import sys
  26 import os
  27 from translate.storage import oo
  28 from translate.storage import factory
  29 from translate.filters import pofilter
  30 from translate.filters import checks
  31 from translate.filters import autocorrect
  32 import time
  33
  34 class reoo:
  35     def __init__(self, templatefile, languages=None, timestamp=None, includefuzzy=False, long_keys=False, filteraction="exclude"):
  36         """construct a reoo converter for the specified languages (timestamp=0 means leave unchanged)"""
  37         # languages is a pair of language ids
  38         self.long_keys = long_keys
  39         self.readoo(templatefile)
  40         self.languages = languages
  41         self.filteraction = filteraction
  42         if timestamp is None:
  43             self.timestamp = time.strptime("2002-02-02 02:02:02", "%Y-%m-%d %H:%M:%S")
  44         else:
  45             self.timestamp = timestamp
  46         if self.timestamp:
  47             self.timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", self.timestamp)
  48         else:
  49             self.timestamp_str = None
  50         self.includefuzzy = includefuzzy
  51
  52     def makekey(self, ookey):
  53         """converts an oo key tuple into a key identifier for the source file"""
  54         project, sourcefile, resourcetype, groupid, localid, platform = ookey
  55         sourcefile = sourcefile.replace('\\','/')
  56         if self.long_keys:
  57             sourcebase = os.path.join(project, sourcefile)
  58         else:
  59             sourceparts = sourcefile.split('/')
  60             sourcebase = "".join(sourceparts[-1:])
  61         if len(groupid) == 0 or len(localid) == 0:
  62             fullid = groupid + localid
  63         else:
  64             fullid = groupid + "." + localid
  65         if resourcetype:
  66             fullid = fullid + "." + resourcetype
  67         key = "%s#%s" % (sourcebase, fullid)
  68         return oo.normalizefilename(key)
  69
  70     def makeindex(self):
  71         """makes an index of the oo keys that are used in the source file"""
  72         self.index = {}
  73         for ookey, theoo in self.o.ookeys.iteritems():
  74             sourcekey = self.makekey(ookey)
  75             self.index[sourcekey] = theoo
  76
  77     def readoo(self, of):
  78         """read in the oo from the file"""
  79         oosrc = of.read()
  80         self.o = oo.oofile()
  81         self.o.parse(oosrc)
  82         self.makeindex()
  83
  84     def handleunit(self, unit):
  85         # TODO: make this work for multiple columns in oo...
  86         locations = unit.getlocations()
  87         # technically our formats should just have one location for each entry...
  88         # but we handle multiple ones just to be safe...
  89         for location in locations:
  90             subkeypos = location.rfind('.')
  91             subkey = location[subkeypos+1:]
  92             key = location[:subkeypos]
  93             # this is just to handle our old system of using %s/%s:%s instead of %s/%s#%s
  94             key = key.replace(':', '#')
  95             # this is to handle using / instead of \ in the sourcefile...
  96             key = key.replace('\\', '/')
  97             key = oo.normalizefilename(key)
  98             if self.index.has_key(key):
  99                 # now we need to replace the definition of entity with msgstr
 100                 theoo = self.index[key] # find the oo
 101                 self.applytranslation(key, subkey, theoo, unit)
 102             else:
 103                 print >> sys.stderr, "couldn't find key %s from po in %d keys" % (key, len(self.index))
 104                 try:
 105                     sourceunitlines = str(unit)
 106                     if isinstance(sourceunitlines, unicode):
 107                         sourceunitlines = sourceunitlines.encode("utf-8")
 108                     print >> sys.stderr, sourceunitlines
 109                 except:
 110                     print >> sys.stderr, "error outputting source unit %r" % (str(unit),)
 111
 112     def applytranslation(self, key, subkey, theoo, unit):
 113         """applies the translation from the source unit to the oo unit"""
 114         if not self.includefuzzy and unit.isfuzzy():
 115             return
 116         makecopy = False
 117         if self.languages is None:
 118             part1 = theoo.lines[0]
 119             if len(theoo.lines) > 1:
 120                 part2 = theoo.lines[1]
 121             else:
 122                 makecopy = True
 123         else:
 124             part1 = theoo.languages[self.languages[0]]
 125             if self.languages[1] in theoo.languages:
 126                 part2 = theoo.languages[self.languages[1]]
 127             else:
 128                 makecopy = True
 129         if makecopy:
 130             part2 = oo.ooline(part1.getparts())
 131         unquotedid = unit.source
 132         unquotedstr = unit.target
 133         # If there is no translation, we don't want to add a line
 134         if len(unquotedstr.strip()) == 0:
 135             return
 136         if isinstance(unquotedstr, unicode):
 137             unquotedstr = unquotedstr.encode("UTF-8")
 138         # finally set the new definition in the oo, but not if its empty
 139         if len(unquotedstr) > 0:
 140             subkey = subkey.strip()
 141             setattr(part2, subkey, unquotedstr)
 142         # set the modified time
 143         if self.timestamp_str:
 144             part2.timestamp = self.timestamp_str
 145         if self.languages:
 146             part2.languageid = self.languages[1]
 147         if makecopy:
 148             theoo.addline(part2)
 149
 150     def convertstore(self, sourcestore):
 151         self.p = sourcestore
 152         # translate the strings
 153         for unit in self.p.units:
 154             # there may be more than one element due to msguniq merge
 155             if filter.validelement(unit, self.p.filename, self.filteraction):
 156                 self.handleunit(unit)
 157         # return the modified oo file object
 158         return self.o
 159
 160 def getmtime(filename):
 161     import stat
 162     return time.localtime(os.stat(filename)[stat.ST_MTIME])
 163
 164 class oocheckfilter(pofilter.pocheckfilter):
 165     def validelement(self, unit, filename, filteraction):
 166         """Returns whether or not to use unit in conversion. (filename is just for error reporting)"""
 167         if filteraction == "none": return True
 168         filterresult = self.filterunit(unit)
 169         if filterresult:
 170             if filterresult != autocorrect:
 171                 for filtername, filtermessage in filterresult.iteritems():
 172                     location = unit.getlocations()[0]
 173                     if filtername in self.options.error:
 174                         print >> sys.stderr, "Error at %s::%s: %s" % (filename, location, filtermessage)
 175                         return not filteraction in ["exclude-all", "exclude-serious"]
 176                     if filtername in self.options.warning or self.options.alwayswarn:
 177                         print >> sys.stderr, "Warning at %s::%s: %s" % (filename, location, filtermessage)
 178                         return not filteraction in ["exclude-all"]
 179         return True
 180
 181 class oofilteroptions:
 182     error = ['variables', 'xmltags', 'escapes']
 183     warning = ['blank']
 184     #To only issue warnings for tests listed in warning, change the following to False:
 185     alwayswarn = True
 186     limitfilters = error + warning
 187     #To use all available tests, uncomment the following:
 188     #limitfilters = []
 189     #To exclude certain tests, list them in here:
 190     excludefilters = {}
 191     includefuzzy = False
 192     includereview = False
 193     includeheader = False
 194     autocorrect = False
 195
# Module-level checker shared by all conversions: reoo.convertstore() calls
# filter.validelement() for every unit.
# NOTE: the name "filter" shadows the builtin of the same name in this module.
options = oofilteroptions()
filter = oocheckfilter(options, [checks.OpenOfficeChecker, checks.StandardUnitChecker], checks.openofficeconfig)
 198
 199 def convertoo(inputfile, outputfile, templatefile, sourcelanguage=None, targetlanguage=None, timestamp=None, includefuzzy=False, multifilestyle="single", filteraction=None):
 200     inputstore = factory.getobject(inputfile)
 201     inputstore.filename = getattr(inputfile, 'name', '')
 202     if not targetlanguage:
 203         raise ValueError("You must specify the target language")
 204     if not sourcelanguage:
 205         if targetlanguage.isdigit():
 206             sourcelanguage = "01"
 207         else:
 208             sourcelanguage = "en-US"
 209     languages = (sourcelanguage, targetlanguage)
 210     if templatefile is None:
 211         raise ValueError("must have template file for oo files")
 212     else:
 213         convertor = reoo(templatefile, languages=languages, timestamp=timestamp, includefuzzy=includefuzzy, long_keys=multifilestyle != "single", filteraction=filteraction)
 214     outputstore = convertor.convertstore(inputstore)
 215     # TODO: check if we need to manually delete missing items
 216     outputfile.write(str(outputstore))
 217     return True
 218
 219 def main(argv=None):
 220     from translate.convert import convert
 221     formats = {("po", "oo"):("oo", convertoo), ("xlf", "oo"):("oo", convertoo)}
 222     # always treat the input as an archive unless it is a directory
 223     archiveformats = {(None, "output"): oo.oomultifile, (None, "template"): oo.oomultifile}
 224     parser = convert.ArchiveConvertOptionParser(formats, usetemplates=True, description=__doc__, archiveformats=archiveformats)
 225     parser.add_option("-l", "--language", dest="targetlanguage", default=None,
 226             help="set target language code (e.g. af-ZA) [required]", metavar="LANG")
 227     parser.add_option("", "--source-language", dest="sourcelanguage", default=None,
 228             help="set source language code (default en-US)", metavar="LANG")
 229     parser.add_option("-T", "--keeptimestamp", dest="timestamp", default=None, action="store_const", const=0,
 230             help="don't change the timestamps of the strings")
 231     parser.add_option("", "--nonrecursiveoutput", dest="allowrecursiveoutput", default=True, action="store_false", help="don't treat the output oo as a recursive store")
 232     parser.add_option("", "--nonrecursivetemplate", dest="allowrecursivetemplate", default=True, action="store_false", help="don't treat the template oo as a recursive store")
 233     parser.add_option("", "--filteraction", dest="filteraction", default="none", metavar="ACTION",
 234             help="action on pofilter failure: none (default), warn, exclude-serious, exclude-all")
 235     parser.add_fuzzy_option()
 236     parser.add_multifile_option()
 237     parser.passthrough.append("sourcelanguage")
 238     parser.passthrough.append("targetlanguage")
 239     parser.passthrough.append("timestamp")
 240     parser.passthrough.append("filteraction")
 241     parser.run(argv)
 242
# run the command-line converter only when executed as a script
if __name__ == '__main__':
    main()