fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / convert / xliff2oo.py
blob cebe2162bff4fb01fb44550017ad51d4a22b6bc9
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2006 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 """convert XLIFF localization files to an OpenOffice.org (SDF) localization file"""
25 import sys
26 import os
27 from translate.storage import oo
28 from translate.storage import factory
29 from translate.filters import pofilter
30 from translate.filters import checks
31 from translate.filters import autocorrect
32 import time
class reoo:
    """merges translated units back into an OpenOffice.org (SDF) template

    The template is parsed once when the converter is constructed and indexed
    by the keys that makekey() builds.  convertstore() then walks a
    translation store and writes every accepted translation into the
    matching template entry.

    Attributes set by this class:
      o      -- the parsed oo.oofile template
      index  -- dictionary mapping the makekey() result to the oo entry
      p      -- the translation store last passed to convertstore()
    """

    def __init__(self, templatefile, languages=None, timestamp=None, includefuzzy=False, long_keys=False, filteraction="exclude"):
        """construct a reoo converter for the specified languages (timestamp=0 means leave unchanged)

        templatefile -- file-like object holding the oo template (only read() is called)
        languages    -- pair of language ids (source, target), or None to use
                        the first and second line of each template entry
        timestamp    -- time tuple written to every modified line; None selects
                        the fixed default 2002-02-02 02:02:02, any other false
                        value leaves the existing timestamps alone
        includefuzzy -- whether fuzzy units are applied as well
        long_keys    -- whether the project name is included in the index keys
        filteraction -- handed to the module-level filter for every unit
        """
        # long_keys has to be set before readoo(), which builds the index with it
        self.long_keys = long_keys
        self.readoo(templatefile)
        self.languages = languages
        self.filteraction = filteraction
        self.includefuzzy = includefuzzy
        if timestamp is None:
            timestamp = time.strptime("2002-02-02 02:02:02", "%Y-%m-%d %H:%M:%S")
        self.timestamp = timestamp
        if self.timestamp:
            self.timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", self.timestamp)
        else:
            self.timestamp_str = None

    def makekey(self, ookey):
        """converts an oo key tuple into a key identifier for the source file"""
        project, sourcefile, resourcetype, groupid, localid, platform = ookey
        sourcefile = sourcefile.replace('\\', '/')
        if self.long_keys:
            sourcebase = os.path.join(project, sourcefile)
        else:
            # short keys only use the file name, without its directories
            sourcebase = sourcefile.split('/')[-1]
        if groupid and localid:
            fullid = groupid + "." + localid
        else:
            # at most one of the two is set, so no separator is needed
            fullid = groupid + localid
        if resourcetype:
            fullid = fullid + "." + resourcetype
        key = "%s#%s" % (sourcebase, fullid)
        return oo.normalizefilename(key)

    def makeindex(self):
        """makes an index of the oo keys that are used in the source file"""
        self.index = {}
        for ookey, theoo in self.o.ookeys.iteritems():
            self.index[self.makekey(ookey)] = theoo

    def readoo(self, of):
        """read in the oo from the file"""
        oosrc = of.read()
        self.o = oo.oofile()
        self.o.parse(oosrc)
        self.makeindex()

    def handleunit(self, unit):
        """applies one translation unit to every template entry its locations refer to

        Each location has the form <key>.<subkey>; problems are reported on
        stderr and the remaining locations are still processed.
        """
        # TODO: make this work for multiple columns in oo...
        # technically our formats should just have one location for each entry...
        # but we handle multiple ones just to be safe...
        for location in unit.getlocations():
            subkeypos = location.rfind('.')
            if subkeypos == -1:
                # without a separator there is no subkey to set; slicing with
                # -1 would silently chop the last character off the key
                print >> sys.stderr, "location %s has no subkey, skipping" % (location,)
                continue
            subkey = location[subkeypos+1:]
            key = location[:subkeypos]
            # this is just to handle our old system of using %s/%s:%s instead of %s/%s#%s
            key = key.replace(':', '#')
            # this is to handle using / instead of \ in the sourcefile...
            key = key.replace('\\', '/')
            key = oo.normalizefilename(key)
            if key in self.index:
                # now we need to replace the definition of entity with msgstr
                self.applytranslation(key, subkey, self.index[key], unit)
                continue
            print >> sys.stderr, "couldn't find key %s from po in %d keys" % (key, len(self.index))
            try:
                sourceunitlines = str(unit)
            except Exception:
                # str(unit) itself failed, so fall back to repr() rather than
                # calling str() a second time inside the handler
                print >> sys.stderr, "error outputting source unit %r" % (unit,)
            else:
                print >> sys.stderr, sourceunitlines

    def applytranslation(self, key, subkey, theoo, unit):
        """applies the translation from the source unit to the oo unit

        key is accepted for symmetry with handleunit() but not used here;
        subkey names the attribute of the target line that receives the text.
        Fuzzy units (unless includefuzzy is set) and units without a
        translation leave the oo unit untouched.
        """
        if not self.includefuzzy and unit.isfuzzy():
            return
        unquotedstr = unit.target
        # If there is no translation (missing or only whitespace), we don't want to add a line
        if not unquotedstr or not unquotedstr.strip():
            return
        makecopy = False
        if self.languages is None:
            part1 = theoo.lines[0]
            if len(theoo.lines) > 1:
                part2 = theoo.lines[1]
            else:
                makecopy = True
        else:
            part1 = theoo.languages[self.languages[0]]
            if self.languages[1] in theoo.languages:
                part2 = theoo.languages[self.languages[1]]
            else:
                makecopy = True
        if makecopy:
            # no target line yet: start from a copy of the source line
            part2 = oo.ooline(part1.getparts())
        if isinstance(unquotedstr, unicode):
            unquotedstr = unquotedstr.encode("UTF-8")
        # finally set the new definition in the oo
        setattr(part2, subkey.strip(), unquotedstr)
        # set the modified time
        if self.timestamp_str:
            part2.timestamp = self.timestamp_str
        if self.languages:
            part2.languageid = self.languages[1]
        if makecopy:
            theoo.addline(part2)

    def convertstore(self, sourcestore):
        """applies all units of sourcestore that pass the filter and returns the modified oo file object"""
        self.p = sourcestore
        # translate the strings
        for unit in self.p.units:
            # there may be more than one element due to msguniq merge
            if filter.validelement(unit, self.p.filename, self.filteraction):
                self.handleunit(unit)
        # return the modified oo file object
        return self.o
def getmtime(filename):
    """returns the modification time of filename as a local time tuple"""
    from stat import ST_MTIME
    modified = os.stat(filename)[ST_MTIME]
    return time.localtime(modified)
class oocheckfilter(pofilter.pocheckfilter):
    """check filter that decides whether a unit may be used in the conversion"""

    def validelement(self, unit, filename, filteraction):
        """Returns whether or not to use unit in conversion. (filename is just for error reporting)

        filteraction selects how failed checks are treated:
          "none"            -- no checks are run, every unit is used
          "exclude-serious" -- units failing a check listed in options.error are dropped
          "exclude-all"     -- units failing any reported check are dropped
          anything else     -- failures are only reported on stderr

        Every failure is reported and the most severe one decides the result,
        so the outcome does not depend on the iteration order of the results.
        """
        if filteraction == "none":
            return True
        filterresult = self.filterunit(unit)
        if not filterresult:
            return True
        # NOTE(review): this compares the result with the imported autocorrect
        # module, presumably as a sentinel value -- confirm against pofilter
        if not (filterresult != autocorrect):
            return True
        locations = unit.getlocations()
        if locations:
            location = locations[0]
        else:
            # a unit without locations can still be reported
            location = ""
        founderror = False
        foundwarning = False
        for filtername, filtermessage in filterresult.iteritems():
            if filtername in self.options.error:
                print >> sys.stderr, "Error at %s::%s: %s" % (filename, location, filtermessage)
                founderror = True
            elif filtername in self.options.warning or self.options.alwayswarn:
                print >> sys.stderr, "Warning at %s::%s: %s" % (filename, location, filtermessage)
                foundwarning = True
        if founderror:
            return filteraction not in ["exclude-all", "exclude-serious"]
        if foundwarning:
            return filteraction not in ["exclude-all"]
        return True
class oofilteroptions:
    """option values handed to the check filter used during conversion"""
    # checks reported as errors
    error = ['variables', 'xmltags', 'escapes']
    # checks reported as warnings
    warning = ['blank']
    # To only issue warnings for tests listed in warning, change the following to False:
    alwayswarn = True
    # To use all available tests, set limitfilters to [] instead:
    limitfilters = error + warning
    # To exclude certain tests, list them in here:
    excludefilters = {}
    autocorrect = False
    includeheader = False
    includereview = False
    includefuzzy = False
# module-level filter shared by all conversions; reoo.convertstore() looks it
# up by the name "filter", which shadows the builtin within this module
options = oofilteroptions()
filter = oocheckfilter(
    options,
    [checks.OpenOfficeChecker, checks.StandardUnitChecker],
    checks.openofficeconfig,
)
def convertoo(inputfile, outputfile, templatefile, sourcelanguage=None, targetlanguage=None, timestamp=None, includefuzzy=False, multifilestyle="single", filteraction=None):
    """merges the translations in inputfile into the oo template and writes the result to outputfile

    Raises ValueError when no target language or no template file is given.
    When no source language is given it defaults to "01" for a numeric
    target language and to "en-US" otherwise.  Returns True.
    """
    inputstore = factory.getobject(inputfile)
    inputstore.filename = getattr(inputfile, 'name', '')
    if not targetlanguage:
        raise ValueError("You must specify the target language")
    if not sourcelanguage:
        # pick a default source id of the same kind as the target id
        sourcelanguage = "en-US"
        if targetlanguage.isdigit():
            sourcelanguage = "01"
    if templatefile is None:
        raise ValueError("must have template file for oo files")
    # any multifile style other than "single" needs the project in the keys
    uselongkeys = multifilestyle != "single"
    convertor = reoo(
        templatefile,
        languages=(sourcelanguage, targetlanguage),
        timestamp=timestamp,
        includefuzzy=includefuzzy,
        long_keys=uselongkeys,
        filteraction=filteraction,
    )
    outputstore = convertor.convertstore(inputstore)
    # TODO: check if we need to manually delete missing items
    outputfile.write(str(outputstore))
    return True
def main(argv=None):
    """command-line entry point: builds the option parser and runs it on argv"""
    from translate.convert import convert
    # po and xlf input are both handled by the same conversion function
    formats = {}
    for inputformat in ("po", "xlf"):
        formats[(inputformat, "oo")] = ("oo", convertoo)
    # always treat the input as an archive unless it is a directory
    archiveformats = {
        (None, "output"): oo.oomultifile,
        (None, "template"): oo.oomultifile,
    }
    parser = convert.ArchiveConvertOptionParser(
        formats,
        usetemplates=True,
        description=__doc__,
        archiveformats=archiveformats,
    )
    parser.add_option(
        "-l", "--language", dest="targetlanguage", default=None,
        help="set target language code (e.g. af-ZA) [required]", metavar="LANG")
    parser.add_option(
        "", "--source-language", dest="sourcelanguage", default=None,
        help="set source language code (default en-US)", metavar="LANG")
    # const=0 is a false timestamp, which tells reoo to keep existing timestamps
    parser.add_option(
        "-T", "--keeptimestamp", dest="timestamp", default=None,
        action="store_const", const=0,
        help="don't change the timestamps of the strings")
    parser.add_option(
        "", "--nonrecursiveoutput", dest="allowrecursiveoutput", default=True,
        action="store_false",
        help="don't treat the output oo as a recursive store")
    parser.add_option(
        "", "--nonrecursivetemplate", dest="allowrecursivetemplate", default=True,
        action="store_false",
        help="don't treat the template oo as a recursive store")
    parser.add_option(
        "", "--filteraction", dest="filteraction", default="none", metavar="ACTION",
        help="action on pofilter failure: none (default), warn, exclude-serious, exclude-all")
    parser.add_fuzzy_option()
    parser.add_multifile_option()
    # these option values are forwarded to convertoo as keyword arguments
    for optionname in ("sourcelanguage", "targetlanguage", "timestamp", "filteraction"):
        parser.passthrough.append(optionname)
    parser.run(argv)
# run the converter only when executed as a script, not when imported
if __name__ == "__main__":
    main()