fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / convert / po2oo.py
blob b1a08a7ceaf661eab91a6fba528f6d788cd2b2a1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2004-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


"""convert Gettext PO localization files to an OpenOffice.org (SDF) localization file

see: http://translate.sourceforge.net/wiki/toolkit/po2oo for examples and
usage instructions
"""
import sys
import os
import stat
import time

from translate.storage import oo
from translate.storage import factory
from translate.filters import pofilter
from translate.filters import checks
from translate.filters import autocorrect
class reoo:
    """Applies translations from a translation store to an OpenOffice.org (SDF) template.

    The template is parsed once on construction and indexed by the key built
    in makekey(); convertstore() then writes the target text of every accepted
    unit into the matching oo entry and returns the modified oo file object.
    """

    def __init__(self, templatefile, languages=None, timestamp=None, includefuzzy=False, long_keys=False, filteraction="exclude"):
        """construct a reoo converter for the specified languages (timestamp=0 means leave unchanged)

        templatefile -- object with a read() method returning the oo template source
        languages -- pair of language ids (source, target), or None to work on
                     the first two lines of each oo entry
        timestamp -- time tuple stamped on changed lines; None selects a fixed
                     default, a false value (e.g. 0) leaves timestamps untouched
        includefuzzy -- also apply translations from fuzzy units
        long_keys -- build index keys from project + full source path instead
                     of only the last component of the source path
        filteraction -- handed to the module-level filter's validelement()
        """
        # long_keys must be set before readoo(), which builds the index via makekey()
        self.long_keys = long_keys
        self.readoo(templatefile)
        # languages is a pair of language ids
        self.languages = languages
        self.filteraction = filteraction
        if timestamp is None:
            self.timestamp = time.strptime("2002-02-02 02:02:02", "%Y-%m-%d %H:%M:%S")
        else:
            self.timestamp = timestamp
        if self.timestamp:
            self.timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", self.timestamp)
        else:
            # a false timestamp means applytranslation() will not touch timestamps
            self.timestamp_str = None
        self.includefuzzy = includefuzzy

    def makekey(self, ookey):
        """converts an oo key tuple into a key identifier for the source file

        ookey is a 6-tuple (project, sourcefile, resourcetype, groupid, localid,
        platform); platform does not take part in the key. The result has the
        form "<sourcebase>#<fullid>" passed through oo.normalizefilename().
        """
        project, sourcefile, resourcetype, groupid, localid, platform = ookey
        sourcefile = sourcefile.replace('\\', '/')
        if self.long_keys:
            sourcebase = os.path.join(project, sourcefile)
        else:
            # only the last path component identifies the file
            sourcebase = sourcefile.split('/')[-1]
        if groupid and localid:
            fullid = groupid + "." + localid
        else:
            # at most one of the two is non-empty, so no separator is needed
            fullid = groupid + localid
        if resourcetype:
            fullid = fullid + "." + resourcetype
        key = "%s#%s" % (sourcebase, fullid)
        return oo.normalizefilename(key)

    def makeindex(self):
        """makes an index of the oo keys that are used in the source file"""
        self.index = {}
        for ookey, theoo in self.o.ookeys.iteritems():
            sourcekey = self.makekey(ookey)
            self.index[sourcekey] = theoo

    def readoo(self, of):
        """read in the oo from the file

        Parses of.read() into self.o and rebuilds self.index.
        """
        oosrc = of.read()
        self.o = oo.oofile()
        self.o.parse(oosrc)
        self.makeindex()

    def handleunit(self, unit):
        """looks up each location of the unit in the index and applies its translation

        Locations that cannot be found in the index are reported on stderr.
        """
        # TODO: make this work for multiple columns in oo...
        locations = unit.getlocations()
        # technically our formats should just have one location for each entry...
        # but we handle multiple ones just to be safe...
        for location in locations:
            # the part after the last '.' names the oo attribute to set
            # NOTE(review): a location without any '.' gives subkeypos == -1, so
            # the key loses its last character - confirm such locations never occur
            subkeypos = location.rfind('.')
            subkey = location[subkeypos+1:]
            key = location[:subkeypos]
            # this is just to handle our old system of using %s/%s:%s instead of %s/%s#%s
            key = key.replace(':', '#')
            # this is to handle using / instead of \ in the sourcefile...
            key = key.replace('\\', '/')
            key = oo.normalizefilename(key)
            if key in self.index:
                # now we need to replace the definition of entity with msgstr
                theoo = self.index[key] # find the oo
                self.applytranslation(key, subkey, theoo, unit)
            else:
                print >> sys.stderr, "couldn't find key %s from po in %d keys" % (key, len(self.index))
                try:
                    sourceunitlines = str(unit)
                    if isinstance(sourceunitlines, unicode):
                        sourceunitlines = sourceunitlines.encode("utf-8")
                    print >> sys.stderr, sourceunitlines
                except Exception:
                    # str(unit) may be exactly what failed, so do not call it
                    # again here; %r on the unit itself cannot repeat that error
                    print >> sys.stderr, "error outputting source unit %r" % (unit,)

    def applytranslation(self, key, subkey, theoo, unit):
        """applies the translation from the source unit to the oo unit

        key -- index key of the oo entry (not used by this method)
        subkey -- name of the attribute on the target oo line to overwrite
        theoo -- oo entry holding the source and (possibly) target lines
        unit -- translation unit whose target text is applied

        Fuzzy units are skipped unless includefuzzy is set, and units with an
        empty target never add or change a line.
        """
        if not self.includefuzzy and unit.isfuzzy():
            return
        makecopy = False
        if self.languages is None:
            part1 = theoo.lines[0]
            if len(theoo.lines) > 1:
                part2 = theoo.lines[1]
            else:
                makecopy = True
        else:
            part1 = theoo.languages[self.languages[0]]
            if self.languages[1] in theoo.languages:
                part2 = theoo.languages[self.languages[1]]
            else:
                makecopy = True
        if makecopy:
            # no target line yet: start from a copy of the source line
            part2 = oo.ooline(part1.getparts())
        unquotedstr = unit.target
        # If there is no translation, we don't want to add a line
        if len(unquotedstr) == 0:
            return
        if isinstance(unquotedstr, unicode):
            unquotedstr = unquotedstr.encode("UTF-8")
        # finally set the new definition in the oo (known to be non-empty here)
        setattr(part2, subkey, unquotedstr)
        # set the modified time
        if self.timestamp_str:
            part2.timestamp = self.timestamp_str
        if self.languages:
            part2.languageid = self.languages[1]
        if makecopy:
            theoo.addline(part2)

    def convertstore(self, sourcestore):
        """applies every unit of sourcestore that passes the filter and returns the oo file object"""
        self.p = sourcestore
        # translate the strings
        for unit in self.p.units:
            # there may be more than one element due to msguniq merge
            if filter.validelement(unit, self.p.filename, self.filteraction):
                self.handleunit(unit)
        # return the modified oo file object
        return self.o
def getmtime(filename):
    """Returns the modification time of filename as a local time tuple."""
    return time.localtime(os.stat(filename)[stat.ST_MTIME])
class oocheckfilter(pofilter.pocheckfilter):
    """Check filter used to decide whether a unit may take part in the conversion."""

    def validelement(self, unit, filename, filteraction):
        """Returns whether or not to use unit in conversion. (filename is just for error reporting)

        unit -- the translation unit to check
        filename -- only used in the messages written to stderr
        filteraction -- "none" skips all checks; "exclude-serious" rejects units
                        failing a test listed in options.error; "exclude-all"
                        also rejects units that only trigger a warning; any
                        other value reports the failure but keeps the unit
        """
        if filteraction == "none":
            return True
        filterresult = self.filterunit(unit)
        # NOTE(review): autocorrect here is the imported module, so this
        # comparison looks like it is always true for a dict result - confirm
        if filterresult and filterresult != autocorrect:
            locations = unit.getlocations()
            if locations:
                location = locations[0].encode('utf-8')
            else:
                # nothing to point at; keep the message format intact
                location = ""
            # Look for serious failures first: the result is iterated in
            # arbitrary order, and a warning seen first must not hide an error.
            for filtername, filtermessage in filterresult.iteritems():
                if filtername in self.options.error:
                    print >> sys.stderr, "Error at %s::%s: %s" % (filename, location, filtermessage)
                    return filteraction not in ["exclude-all", "exclude-serious"]
            for filtername, filtermessage in filterresult.iteritems():
                if filtername in self.options.warning or self.options.alwayswarn:
                    print >> sys.stderr, "Warning at %s::%s: %s" % (filename, location, filtermessage)
                    return filteraction not in ["exclude-all"]
        return True
class oofilteroptions:
    """Option values handed to the check filter used during conversion."""
    # tests whose failure is reported as an error
    error = ['variables', 'xmltags', 'escapes']
    # tests whose failure is reported as a warning
    warning = ['blank']
    #To only issue warnings for tests listed in warning, change the following to False:
    alwayswarn = True
    # only the tests named above are requested
    limitfilters = error + warning
    #To use all available tests, uncomment the following:
    #limitfilters = []
    #To exclude certain tests, list them in here:
    excludefilters = {}
    includefuzzy = False
    includereview = False
    includeheader = False
    autocorrect = False
# Module-level filter instance shared by all conversions.
# NOTE: the name "filter" shadows the builtin, but other code in this module
# refers to it by this name, so it is kept as is.
options = oofilteroptions()
filter = oocheckfilter(options, [checks.OpenOfficeChecker, checks.StandardUnitChecker], checks.openofficeconfig)
def convertoo(inputfile, outputfile, templatefile, sourcelanguage=None, targetlanguage=None, timestamp=None, includefuzzy=False, multifilestyle="single", filteraction=None):
    """Merges the translations in inputfile into the oo template and writes the result.

    inputfile -- translation file, loaded through factory.getobject()
    outputfile -- object with a write() method receiving the converted oo text
    templatefile -- oo template file (required)
    sourcelanguage -- source language id; when omitted it defaults to "01" if
                      targetlanguage is all digits and to "en-US" otherwise
    targetlanguage -- target language id (required)
    timestamp, includefuzzy, filteraction -- passed through to reoo
    multifilestyle -- anything other than "single" switches reoo to long keys

    Returns True on success. Raises ValueError when the target language or
    the template file is missing.
    """
    # Validate the arguments before parsing anything, so a bad call fails fast
    if not targetlanguage:
        raise ValueError("You must specify the target language")
    if templatefile is None:
        raise ValueError("must have template file for oo files")
    if not sourcelanguage:
        if targetlanguage.isdigit():
            sourcelanguage = "01"
        else:
            sourcelanguage = "en-US"
    languages = (sourcelanguage, targetlanguage)
    inputstore = factory.getobject(inputfile)
    inputstore.filename = getattr(inputfile, 'name', '')
    convertor = reoo(templatefile, languages=languages, timestamp=timestamp, includefuzzy=includefuzzy, long_keys=multifilestyle != "single", filteraction=filteraction)
    outputstore = convertor.convertstore(inputstore)
    # TODO: check if we need to manually delete missing items
    outputfile.write(str(outputstore))
    return True
def main(argv=None):
    """Command-line entry point: builds the option parser and runs the conversion.

    argv -- argument list handed to the parser's run() method
    """
    from translate.convert import convert
    formats = {("po", "oo"): ("oo", convertoo), ("xlf", "oo"): ("oo", convertoo)}
    # always treat the input as an archive unless it is a directory
    archiveformats = {(None, "output"): oo.oomultifile, (None, "template"): oo.oomultifile}
    parser = convert.ArchiveConvertOptionParser(formats, usetemplates=True, description=__doc__, archiveformats=archiveformats)
    parser.add_option("-l", "--language", dest="targetlanguage", default=None,
        help="set target language code (e.g. af-ZA) [required]", metavar="LANG")
    parser.add_option("", "--source-language", dest="sourcelanguage", default=None,
        help="set source language code (default en-US)", metavar="LANG")
    # -T stores 0 as the timestamp, which convertoo receives via the passthrough list
    parser.add_option("-T", "--keeptimestamp", dest="timestamp", default=None, action="store_const", const=0,
        help="don't change the timestamps of the strings")
    parser.add_option("", "--nonrecursiveoutput", dest="allowrecursiveoutput", default=True, action="store_false", help="don't treat the output oo as a recursive store")
    parser.add_option("", "--nonrecursivetemplate", dest="allowrecursivetemplate", default=True, action="store_false", help="don't treat the template oo as a recursive store")
    parser.add_option("", "--filteraction", dest="filteraction", default="none", metavar="ACTION",
        help="action on pofilter failure: none (default), warn, exclude-serious, exclude-all")
    parser.add_fuzzy_option()
    parser.add_multifile_option()
    # option values appended to the parser's passthrough list
    for optionname in ("sourcelanguage", "targetlanguage", "timestamp", "filteraction"):
        parser.passthrough.append(optionname)
    parser.run(argv)
# Run the converter when the module is executed as a script
if __name__ == '__main__':
    main()