for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / filters / pofilter.py
blobee61169ac18e515c70c17701b353aef4a572b382
1 #!/usr/bin/env python
2 #
3 # Copyright 2004-2007 Zuza Software Foundation
4 #
5 # This file is part of translate.
7 # translate is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # translate is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with translate; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 """Perform quality checks on Gettext PO, XLIFF and TMX localization files
23 Snippet files are created whenever a test fails. These can be examined, corrected and
24 merged back into the originals using pomerge
26 See: http://translate.sourceforge.net/wiki/toolkit/pofilter for examples and
27 usage instructions and http://translate.sourceforge.net/wiki/toolkit/pofilter_tests
28 for full descriptions of all tests
29 """
31 from translate.storage import factory
32 from translate.filters import checks
33 from translate.filters import autocorrect
34 from translate.misc import optrecurse
35 import os
class pocheckfilter:
    """Applies a combined checker to translation units and whole stores.

    The checker is built once in the constructor; filterunit() and
    filterfile() then consult the parsed command line options to decide
    which units are examined and how failures are reported.
    """

    def __init__(self, options, checkerclasses=None, checkerconfig=None):
        """builds a checkfilter using the given checker (a list is allowed too)

        Parameters:
        - options. Object whose excludefilters and limitfilters attributes
          are passed to the checker; it is kept as self.options and its
          includefuzzy, includereview, includeheader and autocorrect
          attributes are read later by filterunit() and filterfile().
        - checkerclasses. List of checker classes; defaults to
          checks.StandardChecker plus checks.StandardUnitChecker.
        - checkerconfig. Checker configuration object, or None."""
        if checkerclasses is None:
            checkerclasses = [checks.StandardChecker, checks.StandardUnitChecker]
        # checkerconfig defaults to None, so only read its targetlanguage
        # when a configuration was actually supplied.
        # NOTE(review): assumes TeeChecker accepts languagecode=None - confirm.
        languagecode = None
        if checkerconfig is not None:
            languagecode = checkerconfig.targetlanguage
        self.checker = checks.TeeChecker(
            checkerconfig=checkerconfig,
            excludefilters=options.excludefilters,
            limitfilters=options.limitfilters,
            checkerclasses=checkerclasses,
            languagecode=languagecode
        )
        self.options = options

    def getfilterdocs(self):
        """lists the docs for filters available on checker...

        Return value:
        - One "name<TAB>docstring" line per filter, sorted and joined with
          newlines."""
        filterdict = self.checker.getfilters()
        # items() rather than iteritems() so this runs on Python 2 and 3
        filterdocs = sorted("%s\t%s" % (name, filterfunc.__doc__)
                            for (name, filterfunc) in filterdict.items())
        return "\n".join(filterdocs)

    def filterunit(self, unit):
        """runs filters on an element

        Return value:
        - [] when the unit is skipped (header, excluded fuzzy/review unit)
          or when autocorrect mode could not produce a correction.
        - the autocorrect module itself, used as a marker, when the unit's
          target was replaced by an automatic correction.
        - otherwise whatever self.checker.run_filters(unit) returned."""
        if unit.isheader():
            return []
        if not self.options.includefuzzy and unit.isfuzzy():
            return []
        if not self.options.includereview and unit.isreview():
            return []
        failures = self.checker.run_filters(unit)
        if failures and self.options.autocorrect:
            # we can't get away with bad unquoting / requoting if we're going to change the result...
            correction = autocorrect.correct(unit.source, unit.target)
            if correction:
                unit.target = correction
                return autocorrect
            # ignore failures we can't correct when in autocorrect mode
            return []
        return failures

    def filterfile(self, transfile):
        """Runs filters on a translation store object.
        Parameters:
        - transfile. A translation store object.
        Return value:
        - A new translation store object with the results of the filter included."""
        newtransfile = type(transfile)()
        for unit in transfile.units:
            filterresult = self.filterunit(unit)
            if not filterresult:
                continue
            # the autocorrect module is the marker filterunit() returns for
            # a corrected unit; anything else maps filter names to messages
            if filterresult is not autocorrect:
                for filtername, filtermessage in filterresult.items():
                    unit.adderror(filtername, filtermessage)
                    if isinstance(filtermessage, checks.SeriousFilterFailure):
                        unit.markfuzzy()
            newtransfile.addunit(unit)
        # compare the number of units: a list compared against an int is
        # always true on Python 2 and a TypeError on Python 3
        if self.options.includeheader and len(newtransfile.units) > 0:
            newtransfile.units.insert(0, newtransfile.makeheader())
            newtransfile.changeencoding("UTF-8")
        return newtransfile
class FilterOptionParser(optrecurse.RecursiveOptionParser):
    """a specialized Option Parser for filter tools..."""

    def __init__(self, formats):
        """construct the specialized Option Parser

        Parameters:
        - formats. Passed unchanged to RecursiveOptionParser.__init__."""
        optrecurse.RecursiveOptionParser.__init__(self, formats)
        self.set_usage()
        self.add_option("-l", "--listfilters", action="callback", dest='listfilters',
            default=False, callback_kwargs={'dest_value': True},
            callback=self.parse_noinput, help="list filters available")

    def parse_noinput(self, option, opt, value, parser, *args, **kwargs):
        """this sets an option to true, but also sets input to - to prevent an error"""
        setattr(parser.values, option.dest, kwargs['dest_value'])
        parser.values.input = "-"

    def _requirefile(self, optionname, filename):
        """reports an error through self.error() if filename does not exist"""
        if not os.path.exists(filename):
            # NOTE(review): self.error is presumably optparse's
            # OptionParser.error, which does not return - confirm.
            self.error("%s %r does not exist" % (optionname, filename))

    def _readwords(self, optionname, filename):
        """returns a dict keyed on the stripped, non-blank lines of filename"""
        self._requirefile(optionname, filename)
        wordfile = open(filename)
        try:
            words = [line.strip() for line in wordfile]
        finally:
            # close explicitly; the handle used to be left for the
            # garbage collector
            wordfile.close()
        # blank lines are skipped so the empty string is never registered
        # as a word
        return dict.fromkeys([word for word in words if word])

    def _readchars(self, optionname, filename):
        """returns the complete contents of filename"""
        self._requirefile(optionname, filename)
        charsfile = open(filename)
        try:
            return charsfile.read()
        finally:
            charsfile.close()

    def run(self):
        """parses the arguments, and runs recursiveprocess with the resulting options

        Builds the checker configuration from the --language,
        --notranslatefile, --musttranslatefile and --validcharsfile options,
        attaches a pocheckfilter to the options as options.checkfilter, then
        either prints the filter documentation (--listfilters) or processes
        the input."""
        (options, args) = self.parse_args()
        if options.filterclass is None:
            checkerclasses = [checks.StandardChecker, checks.StandardUnitChecker]
        else:
            checkerclasses = [options.filterclass, checks.StandardUnitChecker]
        checkerconfig = checks.CheckerConfig(targetlanguage=options.targetlanguage)
        if options.notranslatefile:
            checkerconfig.notranslatewords.update(
                self._readwords("notranslatefile", options.notranslatefile))
        if options.musttranslatefile:
            checkerconfig.musttranslatewords.update(
                self._readwords("musttranslatefile", options.musttranslatefile))
        if options.validcharsfile:
            checkerconfig.updatevalidchars(
                self._readchars("validcharsfile", options.validcharsfile))
        options.checkfilter = pocheckfilter(options, checkerclasses, checkerconfig)
        if not options.checkfilter.checker.combinedfilters:
            self.error("No valid filters were specified")
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        self.usepsyco(options)
        if options.listfilters:
            # single-argument call form behaves the same on Python 2 and 3
            print(options.checkfilter.getfilterdocs())
        else:
            self.recursiveprocess(options)
def runfilter(inputfile, outputfile, templatefile, checkfilter=None):
    """Filter inputfile with checkfilter and write the outcome to outputfile.

    Parameters:
    - inputfile. Handed to factory.getobject() to obtain the store.
    - outputfile. Object with a write() method receiving str(result).
    - templatefile. Not used by this function.
    - checkfilter. Object providing filterfile(); despite the None default
      one must be supplied.
    Return value:
    - 0 if the filtered store is empty (nothing is written), otherwise 1."""
    filtered = checkfilter.filterfile(factory.getobject(inputfile))
    if not filtered.isempty():
        outputfile.write(str(filtered))
        return 1
    return 0
def cmdlineparser():
    """Create the FilterOptionParser for pofilter with all options registered.

    Return value:
    - The configured parser; its description is this module's docstring and
      'checkfilter' is appended to its passthrough list."""
    formats = {}
    for extension in ("po", "pot", "xliff", "xlf", "tmx"):
        formats[extension] = (extension, runfilter)
    # entry keyed on None, mapped to po just like the named extensions
    formats[None] = ("po", runfilter)

    parser = FilterOptionParser(formats)

    # simple on/off switches: (flag, dest, action, default, help)
    switches = (
        ("--review", "includereview", "store_true", True,
         "include units marked for review (default)"),
        ("--noreview", "includereview", "store_false", True,
         "exclude units marked for review"),
        ("--fuzzy", "includefuzzy", "store_true", True,
         "include units marked fuzzy (default)"),
        ("--nofuzzy", "includefuzzy", "store_false", True,
         "exclude units marked fuzzy"),
        ("--header", "includeheader", "store_true", False,
         "include a PO header in the output"),
        ("--autocorrect", "autocorrect", "store_true", False,
         "output automatic corrections where possible rather than describing issues"),
    )
    for flag, dest, action, default, helptext in switches:
        parser.add_option("", flag, dest=dest, action=action,
                          default=default, help=helptext)

    parser.add_option("", "--language", dest="targetlanguage", default=None,
        help="set target language code (e.g. af-ZA) [required for spell check and recommended in general]", metavar="LANG")

    # project specific checkers: (flag, checker class, project name in help)
    # NOTE(review): --wx selects KdeChecker, same as --kde; presumably
    # intentional - confirm.
    projects = (
        ("--openoffice", checks.OpenOfficeChecker, "OpenOffice"),
        ("--mozilla", checks.MozillaChecker, "Mozilla"),
        ("--gnome", checks.GnomeChecker, "Gnome"),
        ("--kde", checks.KdeChecker, "KDE"),
        ("--wx", checks.KdeChecker, "wxWidgets"),
    )
    for flag, checkerclass, project in projects:
        parser.add_option("", flag, dest="filterclass",
                          action="store_const", default=None, const=checkerclass,
                          help="use the standard checks for %s translations" % project)

    parser.add_option("", "--excludefilter", dest="excludefilters",
        action="append", default=[], type="string", metavar="FILTER",
        help="don't use FILTER when filtering")
    parser.add_option("-t", "--test", dest="limitfilters",
        action="append", default=None, type="string", metavar="FILTER",
        help="only use test FILTERs specified with this option when filtering")

    # options naming a file to read: (option name, help)
    fileoptions = (
        ("notranslatefile",
         "read list of untranslatable words from FILE (must not be translated)"),
        ("musttranslatefile",
         "read list of translatable words from FILE (must be translated)"),
        ("validcharsfile",
         "read list of all valid characters from FILE (must be in UTF-8)"),
    )
    for name, helptext in fileoptions:
        parser.add_option("", "--" + name, dest=name,
                          default=None, type="string", metavar="FILE",
                          help=helptext)

    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
def main():
    """Command line entry point: build the option parser and run it."""
    cmdlineparser().run()
# only start the tool when this file is executed as a script
if __name__ == "__main__":
    main()