for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / tools / poconflicts.py
blobcd68477b9b40cf98bc55ffbac7d9f9450ecf0076
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2005-2007 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""Conflict finder for Gettext PO localization files

See: http://translate.sourceforge.net/wiki/toolkit/poconflicts for examples and
usage instructions
"""
28 from translate.storage import factory
29 from translate.storage import po
30 from translate.misc import optrecurse
31 import sys
32 import os
class ConflictOptionParser(optrecurse.RecursiveOptionParser):
    """a specialized Option Parser for the conflict tool...

    Input files are read into ``self.textmap`` (cleaned source text ->
    list of ``(target, unit, filename)`` tuples).  Sources that ended up
    with more than one distinct target are collected in
    ``self.conflictmap`` and written out as one PO file per conflicting
    word in the output directory.
    """

    def parse_args(self, args=None, values=None):
        """parses the command line options, handling implicit input/output args

        Freestanding (positional) arguments are used to fill in whichever of
        ``options.input`` / ``options.output`` was not given explicitly:

        - neither given: the last argument is the output, the rest are input
        - only output given: every argument is an input
        - only input given: the last argument is the output

        Calls ``self.error`` when no output can be determined or when
        positional arguments are left over.  An input list holding exactly
        one entry is collapsed to that entry.

        Returns the ``(options, args)`` tuple; ``args`` is empty on success.
        """
        # Deliberately call the plain optparse implementation directly.
        (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # some intelligence as to what reasonable people might give on the command line
        if args and not options.input:
            if not options.output:
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if not options.output:
            self.error("output file is required")
        if args:
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """sets the usage string - if usage not given, uses getusagestring for each option"""
        if usage is None:
            optionusage = " ".join([self.getusagestring(option) for option in self.option_list])
            self.usage = "%prog " + optionusage + \
                "\n input directory is searched for PO files, PO files with name of conflicting string are output in output directory"
        else:
            super(ConflictOptionParser, self).set_usage(usage)

    def run(self):
        """parses the arguments, and runs recursiveprocess with the resulting options"""
        (options, args) = self.parse_args()
        # Copy the parser's format tables onto the options object before processing.
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        self.usepsyco(options)
        self.recursiveprocess(options)

    def recursiveprocess(self, options):
        """recurse through directories and process files

        Collects every translated unit of every input file into
        ``self.textmap``, then builds the conflict map and writes the
        conflict files.  A file that fails to process is reported through
        ``self.warning`` and does not stop the run.
        """
        if self.isrecursive(options.input, 'input') and getattr(options, "allowrecursiveinput", True):
            if not self.isrecursive(options.output, 'output'):
                self.warning("Output directory does not exist. Attempting to create")
                try:
                    os.mkdir(options.output)
                except OSError:
                    # Only a failed mkdir is reported here; anything else
                    # (e.g. KeyboardInterrupt) must propagate untouched.
                    self.error(optrecurse.optparse.OptionValueError("Output directory does not exist, attempt to create failed"))
            if isinstance(options.input, list):
                inputfiles = self.recurseinputfilelist(options)
            else:
                inputfiles = self.recurseinputfiles(options)
        else:
            if options.input:
                # Single input: split it into directory + file name.
                inputfiles = [os.path.basename(options.input)]
                options.input = os.path.dirname(options.input)
            else:
                inputfiles = [options.input]
        self.textmap = {}
        self.initprogressbar(inputfiles, options)
        for inputpath in inputfiles:
            fullinputpath = self.getfullinputpath(options, inputpath)
            try:
                success = self.processfile(None, options, fullinputpath)
            except KeyboardInterrupt:
                # On old interpreters KeyboardInterrupt derives from
                # Exception; never turn a user abort into a warning.
                raise
            except Exception:
                self.warning("Error processing: input %s" % (fullinputpath), options, sys.exc_info())
                success = False
            self.reportprogress(inputpath, success)
        del self.progressbar
        self.buildconflictmap()
        self.outputconflicts(options)

    def clean(self, string, options):
        """returns the cleaned string that contains the text to be matched

        Lower-cases the text when ``options.ignorecase`` is set, removes
        every character listed in ``options.accelchars`` and strips
        surrounding whitespace.
        """
        if options.ignorecase:
            string = string.lower()
        for accelerator in options.accelchars:
            string = string.replace(accelerator, "")
        return string.strip()

    def processfile(self, fileprocessor, options, fullinputpath):
        """process an individual file

        Adds one ``(target, unit, fullinputpath)`` entry to ``self.textmap``
        for every translated, non-header, non-plural unit, keyed on the
        cleaned source text.  With ``options.invert`` the roles of source
        and target are swapped.

        ``fileprocessor`` is accepted but not used.  Returns True once the
        whole file has been read, so the caller's success flag is a real
        boolean rather than None.
        """
        inputfile = self.openinputfile(options, fullinputpath)
        inputfile = factory.getobject(inputfile)
        for unit in inputfile.units:
            if unit.isheader() or not unit.istranslated():
                continue
            if unit.hasplural():
                continue
            if not options.invert:
                source = self.clean(unit.source, options)
                target = self.clean(unit.target, options)
            else:
                target = self.clean(unit.source, options)
                source = self.clean(unit.target, options)
            self.textmap.setdefault(source, []).append((target, unit, fullinputpath))
        return True

    def flatten(self, text, joinchar):
        """flattens text to just be words

        Alphanumeric characters are kept; each run of other characters that
        follows kept text is replaced by a single ``joinchar``.  Trailing
        ``joinchar`` characters are stripped from the result.
        """
        pieces = []
        for c in text:
            if c.isalnum():
                pieces.append(c)
            elif pieces and pieces[-1][-1:].isalnum():
                # Only the first non-word character after a word adds a joiner.
                pieces.append(joinchar)
        return "".join(pieces).rstrip(joinchar)

    def buildconflictmap(self):
        """work out which strings are conflicting

        Fills ``self.conflictmap`` with flattened source text -> list of
        ``(target, unit, filename)`` for every source longer than one
        character that has more than one distinct target.
        """
        self.conflictmap = {}
        for source, translations in self.textmap.items():
            if len(source) <= 1:
                continue
            if len(translations) <= 1:
                continue
            uniquetranslations = dict.fromkeys([target for target, unit, filename in translations])
            if len(uniquetranslations) <= 1:
                continue
            flatsource = self.flatten(source, " ")
            if not flatsource:
                # Nothing alphanumeric to name an output file after.
                continue
            # Distinct sources can flatten to the same key ("Open" and
            # "Open..."): merge them instead of letting one replace the
            # other, and never alias the lists stored in textmap.
            self.conflictmap.setdefault(flatsource, []).extend(translations)

    def outputconflicts(self, options):
        """saves the result of the conflict match

        Prints a summary line, groups the conflicts under the longest word
        of each flattened source, folds a word ending in an extra "s" into
        its base word, and writes one ``<word>.po`` file per group into
        ``options.output``.  Each written unit gets a ``# (poconflicts)``
        comment naming the file it came from.
        """
        print("%d/%d different strings have conflicts" % (len(self.conflictmap), len(self.textmap)))
        reducedmap = {}
        for source, translations in self.conflictmap.items():
            words = source.split()
            if not words:
                continue
            # Stable sort: of several equally long words the last one wins.
            words.sort(key=len)
            reducedmap.setdefault(words[-1], []).extend(translations)
        # reduce plurals
        # Longest first, so a chain such as "a" / "as" / "ass" folds from the
        # far end and no key is popped before it has absorbed its own plural.
        singulars = [word for word in reducedmap if word + "s" in reducedmap]
        singulars.sort(key=len, reverse=True)
        for word in singulars:
            reducedmap[word].extend(reducedmap.pop(word + "s"))
        for source, translations in reducedmap.items():
            flatsource = self.flatten(source, "-")
            fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po")
            conflictfile = po.pofile()
            for target, unit, filename in translations:
                unit.othercomments.append("# (poconflicts) %s\n" % filename)
                conflictfile.units.append(unit)
            outputfile = open(fulloutputpath, "w")
            try:
                outputfile.write(str(conflictfile))
            finally:
                # Close explicitly so the data is flushed and the handle
                # released even if the write fails.
                outputfile.close()
def main():
    """Command-line entry point: set up a ConflictOptionParser and run it."""
    # Both the "po" key and the None key map to the same ("po", None) entry.
    # NOTE(review): presumably None stands for input without a known
    # extension - confirm against optrecurse.
    po_entry = ("po", None)
    conflict_parser = ConflictOptionParser({"po": po_entry, None: po_entry})
    # (option strings, keyword settings), registered in this order.
    switch_specs = (
        (("-I", "--ignore-case"),
         dict(dest="ignorecase", action="store_true", default=False,
              help="ignore case distinctions")),
        (("-v", "--invert"),
         dict(dest="invert", action="store_true", default=False,
              help="invert the conflicts thus extracting conflicting destination words")),
        (("", "--accelerator"),
         dict(dest="accelchars", default="", metavar="ACCELERATORS",
              help="ignores the given accelerator characters when matching")),
    )
    for option_strings, settings in switch_specs:
        conflict_parser.add_option(*option_strings, **settings)
    conflict_parser.set_usage()
    conflict_parser.description = __doc__
    conflict_parser.run()
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()