2 # -*- coding: utf-8 -*-
4 # Copyright 2005-2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Conflict finder for Gettext PO localization files.

See: http://translate.sourceforge.net/wiki/toolkit/poconflicts for examples and
usage instructions.
"""
import os
import sys

from translate.misc import optrecurse
from translate.storage import factory
from translate.storage import po
class ConflictOptionParser(optrecurse.RecursiveOptionParser):
    """Option parser that drives the PO conflict finder.

    Input files are scanned into ``self.textmap``, which maps a cleaned
    source string to a list of ``(target, unit, filename)`` tuples.  Sources
    with more than one distinct translation are collected in
    ``self.conflictmap`` and written out as one PO file per conflicting word.

    NOTE(review): this class was restored from a copy in which several
    statements were missing (``else``/``try`` lines, ``continue``/``return``/
    ``raise`` statements, dictionary initialisations and the ``def run``
    header).  Only the statements required by the surrounding syntax and data
    flow were put back; please confirm against the upstream source.
    """

    def parse_args(self, args=None, values=None):
        """Parse the command line, accepting bare input/output arguments.

        Free-standing arguments fill in whichever of ``--input`` and
        ``--output`` was not given explicitly: the last free argument becomes
        the output and anything before it becomes the input.

        Returns the ``(options, args)`` pair.  ``options.input`` is unwrapped
        to a single value when it is a one-element list.  ``self.error`` is
        called when no output was given or free arguments are left over.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # some intelligence as to what reasonable people might give on the command line
        if args and not options.input:
            if not options.output:
                # keep the last argument back: it becomes the output below
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if not options.output:
            self.error("output file is required")
        if args:
            # anything still unconsumed could not be assigned a role
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """Set the usage string.

        When ``usage`` is None the string is generated from
        ``getusagestring`` for every option in ``self.option_list``, followed
        by a note on how input and output are used; otherwise ``usage`` is
        handed to the parent class.
        """
        if usage is None:
            optionusage = " ".join([self.getusagestring(option) for option in self.option_list])
            self.usage = "%prog " + optionusage + \
                "\n input directory is searched for PO files, PO files with name of conflicting string are output in output directory"
        else:
            super(ConflictOptionParser, self).set_usage(usage)

    # NOTE(review): the ``def`` line of this method was missing; the name
    # ``run`` is assumed - confirm against the base class.
    def run(self):
        """Parse the arguments and run recursiveprocess with the resulting options."""
        (options, args) = self.parse_args()
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        self.usepsyco(options)
        self.recursiveprocess(options)

    def recursiveprocess(self, options):
        """Collect every input file, then build and write the conflict map.

        Works out the list of input files (a recursive search when the input
        is recursive, otherwise the single input path), feeds each one to
        ``processfile`` and finally calls ``buildconflictmap`` and
        ``outputconflicts``.  A failure in one file is reported through
        ``self.warning`` and does not stop the run; KeyboardInterrupt is
        re-raised.
        """
        if self.isrecursive(options.input, 'input') and getattr(options, "allowrecursiveinput", True):
            if not self.isrecursive(options.output, 'output'):
                try:
                    self.warning("Output directory does not exist. Attempting to create")
                    os.mkdir(options.output)
                except OSError:
                    self.error(optrecurse.optparse.OptionValueError("Output directory does not exist, attempt to create failed"))
            if isinstance(options.input, list):
                inputfiles = self.recurseinputfilelist(options)
            else:
                inputfiles = self.recurseinputfiles(options)
        else:
            # NOTE(review): the guard below was reconstructed; it presumably
            # protects os.path.* from an empty/None input - confirm.
            if options.input:
                inputfiles = [os.path.basename(options.input)]
                options.input = os.path.dirname(options.input)
            else:
                inputfiles = [options.input]
        # processfile() accumulates into this map, so it must exist up front
        self.textmap = {}
        self.initprogressbar(inputfiles, options)
        for inputpath in inputfiles:
            fullinputpath = self.getfullinputpath(options, inputpath)
            try:
                success = self.processfile(None, options, fullinputpath)
            except KeyboardInterrupt:
                # never swallow a user abort
                raise
            except Exception:
                self.warning("Error processing: input %s" % (fullinputpath,), options, sys.exc_info())
                success = False
            self.reportprogress(inputpath, success)
        self.buildconflictmap()
        self.outputconflicts(options)

    def clean(self, string, options):
        """Return the cleaned string that contains the text to be matched.

        Lower-cases the text when ``options.ignorecase`` is set, removes every
        character listed in ``options.accelchars`` and strips surrounding
        whitespace.
        """
        if options.ignorecase:
            string = string.lower()
        for accelerator in options.accelchars:
            string = string.replace(accelerator, "")
        return string.strip()

    def processfile(self, fileprocessor, options, fullinputpath):
        """Record every translated unit of one input file in ``self.textmap``.

        ``fileprocessor`` is accepted for interface compatibility and is not
        used.  Header units and untranslated units are skipped.  With
        ``options.invert`` set, the roles of source and target are swapped so
        that conflicts are grouped by translation instead of by source.
        """
        inputfile = self.openinputfile(options, fullinputpath)
        inputfile = factory.getobject(inputfile)
        for unit in inputfile.units:
            if unit.isheader() or not unit.istranslated():
                continue
            if not options.invert:
                source = self.clean(unit.source, options)
                target = self.clean(unit.target, options)
            else:
                target = self.clean(unit.source, options)
                source = self.clean(unit.target, options)
            self.textmap.setdefault(source, []).append((target, unit, fullinputpath))

    def flatten(self, text, joinchar):
        """Flatten text to just its words, separated by ``joinchar``.

        Alphanumeric characters are kept; each run of other characters that
        follows a word is replaced by ``joinchar``.  Leading runs are dropped
        and trailing ``joinchar`` characters are stripped from the result.

        NOTE(review): the loop was reconstructed from the surviving ``elif``
        and ``return`` lines - confirm against upstream.
        """
        flattext = ""
        for char in text:
            if char.isalnum():
                flattext += char
            elif flattext[-1:].isalnum():
                # only one separator per gap, and none before the first word
                flattext += joinchar
        return flattext.rstrip(joinchar)

    def buildconflictmap(self):
        """Work out which strings are conflicting.

        A source conflicts when ``self.textmap`` holds more than one distinct
        target for it.  Conflicts are stored in ``self.conflictmap`` under the
        space-flattened source text.
        """
        self.conflictmap = {}
        for source, translations in self.textmap.items():
            if len(translations) <= 1:
                continue
            distincttargets = set([target for target, unit, filename in translations])
            if len(distincttargets) > 1:
                # Different sources can flatten to the same key (e.g. "File!"
                # and "File?"); merge them instead of overwriting the earlier one.
                flatsource = self.flatten(source, " ")
                self.conflictmap.setdefault(flatsource, []).extend(translations)

    def outputconflicts(self, options):
        """Save the result of the conflict match.

        Prints a summary line, groups the conflicts by the longest word of
        each source, folds a word ending in "s" into the same word without
        the "s" when both are present, and writes one PO file per remaining
        word into ``options.output``.  Each written unit gets a
        ``# (poconflicts) <filename>`` comment naming the file it came from.
        """
        print("%d/%d different strings have conflicts" % (len(self.conflictmap), len(self.textmap)))
        reducedmap = {}
        for source, translations in self.conflictmap.items():
            words = source.split()
            # stable sort by length: the last entry is the longest word
            words.sort(key=len)
            # NOTE(review): choosing the longest word as the key was
            # reconstructed from the otherwise unused sort - confirm.
            if words:
                source = words[-1]
            reducedmap.setdefault(source, []).extend(translations)
        # reduce plurals
        plurals = {}
        for word in reducedmap:
            if word + "s" in reducedmap:
                plurals[word] = word + "s"
        # Merge longest words first so that a chain such as "a" / "as" / "ass"
        # never looks up an entry that an earlier merge already popped.
        for word in sorted(plurals, key=len, reverse=True):
            reducedmap[word].extend(reducedmap.pop(plurals[word]))
        for source, translations in reducedmap.items():
            flatsource = self.flatten(source, "-")
            fulloutputpath = os.path.join(options.output, flatsource + os.extsep + "po")
            conflictfile = po.pofile()
            for target, unit, filename in translations:
                unit.othercomments.append("# (poconflicts) %s\n" % filename)
                conflictfile.units.append(unit)
            outputfile = open(fulloutputpath, "w")
            try:
                outputfile.write(str(conflictfile))
            finally:
                # always release the handle, even if serialising/writing fails
                outputfile.close()
def main():
    """Build the conflict finder's command-line parser and run it.

    NOTE(review): the ``def`` header, the ``set_usage()``/``run()`` calls and
    the body of the ``__main__`` guard were missing and have been
    reconstructed; confirm against the upstream source.
    """
    formats = {"po": ("po", None), None: ("po", None)}
    parser = ConflictOptionParser(formats)
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-v", "--invert", dest="invert",
        action="store_true", default=False, help="invert the conflicts thus extracting conflicting destination words")
    parser.add_option("", "--accelerator", dest="accelchars", default="",
        metavar="ACCELERATORS", help="ignores the given accelerator characters when matching")
    # the usage string is generated from the option list, so rebuild it now
    # that the tool-specific options have been added
    parser.set_usage()
    parser.description = __doc__
    parser.run()


if __name__ == '__main__':
    main()