fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / tools / pogrep.py
blobefd01053d56a7a810bc6aaffaccd1bd78eba6f57
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2002-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Grep XLIFF, Gettext PO and TMX localization files
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
31 from translate.storage import factory
32 from translate.misc import optrecurse
33 from translate.misc.multistring import multistring
34 from translate.lang import data
35 import re
36 import locale
class GrepFilter:
    """Selects the units of a translation store whose text matches a search string.

    The filter can look at the source text, the target text, the notes and
    the locations of each unit.  Matching is either a plain substring test
    or a regular expression search, optionally case-insensitive, optionally
    inverted, and optionally ignoring an accelerator character.
    """

    def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False, invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False):
        """Builds a grep filter for the given search string.

        searchstring  -- text (or regular expression) to look for; byte
                         strings are decoded using C{encoding}
        searchparts   -- collection naming the parts to search: 'source',
                         'target', 'notes', 'locations' or the older names
                         'msgid', 'msgstr', 'comment'.  If empty or None,
                         source and target are searched.
        ignorecase    -- compare case-insensitively
        useregexp     -- treat searchstring as a regular expression
        invertmatch   -- report strings that do NOT match
        accelchar     -- accelerator marker to strip from the tested text
        encoding      -- encoding used to decode a byte-string searchstring
        includeheader -- ask filterfile() to put a header in non-empty output
        """
        if isinstance(searchstring, unicode):
            self.searchstring = searchstring
        else:
            self.searchstring = searchstring.decode(encoding)
        self.searchstring = data.normalize(self.searchstring)
        if searchparts:
            # For now we still support the old terminology, except for the old 'source'
            # which has a new meaning now.
            self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
            self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
            self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
            self.search_locations = 'locations' in searchparts
        else:
            self.search_source = True
            self.search_target = True
            self.search_notes = False
            self.search_locations = False
        self.ignorecase = ignorecase
        self.useregexp = useregexp
        if self.useregexp:
            # Compile the pattern exactly as given.  Lowercasing a regular
            # expression would turn escapes such as \S, \W or \D into their
            # opposites, so case-insensitivity is requested through flags.
            # re.UNICODE keeps the case folding working for non-ASCII letters.
            flags = 0
            if self.ignorecase:
                flags = re.IGNORECASE | re.UNICODE
            self.searchpattern = re.compile(self.searchstring, flags)
        if self.ignorecase:
            # matches() lowercases the tested text, so the plain-text needle
            # must be lowercased as well.
            self.searchstring = self.searchstring.lower()
        self.invertmatch = invertmatch
        self.accelchar = accelchar
        self.includeheader = includeheader

    def matches(self, teststr):
        """Returns True if teststr satisfies the search (False otherwise).

        The text is passed through data.normalize, lowercased when ignoring
        case, and stripped of accelerator markers before it is compared.
        With invertmatch set the result is negated.
        """
        teststr = data.normalize(teststr)
        if self.ignorecase:
            teststr = teststr.lower()
        if self.accelchar:
            # A doubled accelerator becomes "#" first so that the second
            # replacement only removes single accelerator markers.  Plain
            # string replacement is used so that an accelerator which happens
            # to be a regex metacharacter is still treated literally.
            teststr = teststr.replace(self.accelchar * 2, "#")
            teststr = teststr.replace(self.accelchar, "")
        if self.useregexp:
            found = self.searchpattern.search(teststr) is not None
        else:
            found = self.searchstring in teststr
        if self.invertmatch:
            found = not found
        return found

    def _matchesany(self, text):
        """Returns True if text, or any string of a multistring, matches."""
        if isinstance(text, multistring):
            strings = text.strings
        else:
            strings = [text]
        for string in strings:
            if self.matches(string):
                return True
        return False

    def filterunit(self, unit):
        """Returns True if any selected part of the unit matches.

        Header units never match.  The parts are tried in the order source,
        target, notes, locations, and the first match decides; a part that
        does not match no longer prevents the remaining parts from being
        checked.
        """
        if unit.isheader():
            return False

        if self.search_source and self._matchesany(unit.source):
            return True

        if self.search_target and self._matchesany(unit.target):
            return True

        if self.search_notes and self.matches(unit.getnotes()):
            return True

        if self.search_locations and self.matches(u" ".join(unit.getlocations())):
            return True

        return False

    def filterfile(self, thefile):
        """Returns a new store of the same type holding the matching units.

        When includeheader is set and at least one unit matched, the header
        of thefile (or a freshly made one if thefile has none) is inserted
        as the first unit of the result.
        """
        thenewfile = type(thefile)()
        for unit in thefile.units:
            if self.filterunit(unit):
                thenewfile.addunit(unit)
        # Only add a header to output that actually contains matches.
        if self.includeheader and thenewfile.units:
            if thefile.units[0].isheader():
                thenewfile.units.insert(0, thefile.units[0])
            else:
                thenewfile.units.insert(0, thenewfile.makeheader())
        return thenewfile
class GrepOptionParser(optrecurse.RecursiveOptionParser):
    """Option parser for the grep tool: the first free argument is the search string."""

    def parse_args(self, args=None, values=None):
        """Parses the command line and assigns free-standing arguments.

        The first positional argument becomes options.searchstring.  Any
        remaining positional arguments fill in options.input and
        options.output when those were not given as explicit options.
        Returns the (options, args) pair, where args holds whatever
        positional arguments are left over.
        """
        options, leftover = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # the search string is mandatory and always comes first
        if not leftover:
            self.error("At least one argument must be given for the search string")
        else:
            options.searchstring = leftover[0]
            leftover = leftover[1:]
        if leftover and not options.input:
            if options.output:
                # output was given explicitly, so every free argument is an input
                options.input = leftover
                leftover = []
            else:
                # keep the last free argument back: it will become the output
                options.input = leftover[:-1]
                leftover = leftover[-1:]
        if leftover and not options.output:
            options.output = leftover[-1]
            leftover = leftover[:-1]
        if leftover:
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        # a one-element input list is unwrapped to the element itself
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, leftover)

    def set_usage(self, usage=None):
        """Sets the usage string; without one, builds it from the registered options."""
        if usage is not None:
            super(GrepOptionParser, self).set_usage(usage)
            return
        optionusages = [self.getusagestring(option) for option in self.option_list]
        self.usage = "%prog searchstring " + " ".join(optionusages)

    def run(self):
        """Parses the arguments, builds the GrepFilter and starts recursiveprocess."""
        (options, args) = self.parse_args()
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        # the search string is decoded using the locale's preferred encoding
        options.checkfilter = GrepFilter(
            options.searchstring,
            options.searchparts,
            options.ignorecase,
            options.useregexp,
            options.invertmatch,
            options.accelchar,
            locale.getpreferredencoding(),
            options.includeheader,
        )
        self.usepsyco(options)
        self.recursiveprocess(options)
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filters inputfile through checkfilter and writes the result to outputfile.

    templatefile is accepted but not used.  Returns True if anything was
    written, False if the filtered store was empty.
    """
    sourcestore = factory.getobject(inputfile)
    matchedstore = checkfilter.filterfile(sourcestore)
    if not matchedstore.isempty():
        outputfile.write(str(matchedstore))
        return True
    return False
def cmdlineparser():
    """Creates the GrepOptionParser with all of the grep tool's options registered."""
    # every supported extension is written back out under the same extension
    formats = {}
    for extension in ("po", "pot", "xliff", "xlf", "xlff", "tmx"):
        formats[extension] = (extension, rungrep)
    # NOTE(review): the None key is presumably the fallback format - confirm in optrecurse
    formats[None] = ("po", rungrep)

    grepparser = GrepOptionParser(formats)
    searchable = ["source", "target", "notes", "locations", "msgid", "msgstr", "comment"]
    grepparser.add_option(
        "", "--search", dest="searchparts", action="append",
        type="choice", choices=searchable, metavar="SEARCHPARTS",
        help="searches the given parts (source, target, notes and locations)")
    grepparser.add_option(
        "-I", "--ignore-case", dest="ignorecase", action="store_true",
        default=False, help="ignore case distinctions")
    grepparser.add_option(
        "-e", "--regexp", dest="useregexp", action="store_true",
        default=False, help="use regular expression matching")
    grepparser.add_option(
        "-v", "--invert-match", dest="invertmatch", action="store_true",
        default=False, help="select non-matching lines")
    accelerators = ["&", "_", "~"]
    grepparser.add_option(
        "", "--accelerator", dest="accelchar", action="store",
        type="choice", choices=accelerators, metavar="ACCELERATOR",
        help="ignores the given accelerator when matching")
    grepparser.add_option(
        "", "--header", dest="includeheader", action="store_true",
        default=False, help="include a PO header in the output")
    # the usage line is generated from the options, so set it once they all exist
    grepparser.set_usage()
    grepparser.description = __doc__
    grepparser.passthrough.append('checkfilter')
    return grepparser
def main():
    """Builds the command line parser and runs it."""
    cmdlineparser().run()
# only run the tool when executed as a script, not when imported
if __name__ == "__main__":
    main()