# -*- coding: utf-8 -*-
#
# Copyright 2002-2008 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Grep XLIFF, Gettext PO and TMX localization files

Matches are output to snippet files of the same type which can then be reviewed
and later merged using pomerge

See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
usage instructions
"""

import locale
import re

from translate.lang import data
from translate.misc import optrecurse
from translate.misc.multistring import multistring
from translate.storage import factory

def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False):
    """Build a grep filter for the given search string.

    searchstring  -- text (or regular expression) to look for; a byte string
                     is decoded using ``encoding``.
    searchparts   -- collection naming the unit parts to search ('source',
                     'target', 'notes', 'locations', or the legacy names
                     'msgid', 'msgstr', 'comment'). When empty or None the
                     source and target are searched, notes and locations
                     are not.
    ignorecase    -- lower-case the search string (and, in matches(), the
                     tested text) before comparing.
    useregexp     -- compile the search string as a regular expression.
    invertmatch   -- stored for matches(), which inverts its result.
    accelchar     -- accelerator marker that matches() strips from the text.
    encoding      -- encoding used to decode a byte-string searchstring.
    includeheader -- stored for filterfile(), which adds a header unit.
    """
    # Work on unicode internally: byte strings are decoded first.
    if isinstance(searchstring, unicode):
        self.searchstring = searchstring
    else:
        self.searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(self.searchstring)

    if searchparts:
        # For now we still support the old terminology, except for the old
        # 'source' which has a new meaning now.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        # Nothing requested explicitly: default to source and target only.
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    if self.ignorecase:
        # matches() lower-cases the tested text as well.
        self.searchstring = self.searchstring.lower()

    self.useregexp = useregexp
    if self.useregexp:
        # Compile once here; matches() reuses the compiled pattern.
        self.searchpattern = re.compile(self.searchstring)

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.includeheader = includeheader
def matches(self, teststr):
    """Return True if teststr satisfies the configured search, else False.

    The text is passed through data.normalize(), lower-cased when
    ignorecase is set, and stripped of accelerator markers when accelchar
    is set. It is then tested either against the compiled regular
    expression (useregexp) or for plain substring containment. The result
    is negated when invertmatch is set.
    """
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # Replace a doubled accelerator with a "#" placeholder first so the
        # single-marker removal below does not eat it (presumably a doubled
        # marker is an escaped literal -- confirm). Literal replacement is
        # used so a marker is never interpreted as a regular expression.
        teststr = teststr.replace(self.accelchar + self.accelchar, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        # Normalise the match object / None to a plain boolean.
        found = self.searchpattern.search(teststr) is not None
    else:
        found = teststr.find(self.searchstring) != -1
    if self.invertmatch:
        found = not found
    return found
def filterunit(self, unit):
    """Run the filter on a single unit and report whether it matches.

    Returns an empty list (falsy) for header units, True as soon as any
    selected part (source, target, notes, locations) matches, and False
    when none of the selected parts match.
    """
    # Header units are never reported as matches; the empty list is kept
    # as the return value for backward compatibility with existing callers.
    if unit.isheader():
        return []

    if self.search_source:
        # Plural units carry several strings; test each of them.
        if isinstance(unit.source, multistring):
            strings = unit.source.strings
        else:
            strings = [unit.source]
        for string in strings:
            if self.matches(string):
                return True

    if self.search_target:
        if isinstance(unit.target, multistring):
            strings = unit.target.strings
        else:
            strings = [unit.target]
        for string in strings:
            if self.matches(string):
                return True

    # Do not return early on a failed notes match, otherwise locations
    # would never be searched when both parts are selected.
    if self.search_notes:
        if self.matches(unit.getnotes()):
            return True

    if self.search_locations:
        if self.matches(u" ".join(unit.getlocations())):
            return True

    return False
def filterfile(self, thefile):
    """Run the filter on a translation file object.

    Returns a new, empty-constructed store of the same type as thefile
    holding every unit for which filterunit() is truthy. When
    includeheader is set and at least one unit matched, a header is put
    first: the header of thefile if its first unit is one, otherwise a
    freshly made header.
    """
    thenewfile = type(thefile)()
    for unit in thefile.units:
        if self.filterunit(unit):
            thenewfile.addunit(unit)
    # Compare the number of units, not the list itself: `list > 0` is
    # always true on Python 2 and a TypeError on Python 3, so a header
    # used to be added even when nothing matched.
    if self.includeheader and len(thenewfile.units) > 0:
        if thefile.units[0].isheader():
            thenewfile.units.insert(0, thefile.units[0])
        else:
            thenewfile.units.insert(0, thenewfile.makeheader())
    return thenewfile
class GrepOptionParser(optrecurse.RecursiveOptionParser):
    """a specialized Option Parser for the grep tool..."""

    def parse_args(self, args=None, values=None):
        """Parse the command line, handling implicit input/output args.

        The first free-standing argument is always the search string.
        Remaining free-standing arguments fill in --input and --output when
        those were not given explicitly. Anything left over is reported as
        an error through self.error(). Returns the (options, args) pair.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # some intelligence as to what reasonable people might give on the command line
        if args:
            options.searchstring = args[0]
            args = args[1:]
        else:
            self.error("At least one argument must be given for the search string")
        if args and not options.input:
            if not options.output:
                # Neither given: all but the last free arg are inputs, the
                # last one is kept back to become the output below.
                options.input = args[:-1]
                args = args[-1:]
            else:
                options.input = args
                args = []
        if args and not options.output:
            options.output = args[-1]
            args = args[:-1]
        if args:
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        # A single input is passed on as a plain value rather than a list.
        if isinstance(options.input, list) and len(options.input) == 1:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """Set the usage string.

        When usage is not given, it is built from "%prog searchstring"
        followed by getusagestring() for each registered option; otherwise
        the call is delegated to the parent class.
        """
        if usage is None:
            self.usage = "%prog searchstring " + " ".join([self.getusagestring(option) for option in self.option_list])
        else:
            super(GrepOptionParser, self).set_usage(usage)

    # NOTE(review): the `def` line of this method was missing from the
    # extracted source; `run` is assumed to be the name overridden from
    # RecursiveOptionParser -- confirm against the base class.
    def run(self):
        """parses the arguments, and runs recursiveprocess with the resulting options"""
        (options, args) = self.parse_args()
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        # The search string from the command line is decoded with the
        # locale's preferred encoding.
        options.checkfilter = GrepFilter(options.searchstring, options.searchparts,
                                         options.ignorecase, options.useregexp,
                                         options.invertmatch, options.accelchar,
                                         locale.getpreferredencoding(),
                                         options.includeheader)
        self.usepsyco(options)
        self.recursiveprocess(options)
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Read inputfile, filter it with checkfilter and write to outputfile.

    templatefile is accepted for signature compatibility and is not used.
    Returns False without writing when the filtered store is empty, True
    after the filtered store has been written.
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    # NOTE(review): the emptiness check and the return values were missing
    # from the extracted source and are restored here -- confirm that the
    # store type provides isempty().
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
def cmdlineparser():
    """Build and return the GrepOptionParser for the command-line tool.

    Registers rungrep for the po, pot, xliff, xlf, xlff and tmx formats
    (and for unknown formats, mapped to "po"), adds the grep-specific
    options, passes 'checkfilter' through to the processing function and
    uses the module docstring as the parser description.
    """
    # presumably: input extension -> (output extension, processor); verify
    # against RecursiveOptionParser.
    formats = {"po": ("po", rungrep), "pot": ("pot", rungrep),
               "xliff": ("xliff", rungrep), "xlf": ("xlf", rungrep), "xlff": ("xlff", rungrep),
               "tmx": ("tmx", rungrep),
               None: ("po", rungrep)}
    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
        action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment" ],
        metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
        action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=["&", "_", "~"],
        metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    parser.add_option("", "--header", dest="includeheader",
        action="store_true", default=False,
        help="include a PO header in the output")
    # NOTE(review): restored from a gap in the extracted source; rebuilds
    # the usage string so it lists the options added above -- confirm.
    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
def main():
    """Command-line entry point: build the option parser and run it."""
    parser = cmdlineparser()
    # NOTE(review): the call below and the guard body were missing from the
    # extracted source and are restored here -- confirm.
    parser.run()


if __name__ == '__main__':
    main()