3 # Copyright 2008 the Melange authors.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # __doc__ string is slightly unconventional because it is used as usage text
18 """%prog [OPTIONS] [FIND_REGEX] [REPLACE_FORMAT]
20 Script to list, search, and modify files using Python regex patterns.
22 OPTIONS: optional command-line flags; see %prog --help
24 FIND_REGEX: an optional valid Python regular expression pattern;
25 if supplied, only files containing at least one match will be processed;
26 matching file paths will be printed; if supplied, REPLACE_FORMAT will be
27 used to convert the match groups into formatted output.
29 REPLACE_FORMAT: an optional valid Python format string;
30 FIND_REGEX must be supplied first if REPLACE_FORMAT is supplied;
31 positional arguments will be replaced with ordered groups from
32 FIND_REGEX matches, and named arguments will be replaced with named
33 groups from FIND_REGEX matches."""
36 '"Todd Larsen" <tlarsen@google.com>',
class Error(Exception):
  """Root exception type for every failure reported by this module.

  Raise sites in this module construct it with two positional arguments:
  an errno-style integer code followed by a human-readable message.
  """
def compileRegex(pattern):
  """Compiles a Python regex pattern into a regex object.

  Args:
    pattern: valid Python regex pattern string, or an already-compiled
      regex object (in which case this function is a no-op)

  Returns:
    regex object compiled from pattern

  Raises:
    Error (with errno.EINVAL and a message naming the offending pattern)
    if pattern could not be compiled.
  """
  try:
    return re.compile(pattern)
  except re.error as error:
    # re.error is the public name of the exception re.compile() raises, and
    # the "as" form of the except clause is valid on Python 2.6+ and 3.x.
    msg = 're.compile: %s\n%s' % (error.args[0], pattern)
    raise Error(errno.EINVAL, msg)
def findAll(text_to_search, pattern):
  """Collects the text of every regex match found in a string.

  Args:
    text_to_search: string in which to look for matches
    pattern: Python regex pattern (or already-compiled regex object)
      selecting the matches to collect

  Returns:
    a (possibly empty) list of the matched substrings, in match order
  """
  collected = []

  def _remember(hit):
    # Called by sub() once per match; the matched text is recorded and then
    # handed straight back so the (discarded) substitution is a no-op.
    whole_match = hit.group()
    collected.append(whole_match)
    return whole_match

  # sub() is used purely to drive the callback over every match, the same
  # way replaceAll() walks matches; its return value is not needed.
  regex = compileRegex(pattern)
  regex.sub(_remember, text_to_search)

  return collected
def getFileContents(file_path):
  """Reads the contents of a file as a single string, then closes the file.

  Args:
    file_path: path to the file to read its contents into a string

  Returns:
    a single string containing the entire contents of the file

  Raises:
    IOError/OSError if the file cannot be opened or read.
  """
  file_to_read = open(file_path)

  try:
    # try/finally guarantees the handle is released even if read() fails
    return file_to_read.read()
  finally:
    file_to_read.close()
def findAllInFile(file_path, pattern, *ignored_args, **ignored_kwargs):
  """Action callable that lists every pattern match inside one file.

  Args:
    file_path: path of the file whose contents are searched
    pattern: see findAll()
    *ignored_args: extra positional command-line arguments this action
      does not use
    **ignored_kwargs: extra keyword command-line options this action
      does not use

  Returns:
    two-tuple of a boolean telling whether any match was found, and the
    (possibly empty) list of matched strings (used as the printable output
    of the action)
  """
  contents = getFileContents(file_path)
  hits = findAll(contents, pattern)

  return bool(hits), hits
def replaceAll(original, pattern, format):
  """Substitutes formatted text for all matches in a string.

  Args:
    original: original string in which to find and replace matches
    pattern: Python regex pattern (or already-compiled regex object)
      indicating which matches to replace
    format: Python %-format string specifying how to format the
      replacement text; how it is interpreted depends on the pattern:
        named groups: format is expected to contain named format specifiers
        unnamed groups: format is expected to contain exactly as many
          unnamed format specifiers as there are groups in the pattern
        no groups: format is expected to contain a single format specifier
          (in which case the entire match is supplied to it), or no format
          specifier at all (in which case the "format" string simply
          replaces the match with no substitutions from the match itself)

  Returns:
    two-tuple of the text with all matches replaced as specified by
    pattern and format, and a flat list in which each original match is
    immediately followed by its replacement text
  """
  matches_and_replacements = []

  def _tryFormat(values):
    """Returns format % values, or None when values do not fit format."""
    try:
      return format % values
    except TypeError:
      # NOTE(review): TypeError is what the % operator raises on an arity
      # or mapping/tuple mismatch; the original handler's exception type is
      # assumed to be the same -- confirm.
      return None

  def _replaceWithFormat(match):
    formatted_match = None

    if match.groupdict():
      # named groups are tried first, as a mapping
      formatted_match = _tryFormat(match.groupdict())

    # "is None" (rather than truthiness) is deliberate: a replacement that
    # legitimately formats to the empty string must be kept, not treated as
    # a failed attempt and replaced by the raw format string further down.
    if formatted_match is None and match.groups():
      formatted_match = _tryFormat(match.groups())

    if formatted_match is None:
      # no usable groups: offer the entire match to a single specifier
      formatted_match = _tryFormat(match.group())

    if formatted_match is None:
      # format has no (usable) specifiers, so it is the literal replacement
      formatted_match = format

    matches_and_replacements.append(match.group())
    matches_and_replacements.append(formatted_match)
    return formatted_match

  replaced = compileRegex(pattern).sub(_replaceWithFormat, original)

  return replaced, matches_and_replacements
def writeAltFileIfExt(path, ext, contents):
  """Writes a file if path and additional extension are supplied.

  If path or ext are not supplied, no file is written.

  Args:
    path: path of file to be written, to which ext will be appended
    ext: additional file extension that will be appended to path; a single
      leading '.' is optional and is not duplicated in the resulting name
    contents: contents of file to be written, as a string

  Raises:
    IOError/OSError if the file cannot be opened or written.
  """
  if (not path) or (not ext):
    return

  if ext.startswith('.'):
    # the separator is supplied below, so drop the caller's leading dot
    ext = ext[1:]

  alt_path = '%s.%s' % (path, ext)
  alt_file = open(alt_path, 'w')

  try:
    # try/finally guarantees the handle is released even if write() fails
    alt_file.write(contents)
  finally:
    alt_file.close()
def replaceAllInFile(file_path, pattern, format,
                     new_ext=None, backup_ext=None,
                     overwrite_files=False,
                     *ignored_args, **ignored_kwargs):
  """Substitutes formatted text for all matches in a file.

  Args:
    file_path: path of file to manipulate
    pattern, format: see replaceAll()
    new_ext: if supplied and the file has matches, the replaced text is
      written to file_path plus this extension
    backup_ext: if supplied and the file has matches, the original text is
      written to file_path plus this extension
    overwrite_files: if True, file_path itself is rewritten with the
      replaced text (only when that text differs from the original)
    *ignored_args: other positional arguments which are ignored
      command-line arguments not used by this action callable
    **ignored_kwargs: other keyword arguments which are ignored
      command-line options not used by this action callable

  Returns:
    two-tuple of boolean indicating if any match was found and a
    list of printable output text lines containing pairs of original
    pattern matches each followed by the formatted replacement

  Raises:
    IOError/OSError if a file cannot be read or written.
  """
  original = getFileContents(file_path)

  replaced, matches_and_replacements = replaceAll(
      original, pattern, format)

  if not matches_and_replacements:
    return False, matches_and_replacements

  # each call below is a no-op when its extension was not supplied
  writeAltFileIfExt(file_path, new_ext, replaced)
  writeAltFileIfExt(file_path, backup_ext, original)

  # The original file is only ever touched when the caller explicitly asked
  # for it, and then only if the substitutions actually changed something.
  if overwrite_files and replaced != original:
    replaced_file = open(file_path, 'w')

    try:
      replaced_file.write(replaced)
    finally:
      replaced_file.close()

  return True, matches_and_replacements
def listFile(*ignored_args, **ignored_kwargs):
  """Action callable that does nothing and always reports a match.

  All arguments are accepted and ignored.

  Returns:
    the two-tuple (True, []): "matched", with no output lines
  """
  # File selection by name has already been done by the caller, so every
  # file that reaches this action counts as matched and adds no output.
  no_output = []
  return True, no_output
def _wrapOsError(error):
  """Converts an IOError/OSError into this module's Error exception."""
  details = error.args

  if len(details) > 1:
    # usual (errno, strerror) form
    return Error(details[0], '%s: %s' % (
        error.__class__.__name__, details[1]))

  # no (errno, strerror) pair available: report a generic I/O error code
  # instead of failing with an IndexError while building the message
  return Error(errno.EIO, '%s: %s' % (error.__class__.__name__, error))


def applyActionToFiles(action, action_args,
                       start_path='', abs_path=False, files_pattern='',
                       recurse_dirs=False, dirs_pattern='',
                       follow_symlinks=False, quiet_output=False,
                       hide_paths=False, hide_text=False, **action_options):
  """Applies a callable action to files, based on options and arguments.

  Directories are visited level by level starting at start_path, and the
  entries of each directory are processed in sorted order.

  Args:
    action: callable that expects a file path argument, positional arguments
      (action_args), and keyword options from the command-line options dict;
      and returns a "matched" boolean and a list of output strings
    action_args: list of positional arguments, if any; passed to action
    start_path: required path of initial directory to visit
    abs_path: optional boolean indicating to use absolute paths
    files_pattern: required Python regex (object or pattern) which selects
      which files to pass to the action callable
    recurse_dirs: boolean indicating if subdirectories should be traversed
    dirs_pattern: Python regex (object or pattern) which selects which
      subdirectories to traverse if recurse_dirs is True
    follow_symlinks: boolean indicating if symlinks should be traversed
    quiet_output: optional boolean indicating if output should be suppressed
    hide_paths: optional boolean indicating to omit file paths from output
    hide_text: optional boolean indicating to omit find/replace text from
      output
    **action_options: remaining keyword arguments that are passed unchanged
      to the action callable

  Returns:
    two-tuple containing an exit code (0 if the action matched at least one
    file, errno.ENOENT otherwise) and a (possibly empty) list of output
    strings

  Raises:
    Error exception if problems occur (file I/O, invalid regex, etc.).
  """
  exit_code = errno.ENOENT  # until some file is matched by the action
  output = []

  start_path = os.path.expandvars(os.path.expanduser(start_path))

  if abs_path:
    start_path = os.path.abspath(start_path)

  files_regex = compileRegex(files_pattern)

  # the directory pattern only matters (and is only validated) when
  # recursing
  dirs_regex = None
  if recurse_dirs:
    dirs_regex = compileRegex(dirs_pattern)

  paths = [start_path]

  while paths:
    sub_paths = []  # directories discovered at this level, visited next

    for path in paths:
      try:
        # os.listdir() replaces the dircache module (deprecated in
        # Python 2.6, removed in 3.0); sorted() yields a fresh sorted list
        items = sorted(os.listdir(path))
      except (IOError, OSError) as error:
        raise _wrapOsError(error)

      for item in items:
        item_path = os.path.join(path, item)

        if os.path.islink(item_path) and not follow_symlinks:
          continue  # do not follow symlinks (ignore them)

        if os.path.isdir(item_path):
          if recurse_dirs and dirs_regex.match(item):
            sub_paths.append(item_path)
          continue

        if not (os.path.isfile(item_path) and files_regex.match(item)):
          continue

        try:
          matched, found_output = action(item_path, *action_args,
                                         **action_options)
        except (IOError, OSError) as error:
          raise _wrapOsError(error)

        if not matched:
          continue

        exit_code = 0  # at least one matched file has now been found

        if (not quiet_output) and (not hide_paths):
          output.append(item_path)

        if (not quiet_output) and (not hide_text):
          output.extend(found_output)

    paths = sub_paths

  return exit_code, output
class _ErrorOptionParser(optparse.OptionParser):
  """optparse.OptionParser variant that reports failures by raising.

  The error() hook is overridden so that a parsing problem surfaces as this
  module's Error exception rather than terminating the process.
  """

  def error(self, msg):
    """Raises Error(errno.EINVAL, msg) in place of calling sys.exit().

    Args:
      msg: description of the command-line parsing problem
    """
    invalid_argument = errno.EINVAL
    raise Error(invalid_argument, msg)
372 """Returns a custom OptionParser for parsing command-line arguments.
374 parser
= _ErrorOptionParser(__doc__
)
376 filter_group
= optparse
.OptionGroup(parser
,
378 'Options used to select which files to process.')
380 filter_group
.add_option(
381 '-f', '--files', dest
='files_pattern',
382 default
='(?!^.*\.pyc|.*\.ico|.*\.gif|.*\.png|.*\.jpg$)',
383 metavar
='FILES_REGEX',
384 help=('Python regex pattern (*not* a glob!) defining files to process'
385 ' in each directory [default: %default]'))
387 filter_group
.add_option(
388 '-F', '--follow', dest
='follow_symlinks', default
=False,
390 help=('follow file and subdirectory symlinks (possibly *DANGEROUS*)'
391 ' [default: %default]'))
393 parser
.add_option_group(filter_group
)
395 dir_group
= optparse
.OptionGroup(parser
,
397 'Options used to indicate which directories to traverse.')
399 dir_group
.add_option(
400 '-s', '--start', dest
='start_path', default
=os
.curdir
, metavar
='PATH',
401 help='directory in which to start processing files [default: %default]')
403 dir_group
.add_option(
404 '-R', '--recursive', dest
='recurse_dirs', default
=False,
406 help='recurse into subdirectories [default: %default]')
408 dir_group
.add_option(
409 '-d', '--dirs', dest
='dirs_pattern', default
='^[^.].*$',
410 metavar
='SUBDIRS_REGEX',
411 help=('Python regex pattern (*not* a glob!) defining subdirectories to'
412 ' recurse into (if --recursive) [default: %default]'))
414 parser
.add_option_group(dir_group
)
416 output_group
= optparse
.OptionGroup(parser
,
418 'Options used to control program output.')
420 output_group
.add_option(
421 '-a', '--abspath', dest
='abs_path', default
=False, action
='store_true',
422 help=('output absolute paths instead of relative paths'
423 ' [default: %default]'))
425 output_group
.add_option(
426 '', '--nopaths', dest
='hide_paths', default
=False, action
='store_true',
427 help=('suppress printing of file path names for successfully matched'
428 ' files to stdout [default: %default]'))
430 output_group
.add_option(
431 '', '--notext', dest
='hide_text', default
=False, action
='store_true',
432 help=('suppress find/replace text output to stdout (but still print'
433 ' paths if not --nopath, and still perform replacements if'
434 ' specified) [default: %default]'))
436 output_group
.add_option(
437 '-q', '--quiet', dest
='quiet_output', default
=False, action
='store_true',
438 help=('suppress *all* printed output to stdout (but still perform'
439 ' replacements if specified) [default: %default]'))
441 parser
.add_option_group(output_group
)
443 replace_group
= optparse
.OptionGroup(parser
,
445 'Options applied when matches in files are replaced with substitutions.'
446 ' (Only possible if REPLACE_FORMAT is supplied.)')
448 replace_group
.add_option(
449 '-o', '--overwrite', dest
='overwrite_files', default
=False,
451 help=('overwrite original files with formatted text substituted for'
452 ' matches [default: %default]'))
454 replace_group
.add_option(
455 '-b', '--backup', dest
='backup_ext', default
='', metavar
='EXTENSION',
456 help=('if supplied, and file would be overwritten, backup original'
457 ' file with the supplied extension [default is no backups of'
458 ' overwritten files are kept]'))
460 replace_group
.add_option(
461 '-n', '--new', dest
='new_ext', default
='', metavar
='EXTENSION',
462 help=('if supplied, and file has matches and and is altered by'
463 ' substitutions, create a new file with the supplied extension'
464 ' [default is no new file is created]'))
466 parser
.add_option_group(replace_group
)
def _parseArgs(cmd_line_args):
  """Parses command-line arguments and picks the action they call for.

  Args:
    cmd_line_args: command-line arguments, excluding the argv[0] program name

  Returns:
    four-tuple of action callable, supplied command-line options (including
    those defined by defaults in the command-line parser) as a dict,
    remaining positional command-line arguments, and the parser itself

  Raises:
    Error if problems occurred during command-line argument parsing.
  """
  parser = _buildParser()
  options, args = parser.parse_args(args=cmd_line_args)

  # The number of positional arguments selects the action:
  #   0: neither FIND_REGEX nor REPLACE_FORMAT supplied, so just match based
  #      on file name and subdirectory name patterns
  #   1: FIND_REGEX supplied, but not REPLACE_FORMAT, so match on file and
  #      subdirectory names, and then on file contents
  #   2: FIND_REGEX and REPLACE_FORMAT both supplied, so match on file and
  #      subdirectory names, and then find and replace in file contents
  actions_by_arg_count = (listFile, findAllInFile, replaceAllInFile)
  arg_count = len(args)

  if arg_count >= len(actions_by_arg_count):
    raise Error(errno.EINVAL, 'too many (%d) arguments supplied:\n%s' % (
        arg_count, ' '.join(args)))

  action = actions_by_arg_count[arg_count]

  return action, vars(options), args, parser
def _main(argv):
  """Wrapper that catches exceptions, prints output, and returns exit status.

  Normal program output is printed to stdout. Error output (including
  exception text) is printed to stderr.

  Args:
    argv: script arguments, usually sys.argv; argv[0] is expected to be the
      program name and is skipped

  Returns:
    exit code suitable for sys.exit()
  """
  options = {}  # empty options, used if _parseArgs() fails
  parser = None  # only available once _parseArgs() has succeeded

  try:
    action, options, args, parser = _parseArgs(argv[1:])
    exit_code, output = applyActionToFiles(action, args, **options)

    if output:
      # explicit stream writes behave the same on Python 2 and Python 3,
      # unlike the Python-2-only print statement
      sys.stdout.write('\n'.join(output) + '\n')

  except Error as error:
    if not options.get('quiet_output'):
      sys.stderr.write('\nERROR: (%s: %s) %s\n' % (
          error.args[0], os.strerror(error.args[0]), error.args[1]) + '\n')

      # NOTE(review): usage text is assumed to be suppressed together with
      # the error text when quiet output was requested -- confirm.
      if parser:
        sys.stderr.write(parser.get_usage() + '\n')

    exit_code = error.args[0]

  return exit_code
if __name__ == '__main__':
  # Run as a script: hand the process exit status back to the shell.
  exit_status = _main(sys.argv)
  sys.exit(exit_status)