tools/clang/scripts/run_tool.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 """Wrapper script to help run clang tools across Chromium code.
   7
   8 How to use this tool:
   9 If you want to run the tool across all Chromium code:
  10 run_tool.py <tool> <path/to/compiledb>
  11
  12 If you want to include all files mentioned in the compilation database:
  13 run_tool.py <tool> <path/to/compiledb> --all
  14
  15 If you only want to run the tool across just chrome/browser and content/browser:
  16 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
  17
  18 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
  19 information, which documents the entire automated refactoring flow in Chromium.
  20
  21 Why use this tool:
  22 The clang tool implementation doesn't take advantage of multiple cores, and if
  23 it fails mysteriously in the middle, all the generated replacements will be
  24 lost.
  25
  26 Unfortunately, if the work is simply sharded across multiple cores by running
  27 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
  28 the same time. To work around that, clang tools that are run using this tool
  29 should output edits to stdout in the following format:
  30
  31 ==== BEGIN EDITS ====
  32 r:::<file path>:::<offset>:::<length>:::<replacement text>
  33 r:::<file path>:::<offset>:::<length>:::<replacement text>
  34 ...etc...
  35 ==== END EDITS ====
  36
  37 Any generated edits are applied once the clang tool has finished running
  38 across Chromium, regardless of whether some instances failed or not.
  39 """
  40
  41 import collections
  42 import functools
  43 import json
  44 import multiprocessing
  45 import os.path
  46 import pipes
  47 import subprocess
  48 import sys
  49
  50
  51 Edit = collections.namedtuple(
  52     'Edit', ('edit_type', 'offset', 'length', 'replacement'))
  53
  54
  55 def _GetFilesFromGit(paths = None):
  56   """Gets the list of files in the git repository.
  57
  58   Args:
  59     paths: Prefix filter for the returned paths. May contain multiple entries.
  60   """
  61   args = []
  62   if sys.platform == 'win32':
  63     args.append('git.bat')
  64   else:
  65     args.append('git')
  66   args.append('ls-files')
  67   if paths:
  68     args.extend(paths)
  69   command = subprocess.Popen(args, stdout=subprocess.PIPE)
  70   output, _ = command.communicate()
  71   return [os.path.realpath(p) for p in output.splitlines()]
  72
  73
  74 def _GetFilesFromCompileDB(build_directory):
  75   """ Gets the list of files mentioned in the compilation database.
  76
  77   Args:
  78     build_directory: Directory that contains the compile database.
  79   """
  80   compiledb_path = os.path.join(build_directory, 'compile_commands.json')
  81   with open(compiledb_path, 'rb') as compiledb_file:
  82     json_commands = json.load(compiledb_file)
  83
  84   return [os.path.join(entry['directory'], entry['file'])
  85           for entry in json_commands]
  86
  87
  88 def _ExtractEditsFromStdout(build_directory, stdout):
  89   """Extracts generated list of edits from the tool's stdout.
  90
  91   The expected format is documented at the top of this file.
  92
  93   Args:
  94     build_directory: Directory that contains the compile database. Used to
  95       normalize the filenames.
  96     stdout: The stdout from running the clang tool.
  97
  98   Returns:
  99     A dictionary mapping filenames to the associated edits.
 100   """
 101   lines = stdout.splitlines()
 102   start_index = lines.index('==== BEGIN EDITS ====')
 103   end_index = lines.index('==== END EDITS ====')
 104   edits = collections.defaultdict(list)
 105   for line in lines[start_index + 1:end_index]:
 106     try:
 107       edit_type, path, offset, length, replacement = line.split(':::', 4)
 108       replacement = replacement.replace("\0", "\n");
 109       # Normalize the file path emitted by the clang tool.
 110       path = os.path.realpath(os.path.join(build_directory, path))
 111       edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
 112     except ValueError:
 113       print 'Unable to parse edit: %s' % line
 114   return edits
 115
 116
 117 def _ExecuteTool(toolname, build_directory, filename):
 118   """Executes the tool.
 119
 120   This is defined outside the class so it can be pickled for the multiprocessing
 121   module.
 122
 123   Args:
 124     toolname: Path to the tool to execute.
 125     build_directory: Directory that contains the compile database.
 126     filename: The file to run the tool over.
 127
 128   Returns:
 129     A dictionary that must contain the key "status" and a boolean value
 130     associated with it.
 131
 132     If status is True, then the generated edits are stored with the key "edits"
 133     in the dictionary.
 134
 135     Otherwise, the filename and the output from stderr are associated with the
 136     keys "filename" and "stderr" respectively.
 137   """
 138   command = subprocess.Popen((toolname, '-p', build_directory, filename),
 139                              stdout=subprocess.PIPE,
 140                              stderr=subprocess.PIPE)
 141   stdout, stderr = command.communicate()
 142   if command.returncode != 0:
 143     return {'status': False, 'filename': filename, 'stderr': stderr}
 144   else:
 145     return {'status': True,
 146             'edits': _ExtractEditsFromStdout(build_directory, stdout)}
 147
 148
 149 class _CompilerDispatcher(object):
 150   """Multiprocessing controller for running clang tools in parallel."""
 151
 152   def __init__(self, toolname, build_directory, filenames):
 153     """Initializer method.
 154
 155     Args:
 156       toolname: Path to the tool to execute.
 157       build_directory: Directory that contains the compile database.
 158       filenames: The files to run the tool over.
 159     """
 160     self.__toolname = toolname
 161     self.__build_directory = build_directory
 162     self.__filenames = filenames
 163     self.__success_count = 0
 164     self.__failed_count = 0
 165     self.__edit_count = 0
 166     self.__edits = collections.defaultdict(list)
 167
 168   @property
 169   def edits(self):
 170     return self.__edits
 171
 172   @property
 173   def failed_count(self):
 174     return self.__failed_count
 175
 176   def Run(self):
 177     """Does the grunt work."""
 178     pool = multiprocessing.Pool()
 179     result_iterator = pool.imap_unordered(
 180         functools.partial(_ExecuteTool, self.__toolname,
 181                           self.__build_directory),
 182         self.__filenames)
 183     for result in result_iterator:
 184       self.__ProcessResult(result)
 185     sys.stdout.write('\n')
 186     sys.stdout.flush()
 187
 188   def __ProcessResult(self, result):
 189     """Handles result processing.
 190
 191     Args:
 192       result: The result dictionary returned by _ExecuteTool.
 193     """
 194     if result['status']:
 195       self.__success_count += 1
 196       for k, v in result['edits'].iteritems():
 197         self.__edits[k].extend(v)
 198         self.__edit_count += len(v)
 199     else:
 200       self.__failed_count += 1
 201       sys.stdout.write('\nFailed to process %s\n' % result['filename'])
 202       sys.stdout.write(result['stderr'])
 203       sys.stdout.write('\n')
 204     percentage = (
 205         float(self.__success_count + self.__failed_count) /
 206         len(self.__filenames)) * 100
 207     sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
 208         self.__success_count, self.__failed_count, self.__edit_count,
 209         percentage))
 210     sys.stdout.flush()
 211
 212
 213 def _ApplyEdits(edits, clang_format_diff_path):
 214   """Apply the generated edits.
 215
 216   Args:
 217     edits: A dict mapping filenames to Edit instances that apply to that file.
 218     clang_format_diff_path: Path to the clang-format-diff.py helper to help
 219       automatically reformat diffs to avoid style violations. Pass None if the
 220       clang-format step should be skipped.
 221   """
 222   edit_count = 0
 223   for k, v in edits.iteritems():
 224     # Sort the edits and iterate through them in reverse order. Sorting allows
 225     # duplicate edits to be quickly skipped, while reversing means that
 226     # subsequent edits don't need to have their offsets updated with each edit
 227     # applied.
 228     v.sort()
 229     last_edit = None
 230     with open(k, 'rb+') as f:
 231       contents = bytearray(f.read())
 232       for edit in reversed(v):
 233         if edit == last_edit:
 234           continue
 235         last_edit = edit
 236         contents[edit.offset:edit.offset + edit.length] = edit.replacement
 237         if not edit.replacement:
 238           _ExtendDeletionIfElementIsInList(contents, edit.offset)
 239         edit_count += 1
 240       f.seek(0)
 241       f.truncate()
 242       f.write(contents)
 243     if clang_format_diff_path:
 244       # TODO(dcheng): python3.3 exposes this publicly as shlex.quote, but Chrome
 245       # uses python2.7. Use the deprecated interface until Chrome uses a newer
 246       # Python.
 247       if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
 248           pipes.quote(k), clang_format_diff_path), shell=True) != 0:
 249         print 'clang-format failed for %s' % k
 250   print 'Applied %d edits to %d files' % (edit_count, len(edits))
 251
 252
 253 _WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
 254
 255
 256 def _ExtendDeletionIfElementIsInList(contents, offset):
 257   """Extends the range of a deletion if the deleted element was part of a list.
 258
 259   This rewriter helper makes it easy for refactoring tools to remove elements
 260   from a list. Even if a matcher callback knows that it is removing an element
 261   from a list, it may not have enough information to accurately remove the list
 262   element; for example, another matcher callback may end up removing an adjacent
 263   list element, or all the list elements may end up being removed.
 264
 265   With this helper, refactoring tools can simply remove the list element and not
 266   worry about having to include the comma in the replacement.
 267
 268   Args:
 269     contents: A bytearray with the deletion already applied.
 270     offset: The offset in the bytearray where the deleted range used to be.
 271   """
 272   char_before = char_after = None
 273   left_trim_count = 0
 274   for byte in reversed(contents[:offset]):
 275     left_trim_count += 1
 276     if byte in _WHITESPACE_BYTES:
 277       continue
 278     if byte in (ord(','), ord(':'), ord('('), ord('{')):
 279       char_before = chr(byte)
 280     break
 281
 282   right_trim_count = 0
 283   for byte in contents[offset:]:
 284     right_trim_count += 1
 285     if byte in _WHITESPACE_BYTES:
 286       continue
 287     if byte == ord(','):
 288       char_after = chr(byte)
 289     break
 290
 291   if char_before:
 292     if char_after:
 293       del contents[offset:offset + right_trim_count]
 294     elif char_before in (',', ':'):
 295       del contents[offset - left_trim_count:offset]
 296
 297
 298 def main(argv):
 299   if len(argv) < 2:
 300     print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
 301     print '  <clang tool> is the clang tool that should be run.'
 302     print '  <compile db> is the directory that contains the compile database'
 303     print '  <path 1> <path2> ... can be used to filter what files are edited'
 304     return 1
 305
 306   clang_format_diff_path = os.path.join(
 307       os.path.dirname(os.path.realpath(__file__)),
 308       '../../../third_party/llvm/tools/clang/tools/clang-format',
 309       'clang-format-diff.py')
 310   # TODO(dcheng): Allow this to be controlled with a flag as well.
 311   # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
 312   # on Windows.
 313   if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
 314     clang_format_diff_path = None
 315
 316   if len(argv) == 3 and argv[2] == '--all':
 317     filenames = set(_GetFilesFromCompileDB(argv[1]))
 318     source_filenames = filenames
 319   else:
 320     filenames = set(_GetFilesFromGit(argv[2:]))
 321     # Filter out files that aren't C/C++/Obj-C/Obj-C++.
 322     extensions = frozenset(('.c', '.cc', '.m', '.mm'))
 323     source_filenames = [f for f in filenames
 324                         if os.path.splitext(f)[1] in extensions]
 325   dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
 326   dispatcher.Run()
 327   # Filter out edits to files that aren't in the git repository, since it's not
 328   # useful to modify files that aren't under source control--typically, these
 329   # are generated files or files in a git submodule that's not part of Chromium.
 330   _ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()
 331                     if os.path.realpath(k) in filenames},
 332               clang_format_diff_path)
 333   if dispatcher.failed_count != 0:
 334     return 2
 335   return 0
 336
 337
 338 if __name__ == '__main__':
 339   sys.exit(main(sys.argv[1:]))