Roll src/third_party/WebKit 3aea697:d9c6159 (svn 201973:201974)
[chromium-blink-merge.git] / tools / clang / scripts / run_tool.py
blob56cd5d24f8be2f400190136d9705972f9fb1e6c4
1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Wrapper script to help run clang tools across Chromium code.
8 How to use this tool:
9 If you want to run the tool across all Chromium code:
10 run_tool.py <tool> <path/to/compiledb>
12 If you want to include all files mentioned in the compilation database:
13 run_tool.py <tool> <path/to/compiledb> --all
15 If you only want to run the tool across just chrome/browser and content/browser:
16 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
18 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
19 information, which documents the entire automated refactoring flow in Chromium.
21 Why use this tool:
22 The clang tool implementation doesn't take advantage of multiple cores, and if
23 it fails mysteriously in the middle, all the generated replacements will be
24 lost.
26 Unfortunately, if the work is simply sharded across multiple cores by running
27 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
28 the same time. To work around that, clang tools that are run using this tool
29 should output edits to stdout in the following format:
31 ==== BEGIN EDITS ====
32 r:::<file path>:::<offset>:::<length>:::<replacement text>
33 r:::<file path>:::<offset>:::<length>:::<replacement text>
34 ...etc...
35 ==== END EDITS ====
37 Any generated edits are applied once the clang tool has finished running
38 across Chromium, regardless of whether some instances failed or not.
39 """
41 import collections
42 import functools
43 import json
44 import multiprocessing
45 import os.path
46 import pipes
47 import subprocess
48 import sys
# One replacement emitted by a clang tool: the edit type tag, the byte offset
# and length of the range being replaced, and the text to put there.
_EDIT_FIELDS = ['edit_type', 'offset', 'length', 'replacement']
Edit = collections.namedtuple('Edit', _EDIT_FIELDS)
def _GetFilesFromGit(paths=None):
  """Lists the files tracked by git, as real (symlink-resolved) paths.

  Runs `git ls-files` (`git.bat ls-files` on Windows) in the current working
  directory and resolves every line of its output with os.path.realpath.

  Args:
    paths: Optional list of path prefixes passed through to `git ls-files` to
        restrict the listing. May contain multiple entries.

  Returns:
    A list with one real path per line that git printed.
  """
  git_binary = 'git.bat' if sys.platform == 'win32' else 'git'
  args = [git_binary, 'ls-files']
  if paths:
    args += list(paths)
  process = subprocess.Popen(args, stdout=subprocess.PIPE)
  listing = process.communicate()[0]
  real_paths = []
  for tracked_path in listing.splitlines():
    real_paths.append(os.path.realpath(tracked_path))
  return real_paths
def _GetFilesFromCompileDB(build_directory):
  """Lists the files mentioned in the compilation database.

  Reads <build_directory>/compile_commands.json and, for every entry, joins
  its 'directory' and 'file' fields. The joined paths are returned as-is; they
  are not normalized or symlink-resolved.

  Args:
    build_directory: Directory that contains the compile database.

  Returns:
    A list of paths, one per entry in the database, in database order.
  """
  database_path = os.path.join(build_directory, 'compile_commands.json')
  with open(database_path, 'rb') as database_file:
    entries = json.load(database_file)

  files = []
  for entry in entries:
    files.append(os.path.join(entry['directory'], entry['file']))
  return files
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  Only the lines between '==== BEGIN EDITS ====' and '==== END EDITS ====' are
  considered. Each of them must consist of five fields separated by ':::':

    <edit type>:::<file path>:::<offset>:::<length>:::<replacement text>

  Lines that do not match (wrong field count, non-integer offset or length)
  are reported on stdout and skipped.

  Args:
    build_directory: Directory that contains the compile database. Used to
        normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping real file paths to the list of Edit instances for
    that file. The dictionary is empty when stdout does not contain both
    markers.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    end_index = lines.index('==== END EDITS ====')
  except ValueError:
    # No complete edits section. Letting the ValueError escape would propagate
    # out of the worker process and abort the whole run, discarding the edits
    # gathered from every other file.
    return edits

  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      # Convert before touching |edits| so that a malformed line does not
      # leave an empty entry behind for its path.
      offset = int(offset)
      length = int(length)
    except ValueError:
      print('Unable to parse edit: %s' % line)
      continue
    # NUL bytes in the replacement text stand in for newlines (presumably
    # because every edit has to fit on a single line of output).
    replacement = replacement.replace('\0', '\n')
    # Normalize the file path emitted by the clang tool.
    path = os.path.realpath(os.path.join(build_directory, path))
    edits[path].append(Edit(edit_type, offset, length, replacement))
  return edits
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and packages the outcome.

  The tool is invoked as `<toolname> -p <build_directory> <filename>` with
  both stdout and stderr captured.

  This is a module-level function rather than a method so that it can be
  pickled for the multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean under the key "status".

    When the tool exits with code 0, "status" is True and the edits parsed
    from its stdout are stored under "edits".

    Otherwise "status" is False, and the file name and the captured stderr
    are stored under "filename" and "stderr" respectively.
  """
  tool_argv = (toolname, '-p', build_directory, filename)
  process = subprocess.Popen(
      tool_argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = process.communicate()
  if process.returncode == 0:
    parsed_edits = _ExtractEditsFromStdout(build_directory, stdout)
    return {'status': True, 'edits': parsed_edits}
  return {'status': False, 'filename': filename, 'stderr': stderr}
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  Runs the tool once per file in a process pool, merges the edits returned by
  every successful run, and prints a running progress line to stdout.
  """

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len(), which is
          used to compute the progress percentage.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dict mapping file paths to the edits collected for them so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported failure."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every file and collects the results."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always reap the worker processes. On the normal path every result has
      # already been consumed, so terminating loses nothing; on an error it
      # stops outstanding work instead of leaving workers running.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Folds one result into the running totals and refreshes the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
        self.__edit_count += len(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress line overwrite this one.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, self.__edit_count,
        percentage))
    sys.stdout.flush()
def _RunClangFormatOnDiff(path, clang_format_diff_path):
  """Pipes `git diff -U0 <path>` into clang-format-diff.py; True on success.

  The two processes are connected directly instead of through a shell, so
  neither path needs quoting. As with a shell pipeline, the outcome is the
  exit status of the last command (the formatter).
  """
  try:
    git_diff = subprocess.Popen(['git', 'diff', '-U0', path],
                                stdout=subprocess.PIPE)
  except OSError:
    return False
  try:
    formatter = subprocess.Popen(
        ['python', clang_format_diff_path, '-i', '-p1', '-style=file'],
        stdin=git_diff.stdout)
  except OSError:
    formatter = None
  # Drop our copy of the read end so git sees a closed pipe if the formatter
  # exits early (or could not be started).
  git_diff.stdout.close()
  formatter_status = formatter.wait() if formatter is not None else 1
  git_diff.wait()
  return formatter_status == 0


def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Every file is rewritten in place. The per-file edit lists are sorted in
  place as a side effect.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
        automatically reformat diffs to avoid style violations. Pass None if
        the clang-format step should be skipped.
  """
  edit_count = 0
  for path, file_edits in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting puts
    # duplicate edits next to each other so they can be skipped cheaply, while
    # reversing means that edits still to be applied do not need their offsets
    # adjusted for the ones already applied.
    file_edits.sort()
    last_edit = None
    with open(path, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(file_edits):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may have removed a list element; also remove the
          # comma that went with it.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
    # Run only after the file has been closed so that git diff sees the
    # rewritten contents.
    if clang_format_diff_path:
      if not _RunClangFormatOnDiff(path, clang_format_diff_path):
        print('clang-format failed for %s' % path)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
_WHITESPACE_BYTES = frozenset(ord(c) for c in '\t\n\r ')


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion so that a removed list element takes its comma along.

  Refactoring tools can delete just the list element itself and leave the
  separator handling to this helper. That matters because a single matcher
  callback may not know enough to do it right: a neighbouring element may be
  removed by another callback, or every element may end up removed.

  The first non-whitespace byte on each side of the deletion point decides
  what happens:
    * Before is one of ',', ':', '(' or '{' and after is ',': the whitespace
      and comma following the deletion point are removed.
    * Before is ',' or ':' and after is not ',': the whitespace and the
      ',' / ':' preceding the deletion point are removed.
    * Anything else: |contents| is left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
        place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  list_openers = frozenset(ord(c) for c in ',:({')

  # Look left for the first byte that is not whitespace.
  char_before = None
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte not in _WHITESPACE_BYTES:
      if byte in list_openers:
        char_before = chr(byte)
      break

  if char_before is None:
    # The deleted range was not preceded by a list delimiter.
    return

  # Look right for the first byte that is not whitespace.
  comma_follows = False
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte not in _WHITESPACE_BYTES:
      comma_follows = byte == ord(',')
      break

  if comma_follows:
    del contents[offset:offset + right_trim_count]
  elif char_before in (',', ':'):
    del contents[offset - left_trim_count:offset]
def main(argv):
  """Runs a clang tool over the selected files and applies its edits.

  Args:
    argv: Command-line arguments without the program name:
        <clang tool> <compile DB directory> followed by either '--all' (run
        over every file in the compile database) or zero or more path prefixes
        used to filter the files that git tracks.

  Returns:
    The process exit code: 1 when too few arguments were given, 2 when the
    tool failed on at least one file, 0 otherwise.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print('  <clang tool> is the clang tool that should be run.')
    print('  <compile db> is the directory that contains the compile database')
    print('  <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
  # on Windows.
  if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
    clang_format_diff_path = None

  if len(argv) == 3 and argv[2] == '--all':
    # Hand the tool the paths exactly as the compile database spells them, but
    # keep a realpath-normalized copy for the edit filter below: edits are
    # keyed by real path, while database entries may be relative to the build
    # directory (e.g. contain '..') or go through symlinks.
    source_filenames = set(_GetFilesFromCompileDB(argv[1]))
    filenames = set(os.path.realpath(f) for f in source_filenames)
  else:
    filenames = set(_GetFilesFromGit(argv[2:]))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm'))
    source_filenames = [f for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0
# Script entry point: forward the command-line arguments (minus the program
# name) to main() and exit with the code it returns.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)