2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Wrapper script to help run clang tools across Chromium code.
9 If you want to run the tool across all Chromium code:
10 run_tool.py <tool> <path/to/compiledb>
12 If you want to include all files mentioned in the compilation database:
13 run_tool.py <tool> <path/to/compiledb> --all
15 If you only want to run the tool across just chrome/browser and content/browser:
16 run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
18 Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
19 information, which documents the entire automated refactoring flow in Chromium.
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.
26 Unfortunately, if the work is simply sharded across multiple cores by running
27 multiple RefactoringTools, problems arise when they attempt to rewrite a file at
28 the same time. To work around that, clang tools that are run using this tool
29 should output edits to stdout in the following format:
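==== BEGIN EDITS ====
r:::<file path>:::<offset>:::<length>:::<replacement text>
r:::<file path>:::<offset>:::<length>:::<replacement text>
...etc...
==== END EDITS ====

Fields are separated by ':::'. Newlines inside <replacement text> must be
emitted as NUL bytes; they are turned back into newlines when the edits are
parsed.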
37 Any generated edits are applied once the clang tool has finished running
38 across Chromium, regardless of whether some instances failed or not.
44 import multiprocessing
# One edit emitted by a clang tool. Fields, in order: the edit type, the offset
# and length of the range being replaced, and the replacement text.
Edit = collections.namedtuple('Edit', 'edit_type offset length replacement')
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
      When None or empty, no filter is passed to git.

  Returns:
    A list holding os.path.realpath() of every line printed by
    `git ls-files`.
  """
  # On Windows git is launched through its git.bat wrapper.
  git = 'git.bat' if sys.platform == 'win32' else 'git'
  args = [git, 'ls-files']
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  return [os.path.realpath(p) for p in output.splitlines()]
def _GetFilesFromCompileDB(build_directory):
  """Lists the files mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database.

  Returns:
    A list with one path per database entry, formed by joining the entry's
    'directory' and 'file' values.
  """
  database = os.path.join(build_directory, 'compile_commands.json')
  with open(database, 'rb') as handle:
    entries = json.load(handle)

  files = []
  for command in entries:
    files.append(os.path.join(command['directory'], command['file']))
  return files
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. Lines between the
    markers that cannot be parsed are reported and skipped.

  Raises:
    ValueError: If either of the BEGIN/END marker lines is missing.
  """
  lines = stdout.splitlines()
  start_index = lines.index('==== BEGIN EDITS ====')
  end_index = lines.index('==== END EDITS ====')
  edits = collections.defaultdict(list)
  for line in lines[start_index + 1:end_index]:
    try:
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      # Convert the numeric fields before touching |edits|, so that a malformed
      # line cannot leave an empty entry behind in the defaultdict.
      offset = int(offset)
      length = int(length)
    except ValueError:
      print('Unable to parse edit: %s' % line)
      continue
    # NUL bytes in the replacement text stand for newlines.
    replacement = replacement.replace('\0', '\n')
    # Normalize the file path emitted by the clang tool.
    path = os.path.realpath(os.path.join(build_directory, path))
    edits[path].append(Edit(edit_type, offset, length, replacement))
  return edits
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the tool over a single file and packages the outcome.

  This lives outside the dispatcher class so that it can be pickled for the
  multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean under the key "status".

    When the tool exits with code 0, status is True and the edits parsed from
    its stdout are stored under the key "edits".

    Otherwise status is False, and the keys "filename" and "stderr" hold the
    file name and the tool's stderr output respectively.
  """
  argv = (toolname, '-p', build_directory, filename)
  process = subprocess.Popen(
      argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  out, err = process.communicate()
  if process.returncode == 0:
    parsed = _ExtractEditsFromStdout(build_directory, out)
    return {'status': True, 'edits': parsed}
  return {'status': False, 'filename': filename, 'stderr': err}
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len(), since the
        progress percentage is computed from it.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dict mapping each filename to the list of edits collected for it."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported failure."""
    return self.__failed_count

  def Run(self):
    """Does the grunt work."""
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Always shut the worker processes down, including when a worker raised
      # and the exception propagated out of the loop above.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
        self.__edit_count += len(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next status line overwrite this one.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, self.__edit_count,
        percentage))
    sys.stdout.flush()
def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Each file is read, patched in memory and rewritten in place. A summary line
  with the number of applied edits is printed at the end.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
      The lists are not modified.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
      automatically reformat diffs to avoid style violations. Pass None if the
      clang-format step should be skipped.
  """
  quote = None
  if clang_format_diff_path:
    # shlex.quote is the public name on newer Pythons; Python 2.7 only has
    # the older pipes.quote.
    try:
      from shlex import quote
    except ImportError:
      from pipes import quote

  edit_count = 0
  for k, v in edits.items():
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      # Walk the edits from the highest offset down. Sorting puts duplicate
      # edits next to each other so they can be skipped, and going backwards
      # means earlier offsets stay valid after each edit is applied.
      for edit in sorted(v, reverse=True):
        if edit == last_edit:
          continue
        last_edit = edit
        replacement = edit.replacement
        if not isinstance(replacement, (bytes, bytearray)):
          # Text has to be encoded before it can be spliced into a bytearray.
          replacement = replacement.encode('utf-8')
        contents[edit.offset:edit.offset + edit.length] = replacement
        if not replacement:
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Replace the file's contents with the patched buffer.
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      # Both paths are quoted because the command goes through the shell.
      if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
          quote(k), quote(clang_format_diff_path)), shell=True) != 0:
        print('clang-format failed for %s' % k)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None

  # Scan left past whitespace. Only ',', ':', '(' or '{' marks the deleted
  # element as having been inside a list.
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  # Scan right past whitespace, looking for the separator that followed the
  # deleted element.
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      # Another element follows: drop the comma that came after the deleted
      # one.
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      # The deleted element was the last one: drop the separator before it.
      del contents[offset - left_trim_count:offset]
def main(argv):
  """Runs a clang tool over the selected files and applies its edits.

  Args:
    argv: Command line arguments without the program name: the clang tool, the
      directory that contains the compile database, then either '--all' or
      any number of path filters.

  Returns:
    1 if fewer than two arguments were given, 2 if the tool failed on at least
    one file, and 0 otherwise.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print(' <clang tool> is the clang tool that should be run.')
    print(' <compile db> is the directory that contains the compile database')
    print(' <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
  # on Windows.
  if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
    clang_format_diff_path = None

  if len(argv) == 3 and argv[2] == '--all':
    filenames = set(_GetFilesFromCompileDB(argv[1]))
    source_filenames = filenames
  else:
    filenames = set(_GetFilesFromGit(argv[2:]))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.m', '.mm'))
    source_filenames = [f for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)