3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #===------------------------------------------------------------------------===#
12 clang-format git integration
13 ============================
15 This file provides a clang-format integration for git. Put it somewhere in your
16 path and ensure that it is executable. Then, "git clang-format" will invoke
17 clang-format on the changes in current files or a specific commit.
19 For further details, run:
22 Requires Python 2.7 or Python 3
25 from __future__ import absolute_import, division, print_function
35 usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
39 If zero or one commits are given, run clang-format on all lines that differ
40 between the working directory and <commit>, which defaults to HEAD. Changes are
41 only applied to the working directory, or in the stage/index.
44 To format staged changes, i.e everything that's been `git add`ed:
47 To also format everything touched in the most recent commit:
48 git clang-format HEAD~1
50 If you're on a branch off main, to format everything touched on your branch:
53 If two commits are given (requires --diff), run clang-format on all lines in the
54 second <commit> that differ from the first <commit>.
56 The following git-config settings set the default of the corresponding option:
59 clangFormat.extensions
63 # Name of the temporary index file in which to save the output of clang-format.
64 # This file is created within the .git directory.
65 temp_index_basename = 'clang-format-index'
68 Range = collections.namedtuple('Range', 'start, count')
72 config = load_git_config()
74 # In order to keep '--' yet allow options after positionals, we need to
75 # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
76 # nargs=argparse.REMAINDER disallows options after positionals.)
79 idx = argv.index('--')
83 dash_dash = argv[idx:]
86 default_extensions = ','.join([
87 # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
91 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', # C++
92 'ccm', 'cppm', 'cxxm', 'c++m', # C++ Modules
94 # Other languages that clang-format supports
95 'proto', 'protodevel', # Protocol Buffers
103 p = argparse.ArgumentParser(
104 usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
106 p.add_argument('--binary',
107 default=config.get('clangformat.binary', 'clang-format'),
108 help='path to clang-format'),
109 p.add_argument('--commit',
110 default=config.get('clangformat.commit', 'HEAD'),
111 help='default commit to use if none is specified'),
112 p.add_argument('--diff', action='store_true',
113 help='print a diff instead of applying the changes')
114 p.add_argument('--diffstat', action='store_true',
115 help='print a diffstat instead of applying the changes')
116 p.add_argument('--extensions',
117 default=config.get('clangformat.extensions',
119 help=('comma-separated list of file extensions to format, '
120 'excluding the period and case-insensitive')),
121 p.add_argument('-f', '--force', action='store_true',
122 help='allow changes to unstaged files')
123 p.add_argument('-p', '--patch', action='store_true',
124 help='select hunks interactively')
125 p.add_argument('-q', '--quiet', action='count', default=0,
126 help='print less information')
127 p.add_argument('--staged', '--cached', action='store_true',
128 help='format lines in the stage instead of the working dir')
129 p.add_argument('--style',
130 default=config.get('clangformat.style', None),
131 help='passed to clang-format'),
132 p.add_argument('-v', '--verbose', action='count', default=0,
133 help='print extra information')
134 # We gather all the remaining positional arguments into 'args' since we need
135 # to use some heuristics to determine whether or not <commit> was present.
136 # However, to print pretty messages, we make use of metavar and help.
137 p.add_argument('args', nargs='*', metavar='<commit>',
138 help='revision from which to compute the diff')
139 p.add_argument('ignored', nargs='*', metavar='<file>...',
140 help='if specified, only consider differences in these files')
141 opts = p.parse_args(argv)
143 opts.verbose -= opts.quiet
146 commits, files = interpret_args(opts.args, dash_dash, opts.commit)
149 die('--staged is not allowed when two commits are given')
151 die('--diff is required when two commits are given')
154 die('at most two commits allowed; %d given' % len(commits))
155 changed_lines = compute_diff_and_extract_lines(commits, files, opts.staged)
156 if opts.verbose >= 1:
157 ignored_files = set(changed_lines)
158 filter_by_extension(changed_lines, opts.extensions.lower().split(','))
159 # The computed diff outputs absolute paths, so we must cd before accessing
162 filter_symlinks(changed_lines)
163 if opts.verbose >= 1:
164 ignored_files.difference_update(changed_lines)
167 'Ignoring changes in the following files (wrong extension or symlink):')
168 for filename in ignored_files:
169 print(' %s' % filename)
171 print('Running clang-format on the following files:')
172 for filename in changed_lines:
173 print(' %s' % filename)
175 if not changed_lines:
176 if opts.verbose >= 0:
177 print('no modified files to format')
181 old_tree = commits[1]
184 old_tree = create_tree_from_index(changed_lines)
187 old_tree = create_tree_from_workdir(changed_lines)
189 new_tree = run_clang_format_and_save_to_tree(changed_lines,
193 if opts.verbose >= 1:
194 print('old tree: %s' % old_tree)
195 print('new tree: %s' % new_tree)
197 if old_tree == new_tree:
198 if opts.verbose >= 0:
199 print('clang-format did not modify any files')
203 return print_diff(old_tree, new_tree)
205 return print_diffstat(old_tree, new_tree)
207 changed_files = apply_changes(old_tree, new_tree, force=opts.force,
208 patch_mode=opts.patch)
209 if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
210 print('changed files:')
211 for filename in changed_files:
212 print(' %s' % filename)
217 def load_git_config(non_string_options=None):
218 """Return the git configuration as a dictionary.
220 All options are assumed to be strings unless in `non_string_options`, which
221 is a dictionary mapping option name (in lower case) to either "--bool" or
223 if non_string_options is None:
224 non_string_options = {}
226 for entry in run('git', 'config', '--list', '--null').split('\0'):
229 name, value = entry.split('\n', 1)
231 # A setting with no '=' ('\n' with --null) is implicitly 'true'
234 if name in non_string_options:
235 value = run('git', 'config', non_string_options[name], name)
240 def interpret_args(args, dash_dash, default_commit):
241 """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
243 It is assumed that "--" and everything that follows has been removed from
244 args and placed in `dash_dash`.
246 If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
247 left (if present) are taken as commits. Otherwise, the arguments are checked
248 from left to right if they are commits or files. If commits are not given,
249 a list with `default_commit` is used."""
252 commits = [default_commit]
255 for commit in commits:
256 object_type = get_object_type(commit)
257 if object_type not in ('commit', 'tag'):
258 if object_type is None:
259 die("'%s' is not a commit" % commit)
261 die("'%s' is a %s, but a commit was expected" % (commit, object_type))
262 files = dash_dash[1:]
266 if not disambiguate_revision(args[0]):
268 commits.append(args.pop(0))
270 commits = [default_commit]
273 commits = [default_commit]
275 return commits, files
def disambiguate_revision(value):
    """Tell whether `value` names a revision (True) or a file (False).

    Dies if `value` is ambiguous, or if it names a git object that is neither
    a commit nor a tag.
    """
    # This call is made only for its failure mode: when `value` is neither a
    # commit nor a file, rev-parse dies with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind is None:
        # Not a git object at all, so it is treated as a filename.
        return False
    is_revision = kind in ('commit', 'tag')
    if is_revision:
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
def get_object_type(value):
    """Return the type of git object `value` as a string.

    The type is whatever `git cat-file -t` prints for `value`. None is
    returned when that command exits with a non-zero status, i.e. `value` is
    not a valid git object.
    """
    cat_file = subprocess.Popen(['git', 'cat-file', '-t', value],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    # stderr is captured only so that git's complaint is not shown.
    type_name, _ = cat_file.communicate()
    if cat_file.returncode == 0:
        return convert_string(type_name.strip())
    return None
303 def compute_diff_and_extract_lines(commits, files, staged):
304 """Calls compute_diff() followed by extract_lines()."""
305 diff_process = compute_diff(commits, files, staged)
306 changed_lines = extract_lines(diff_process.stdout)
307 diff_process.stdout.close()
309 if diff_process.returncode != 0:
310 # Assume error was already printed to stderr.
315 def compute_diff(commits, files, staged):
316 """Return a subprocess object producing the diff from `commits`.
318 The return value's `stdout` file object will produce a patch with the
319 differences between the working directory (or stage if --staged is used) and
320 the first commit if a single one was specified, or the difference between
321 both specified commits, filtered on `files` (if non-empty).
322 Zero context lines are used in the patch."""
323 git_tool = 'diff-index'
326 git_tool = 'diff-tree'
328 extra_args += ['--cached']
329 cmd = ['git', git_tool, '-p', '-U0'] + extra_args + commits + ['--']
331 p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
336 def extract_lines(patch_file):
337 """Extract the changed lines in `patch_file`.
339 The return value is a dictionary mapping filename to a list of (start_line,
342 The input must have been produced with ``-U0``, meaning unidiff format with
343 zero lines of context. The return value is a dict mapping filename to a
344 list of line `Range`s."""
346 for line in patch_file:
347 line = convert_string(line)
348 match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
350 filename = match.group(1).rstrip('\r\n')
351 match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
353 start_line = int(match.group(1))
356 line_count = int(match.group(3))
361 matches.setdefault(filename, []).append(Range(start_line, line_count))
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `dictionary` is modified in place; its keys are '/'-separated file paths.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The extension of each file is lower-cased before
    the comparison. Include '' to keep files that have no extension.
    """
    allowed_extensions = frozenset(allowed_extensions)
    for filename in list(dictionary):
        # Only the last path component can carry the extension. Splitting the
        # whole path would turn 'dir.d/Makefile' into the bogus extension
        # 'd/Makefile' and wrongly drop an extension-less file.
        basename = filename.rsplit('/', 1)[-1]
        _, period, extension = basename.rpartition('.')
        if not period:
            # rpartition() puts the whole name last when there is no period.
            extension = ''
        if extension.lower() not in allowed_extensions:
            del dictionary[filename]
def filter_symlinks(dictionary):
    """Remove, in place, every entry of `dictionary` whose key is a symlink."""
    # Collect first, delete afterwards: the dictionary must not change size
    # while it is being iterated.
    symlinked = [path for path in dictionary if os.path.islink(path)]
    for path in symlinked:
        del dictionary[path]
def cd_to_toplevel():
    """Make the top level of the git repository the current directory."""
    # `git rev-parse --show-toplevel` prints the repository's root directory.
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
def create_tree_from_workdir(filenames):
    """Build a git tree out of the working-directory copies of `filenames`.

    The filenames are handed to create_tree() in '--stdin' mode.

    Returns the object ID (SHA-1) of the created tree.
    """
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
def create_tree_from_index(filenames):
    """Build a git tree out of the index entries of `filenames`.

    For each file, the first record printed by `git ls-files --stage -z` is
    passed to create_tree() in '--index-info' mode.

    Returns the value returned by create_tree().
    """
    # Snapshot the environment before create_tree() runs: the files have to
    # be read from the original index, and create_tree() works under a
    # temporary index file.
    original_env = os.environ.copy()

    def staged_entries():
        # Evaluated lazily, while create_tree() is consuming the entries.
        for path in filenames:
            ls_files = subprocess.Popen(
                ['git', 'ls-files', '--stage', '-z', '--', path],
                env=original_env,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            output = ls_files.communicate()[0]
            first_record = output.split(b'\0')[0]
            yield convert_string(first_record)

    return create_tree(staged_entries(), '--index-info')
414 def run_clang_format_and_save_to_tree(changed_lines, revision=None,
415 binary='clang-format', style=None):
416 """Run clang-format on each file and save the result to a git tree.
418 Returns the object ID (SHA-1) of the created tree."""
419 # Copy the environment when formatting the files in the index, because the
420 # files have to be read from the original index.
421 env = os.environ.copy() if revision == '' else None
422 def iteritems(container):
424 return container.iteritems() # Python 2
425 except AttributeError:
426 return container.items() # Python 3
427 def index_info_generator():
428 for filename, line_ranges in iteritems(changed_lines):
429 if revision is not None:
430 if len(revision) > 0:
431 git_metadata_cmd = ['git', 'ls-tree',
432 '%s:%s' % (revision, os.path.dirname(filename)),
433 os.path.basename(filename)]
435 git_metadata_cmd = ['git', 'ls-files', '--stage', '--', filename]
436 git_metadata = subprocess.Popen(git_metadata_cmd, env=env,
437 stdin=subprocess.PIPE,
438 stdout=subprocess.PIPE)
439 stdout = git_metadata.communicate()[0]
440 mode = oct(int(stdout.split()[0], 8))
442 mode = oct(os.stat(filename).st_mode)
443 # Adjust python3 octal format so that it matches what git expects
444 if mode.startswith('0o'):
445 mode = '0' + mode[2:]
446 blob_id = clang_format_to_blob(filename, line_ranges,
451 yield '%s %s\t%s' % (mode, blob_id, filename)
452 return create_tree(index_info_generator(), '--index-info')
455 def create_tree(input_lines, mode):
456 """Create a tree object from the given input.
458 If mode is '--stdin', it must be a list of filenames. If mode is
459 '--index-info' it must be a list of values suitable for "git update-index
460 --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other mode
462 assert mode in ('--stdin', '--index-info')
463 cmd = ['git', 'update-index', '--add', '-z', mode]
464 with temporary_index_file():
465 p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
466 for line in input_lines:
467 p.stdin.write(to_bytes('%s\0' % line))
470 die('`%s` failed' % ' '.join(cmd))
471 tree_id = run('git', 'write-tree')
475 def clang_format_to_blob(filename, line_ranges, revision=None,
476 binary='clang-format', style=None, env=None):
477 """Run clang-format on the given file and save the result to a git blob.
479 Runs on the file in `revision` if not None, or on the file in the working
480 directory if `revision` is None. Revision can be set to an empty string to run
481 clang-format on the file in the index.
483 Returns the object ID (SHA-1) of the created blob."""
484 clang_format_cmd = [binary]
486 clang_format_cmd.extend(['-style='+style])
487 clang_format_cmd.extend([
488 '-lines=%s:%s' % (start_line, start_line+line_count-1)
489 for start_line, line_count in line_ranges])
490 if revision is not None:
491 clang_format_cmd.extend(['-assume-filename='+filename])
492 git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
493 git_show = subprocess.Popen(git_show_cmd, env=env, stdin=subprocess.PIPE,
494 stdout=subprocess.PIPE)
495 git_show.stdin.close()
496 clang_format_stdin = git_show.stdout
498 clang_format_cmd.extend([filename])
500 clang_format_stdin = subprocess.PIPE
502 clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
503 stdout=subprocess.PIPE)
504 if clang_format_stdin == subprocess.PIPE:
505 clang_format_stdin = clang_format.stdin
507 if e.errno == errno.ENOENT:
508 die('cannot find executable "%s"' % binary)
511 clang_format_stdin.close()
512 hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
513 hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
514 stdout=subprocess.PIPE)
515 clang_format.stdout.close()
516 stdout = hash_object.communicate()[0]
517 if hash_object.returncode != 0:
518 die('`%s` failed' % ' '.join(hash_object_cmd))
519 if clang_format.wait() != 0:
520 die('`%s` failed' % ' '.join(clang_format_cmd))
521 if git_show and git_show.wait() != 0:
522 die('`%s` failed' % ' '.join(git_show_cmd))
523 return convert_string(stdout).rstrip('\r\n')
526 @contextlib.contextmanager
527 def temporary_index_file(tree=None):
528 """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
529 the file afterward."""
530 index_path = create_temporary_index(tree)
531 old_index_path = os.environ.get('GIT_INDEX_FILE')
532 os.environ['GIT_INDEX_FILE'] = index_path
536 if old_index_path is None:
537 del os.environ['GIT_INDEX_FILE']
539 os.environ['GIT_INDEX_FILE'] = old_index_path
540 os.remove(index_path)
543 def create_temporary_index(tree=None):
544 """Create a temporary index file and return the created file's path.
546 If `tree` is not None, use that as the tree to read in. Otherwise, an
547 empty index is created."""
548 gitdir = run('git', 'rev-parse', '--git-dir')
549 path = os.path.join(gitdir, temp_index_basename)
552 run('git', 'read-tree', '--index-output='+path, tree)
def print_diff(old_tree, new_tree):
    """Show the diff from `old_tree` to `new_tree` on stdout.

    Returns the exit status of `git diff --exit-code`.
    """
    # Porcelain 'diff' is used rather than plumbing 'diff-tree': the output is
    # meant to be read by the user, and only the former does nice things like
    # color and pagination.
    #
    # --diff-filter=M restricts the output to modified files. `new_tree` only
    # contains the files that were modified, so without the filter every
    # unmodified file would show up as deleted.
    diff_cmd = ['git', 'diff', '--diff-filter=M', '--exit-code',
                old_tree, new_tree]
    completed = subprocess.run(diff_cmd)
    return completed.returncode
def print_diffstat(old_tree, new_tree):
    """Show the diffstat from `old_tree` to `new_tree` on stdout.

    Returns the exit status of `git diff --exit-code --stat`.
    """
    # Porcelain 'diff' is used rather than plumbing 'diff-tree': the output is
    # meant to be read by the user, and only the former does nice things like
    # color and pagination.
    #
    # --diff-filter=M restricts the output to modified files. `new_tree` only
    # contains the files that were modified, so without the filter every
    # unmodified file would show up as deleted.
    diffstat_cmd = ['git', 'diff', '--diff-filter=M', '--exit-code',
                    '--stat', old_tree, new_tree]
    completed = subprocess.run(diffstat_cmd)
    return completed.returncode
580 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
581 """Apply the changes in `new_tree` to the working directory.
583 Bails if there are local changes in those files and not `force`. If
584 `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
585 changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
586 '--name-only', old_tree,
587 new_tree).rstrip('\0').split('\0')
589 unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
591 print('The following files would be modified but '
592 'have unstaged changes:', file=sys.stderr)
593 print(unstaged_files, file=sys.stderr)
594 print('Please commit, stage, or stash them first.', file=sys.stderr)
597 # In patch mode, we could just as well create an index from the new tree
598 # and checkout from that, but then the user will be presented with a
599 # message saying "Discard ... from worktree". Instead, we use the old
600 # tree as the index and checkout from new_tree, which gives the slightly
601 # better message, "Apply ... to index and worktree". This is not quite
602 # right, since it won't be applied to the user's index, but oh well.
603 with temporary_index_file(old_tree):
604 subprocess.run(['git', 'checkout', '--patch', new_tree], check=True)
605 index_tree = old_tree
607 with temporary_index_file(new_tree):
608 run('git', 'checkout-index', '-a', '-f')
612 def run(*args, **kwargs):
613 stdin = kwargs.pop('stdin', '')
614 verbose = kwargs.pop('verbose', True)
615 strip = kwargs.pop('strip', True)
617 raise TypeError("run() got an unexpected keyword argument '%s'" % name)
618 p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
619 stdin=subprocess.PIPE)
620 stdout, stderr = p.communicate(input=stdin)
622 stdout = convert_string(stdout)
623 stderr = convert_string(stderr)
625 if p.returncode == 0:
628 print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
629 print(stderr.rstrip(), file=sys.stderr)
631 stdout = stdout.rstrip('\r\n')
634 print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
636 print(stderr.rstrip(), file=sys.stderr)
641 print('error:', message, file=sys.stderr)
def to_bytes(str_input):
    """Return `str_input` as binary data.

    Input that is already `bytes` is passed through untouched; anything else
    is encoded to UTF-8.
    """
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged. `bytes` are decoded as UTF-8. Any other
    text type (Python 2 `unicode`) is encoded to UTF-8, which on Python 2
    yields a native `str`.

    Raises UnicodeError if the data cannot be decoded or encoded as UTF-8.
    """
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Reached on Python 3 only; on Python 2 `bytes` is `str` and was
        # returned above. `bytes` has no encode(), so encoding it raised
        # AttributeError instead of producing text.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
658 def convert_string(bytes_input):
660 return to_string(bytes_input.decode('utf-8'))
661 except AttributeError: # 'str' object has no attribute 'decode'.
662 return str(bytes_input)
664 return str(bytes_input)
666 if __name__ == '__main__':