3 # ===- git-clang-format - ClangFormat Git Integration -------*- python -*--=== #
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 # ===----------------------------------------------------------------------=== #
12 clang-format git integration
13 ============================
15 This file provides a clang-format integration for git. Put it somewhere in your
16 path and ensure that it is executable. Then, "git clang-format" will invoke
17 clang-format on the changes in current files or a specific commit.
19 For further details, run:
22 Requires Python 3.6 or newer (the script uses f-strings and subprocess.run)
25 from __future__ import absolute_import, division, print_function
36 "git clang-format [OPTIONS] [<commit>] [<commit>|--staged] [--] [<file>...]"
40 If zero or one commits are given, run clang-format on all lines that differ
41 between the working directory and <commit>, which defaults to HEAD. Changes are
42 only applied to the working directory, or in the stage/index.
45 To format staged changes, i.e everything that's been `git add`ed:
48 To also format everything touched in the most recent commit:
49 git clang-format HEAD~1
51 If you're on a branch off main, to format everything touched on your branch:
54 If two commits are given (requires --diff), run clang-format on all lines in the
55 second <commit> that differ from the first <commit>.
57 The following git-config settings set the default of the corresponding option:
60 clangFormat.extensions
64 # Name of the temporary index file in which to save the output of clang-format.
65 # This file is created within the .git directory.
66 temp_index_basename = "clang-format-index"
69 Range = collections.namedtuple("Range", "start, count")
73 config = load_git_config()
75 # In order to keep '--' yet allow options after positionals, we need to
76 # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
77 # nargs=argparse.REMAINDER disallows options after positionals.)
80 idx = argv.index("--")
84 dash_dash = argv[idx:]
87 default_extensions = ",".join(
89 # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
106 "c++m", # C++ Modules
109 # Other languages that clang-format supports
111 "protodevel", # Protocol Buffers
128 "asciipb", # TextProto
132 p = argparse.ArgumentParser(
134 formatter_class=argparse.RawDescriptionHelpFormatter,
139 default=config.get("clangformat.binary", "clang-format"),
140 help="path to clang-format",
144 default=config.get("clangformat.commit", "HEAD"),
145 help="default commit to use if none is specified",
150 help="print a diff instead of applying the changes",
155 help="print a diffstat instead of applying the changes",
159 default=config.get("clangformat.extensions", default_extensions),
161 "comma-separated list of file extensions to format, "
162 "excluding the period and case-insensitive"
169 help="allow changes to unstaged files",
172 "-p", "--patch", action="store_true", help="select hunks interactively"
179 help="print less information",
185 help="format lines in the stage instead of the working dir",
189 default=config.get("clangformat.style", None),
190 help="passed to clang-format",
197 help="print extra information",
200 "--diff_from_common_commit",
203 "diff from the last common commit for commits in "
204 "separate branches rather than the exact point of the "
208 # We gather all the remaining positional arguments into 'args' since we need
209 # to use some heuristics to determine whether or not <commit> was present.
210 # However, to print pretty messages, we make use of metavar and help.
215 help="revision from which to compute the diff",
221 help="if specified, only consider differences in these files",
223 opts = p.parse_args(argv)
225 opts.verbose -= opts.quiet
228 commits, files = interpret_args(opts.args, dash_dash, opts.commit)
230 die("at most two commits allowed; %d given" % len(commits))
231 if len(commits) == 2:
233 die("--staged is not allowed when two commits are given")
235 die("--diff is required when two commits are given")
236 elif opts.diff_from_common_commit:
238 "--diff_from_common_commit is only allowed when two commits are "
242 if os.path.dirname(opts.binary):
243 opts.binary = os.path.abspath(opts.binary)
245 changed_lines = compute_diff_and_extract_lines(
246 commits, files, opts.staged, opts.diff_from_common_commit
248 if opts.verbose >= 1:
249 ignored_files = set(changed_lines)
250 filter_by_extension(changed_lines, opts.extensions.lower().split(","))
251 # The computed diff outputs absolute paths, so we must cd before accessing
254 filter_symlinks(changed_lines)
255 filter_ignored_files(changed_lines, binary=opts.binary)
256 if opts.verbose >= 1:
257 ignored_files.difference_update(changed_lines)
260 "Ignoring the following files (wrong extension, symlink, or "
261 "ignored by clang-format):"
263 for filename in ignored_files:
264 print(" %s" % filename)
266 print("Running clang-format on the following files:")
267 for filename in changed_lines:
268 print(" %s" % filename)
270 if not changed_lines:
271 if opts.verbose >= 0:
272 print("no modified files to format")
276 old_tree = commits[1]
279 old_tree = create_tree_from_index(changed_lines)
282 old_tree = create_tree_from_workdir(changed_lines)
284 new_tree = run_clang_format_and_save_to_tree(
285 changed_lines, revision, binary=opts.binary, style=opts.style
287 if opts.verbose >= 1:
288 print("old tree: %s" % old_tree)
289 print("new tree: %s" % new_tree)
291 if old_tree == new_tree:
292 if opts.verbose >= 0:
293 print("clang-format did not modify any files")
297 return print_diff(old_tree, new_tree)
299 return print_diffstat(old_tree, new_tree)
301 changed_files = apply_changes(
302 old_tree, new_tree, force=opts.force, patch_mode=opts.patch
304 if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
305 print("changed files:")
306 for filename in changed_files:
307 print(" %s" % filename)
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless they are listed in
    `non_string_options`, a dictionary mapping an option name (in lower case)
    to the `git config` type flag (for example "--bool") that is used to
    re-read that option.

    A setting that has no value is reported as the string "true"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run("git", "config", "--list", "--null").split("\0"):
        if not entry:
            # Splitting a NUL-terminated listing leaves an empty last field.
            continue
        # With --null the name and the value are separated by '\n'.
        name, separator, value = entry.partition("\n")
        if not separator:
            # A setting with no '=' ('\n' with --null) is implicitly 'true'
            value = "true"
        if name in non_string_options:
            # Ask git itself to interpret the value with the given type flag.
            value = run("git", "config", non_string_options[name], name)
        out[name] = value
    return out
335 def interpret_args(args, dash_dash, default_commit):
336 """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
338 It is assumed that "--" and everything that follows has been removed from
339 args and placed in `dash_dash`.
341 If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
342 left (if present) are taken as commits. Otherwise, the arguments are
343 checked from left to right if they are commits or files. If commits are not
344 given, a list with `default_commit` is used."""
347 commits = [default_commit]
# With an explicit "--", every argument to its left has to name a commit or a
# tag; anything else is reported through die().
350 for commit in commits:
351 object_type = get_object_type(commit)
352 if object_type not in ("commit", "tag"):
353 if object_type is None:
354 die("'%s' is not a commit" % commit)
357 "'%s' is a %s, but a commit was expected"
358 % (commit, object_type)
# dash_dash[0] is the "--" marker itself; the file names follow it.
360 files = dash_dash[1:]
# Without "--", disambiguate_revision() decides whether the leading argument
# is a revision (consumed as a commit) or already a file name.
364 if not disambiguate_revision(args[0]):
366 commits.append(args.pop(0))
368 commits = [default_commit]
371 commits = [default_commit]
373 return commits, files
def disambiguate_revision(value):
    """Return True if `value` is a revision, False if it is a file, or die.

    `value` counts as a revision when git reports its object type as
    "commit" or "tag", and as a file when it is not a git object at all. Any
    other object type ends the program through die()."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run("git", "rev-parse", value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ("commit", "tag"):
        return True
    die(
        "`%s` is a %s, but a commit or filename was expected"
        % (value, object_type)
    )
def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ["git", "cat-file", "-t", value]
    # stderr is captured only so that git's complaint about an unknown object
    # does not reach the user; its contents are not used.
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())
403 def compute_diff_and_extract_lines(commits, files, staged, diff_common_commit):
404 """Calls compute_diff() followed by extract_lines()."""
# compute_diff() hands back the diff subprocess; its stdout is the patch that
# extract_lines() parses.
405 diff_process = compute_diff(commits, files, staged, diff_common_commit)
406 changed_lines = extract_lines(diff_process.stdout)
# extract_lines() has iterated over the whole patch, so the pipe can be closed.
407 diff_process.stdout.close()
409 if diff_process.returncode != 0:
410 # Assume error was already printed to stderr.
415 def compute_diff(commits, files, staged, diff_common_commit):
416 """Return a subprocess object producing the diff from `commits`.
418 The return value's `stdout` file object will produce a patch with the
419 differences between the working directory (or stage if --staged is used) and
420 the first commit if a single one was specified, or the difference between
421 both specified commits, filtered on `files` (if non-empty).
422 Zero context lines are used in the patch."""
# A single commit is compared with `git diff-index`; two commits are compared
# with each other using `git diff-tree`.
423 git_tool = "diff-index"
425 if len(commits) == 2:
426 git_tool = "diff-tree"
427 if diff_common_commit:
# Collapse the two commits into one three-dot range; per the
# --diff_from_common_commit help text this diffs from their last common commit.
428 commits = [f"{commits[0]}...{commits[1]}"]
430 extra_args += ["--cached"]
# -p requests patch output and -U0 removes the context lines.
432 cmd = ["git", git_tool, "-p", "-U0"] + extra_args + commits + ["--"]
434 p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
439 def extract_lines(patch_file):
440 """Extract the changed lines in `patch_file`.
442 The return value is a dictionary mapping filename to a list of (start_line,
445 The input must have been produced with ``-U0``, meaning unidiff format with
446 zero lines of context. The return value is a dict mapping filename to a
447 list of line `Range`s."""
449 for line in patch_file:
450 line = convert_string(line)
# "+++ <prefix>/<path>" header: everything up to and including the first "/"
# is dropped, and the remainder is the file name.
451 match = re.search(r"^\+\+\+\ [^/]+/(.*)", line)
453 filename = match.group(1).rstrip("\r\n\t")
# Hunk header "@@ -<old> +<start>[,<count>]": group 1 is the start line in the
# new file and group 3 is the optional line count.
454 match = re.search(r"^@@ -[0-9,]+ \+(\d+)(,(\d+))?", line)
456 start_line = int(match.group(1))
459 line_count = int(match.group(3))
# Ranges are accumulated per file under the most recently seen file name.
464 matches.setdefault(filename, []).append(
465 Range(start_line, line_count)
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The empty string stands for "no extension".

    The extension is taken from the last path component only, so a dot in a
    directory name (as in "x.d/README") is not mistaken for the start of an
    extension. `dictionary` is modified in place; nothing is returned."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted below.
    for filename in list(dictionary):
        # Paths in git's diff output use "/" as the separator.
        basename = filename.rsplit("/", 1)[-1]
        _, dot, extension = basename.rpartition(".")
        if not dot:
            # No period at all: the file has the empty extension.
            extension = ""
        if extension.lower() not in allowed_extensions:
            del dictionary[filename]
def filter_symlinks(dictionary):
    """Delete every key in `dictionary` that is a symlink.

    Each key is tested with os.path.islink() as given, so relative names are
    resolved against the current working directory. `dictionary` is modified
    in place; nothing is returned."""
    # Collect the symlinks first so the dictionary is not resized while it is
    # being iterated.
    symlinks = [name for name in dictionary if os.path.islink(name)]
    for filename in symlinks:
        del dictionary[filename]
def filter_ignored_files(dictionary, binary):
    """Delete every key in `dictionary` that is ignored by clang-format.

    `binary` is the clang-format executable; it is run with `-list-ignored`
    followed by every key of `dictionary`, and each line it prints is taken
    as the name of an ignored file. `dictionary` is modified in place."""
    ignored_files = run(binary, "-list-ignored", *dictionary.keys())
    if not ignored_files:
        return
    # splitlines() copes with both LF and CRLF line endings, and pop() with a
    # default does not raise if a reported name is not one of the keys.
    for filename in ignored_files.splitlines():
        dictionary.pop(filename, None)
def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run("git", "rev-parse", "--show-toplevel")
    os.chdir(toplevel)
def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    `filenames` is an iterable of file names; they are handed to create_tree()
    in "--stdin" mode.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, "--stdin")
514 def create_tree_from_index(filenames):
# Builds a tree through create_tree() in "--index-info" mode, with one entry
# per name in `filenames` supplied by the nested generator.
515 # Copy the environment, because the files have to be read from the original
517 env = os.environ.copy()
519 def index_contents_generator():
520 for filename in filenames:
529 git_ls_files = subprocess.Popen(
532 stdin=subprocess.PIPE,
533 stdout=subprocess.PIPE,
535 stdout = git_ls_files.communicate()[0]
# Only the first NUL-terminated record of the command's output is used.
536 yield convert_string(stdout.split(b"\0")[0])
538 return create_tree(index_contents_generator(), "--index-info")
541 def run_clang_format_and_save_to_tree(
542 changed_lines, revision=None, binary="clang-format", style=None
544 """Run clang-format on each file and save the result to a git tree.
546 Returns the object ID (SHA-1) of the created tree."""
547 # Copy the environment when formatting the files in the index, because the
548 # files have to be read from the original index.
549 env = os.environ.copy() if revision == "" else None
551 def iteritems(container):
553 return container.iteritems() # Python 2
554 except AttributeError:
555 return container.items() # Python 3
557 def index_info_generator():
# `changed_lines` maps each file name to its line ranges; one index-info
# entry is yielded per file.
558 for filename, line_ranges in iteritems(changed_lines):
559 if revision is not None:
560 if len(revision) > 0:
564 "%s:%s" % (revision, os.path.dirname(filename)),
565 os.path.basename(filename),
575 git_metadata = subprocess.Popen(
578 stdin=subprocess.PIPE,
579 stdout=subprocess.PIPE,
581 stdout = git_metadata.communicate()[0]
# The first whitespace-separated field of the output is parsed as an octal
# file mode.
582 mode = oct(int(stdout.split()[0], 8))
# Otherwise the mode comes from the file on disk (presumably the
# `revision is None` case; the branch line is not visible here, confirm).
584 mode = oct(os.stat(filename).st_mode)
585 # Adjust python3 octal format so that it matches what git expects
586 if mode.startswith("0o"):
587 mode = "0" + mode[2:]
588 blob_id = clang_format_to_blob(
# Entry layout: "<mode> <blob id>\t<filename>".
596 yield "%s %s\t%s" % (mode, blob_id, filename)
598 return create_tree(index_info_generator(), "--index-info")
def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames. If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other
    mode is invalid.

    Returns the object ID of the tree written from the temporary index."""
    assert mode in ("--stdin", "--index-info")
    cmd = ["git", "update-index", "--add", "-z", mode]
    # The entries are added to a throw-away index so that the user's real
    # index is never touched.
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z makes update-index expect NUL-terminated records.
            p.stdin.write(to_bytes("%s\0" % line))
        p.stdin.close()
        if p.wait() != 0:
            die("`%s` failed" % " ".join(cmd))
        # write-tree has to run while GIT_INDEX_FILE still points at the
        # temporary index.
        tree_id = run("git", "write-tree")
        return tree_id
621 def clang_format_to_blob(
625 binary="clang-format",
629 """Run clang-format on the given file and save the result to a git blob.
631 Runs on the file in `revision` if not None, or on the file in the working
632 directory if `revision` is None. Revision can be set to an empty string to
633 run clang-format on the file in the index.
635 Returns the object ID (SHA-1) of the created blob."""
636 clang_format_cmd = [binary]
638 clang_format_cmd.extend(["--style=" + style])
# Each range is (start_line, line_count); the second number passed to --lines
# is the last line of the range, start_line + line_count - 1.
639 clang_format_cmd.extend(
641 "--lines=%s:%s" % (start_line, start_line + line_count - 1)
642 for start_line, line_count in line_ranges
645 if revision is not None:
# The contents are piped in from git rather than read from disk, so the file
# name is passed separately through --assume-filename.
646 clang_format_cmd.extend(["--assume-filename=" + filename])
651 "%s:%s" % (revision, filename),
653 git_show = subprocess.Popen(
654 git_show_cmd, env=env, stdin=subprocess.PIPE, stdout=subprocess.PIPE
656 git_show.stdin.close()
# clang-format reads the blob contents straight from git_show's stdout.
657 clang_format_stdin = git_show.stdout
659 clang_format_cmd.extend([filename])
661 clang_format_stdin = subprocess.PIPE
663 clang_format = subprocess.Popen(
664 clang_format_cmd, stdin=clang_format_stdin, stdout=subprocess.PIPE
666 if clang_format_stdin == subprocess.PIPE:
667 clang_format_stdin = clang_format.stdin
# ENOENT is reported to the user as a missing clang-format executable.
669 if e.errno == errno.ENOENT:
670 die('cannot find executable "%s"' % binary)
673 clang_format_stdin.close()
678 "--path=" + filename,
681 hash_object = subprocess.Popen(
682 hash_object_cmd, stdin=clang_format.stdout, stdout=subprocess.PIPE
# hash_object was given clang_format.stdout as its stdin; release this
# process's own handle on that pipe.
684 clang_format.stdout.close()
685 stdout = hash_object.communicate()[0]
# Every stage of the pipeline is checked, so a failure in any of the
# processes ends the program through die().
686 if hash_object.returncode != 0:
687 die("`%s` failed" % " ".join(hash_object_cmd))
688 if clang_format.wait() != 0:
689 die("`%s` failed" % " ".join(clang_format_cmd))
690 if git_show and git_show.wait() != 0:
691 die("`%s` failed" % " ".join(git_show_cmd))
692 return convert_string(stdout).rstrip("\r\n")
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    `tree` is forwarded to create_temporary_index() to choose the initial
    contents of the temporary index. On exit the previous value of
    GIT_INDEX_FILE is restored, or the variable is removed again if it was
    not set before."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get("GIT_INDEX_FILE")
    os.environ["GIT_INDEX_FILE"] = index_path
    try:
        yield
    finally:
        # Runs on normal exit and on exceptions alike, so the environment is
        # always put back and the temporary file is always removed.
        if old_index_path is None:
            del os.environ["GIT_INDEX_FILE"]
        else:
            os.environ["GIT_INDEX_FILE"] = old_index_path
        os.remove(index_path)
def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in. Otherwise, an
    empty index is created.

    The file is named `temp_index_basename` and is placed in the directory
    reported by `git rev-parse --git-dir`."""
    gitdir = run("git", "rev-parse", "--git-dir")
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        # `git read-tree --empty` produces an index with no entries.
        tree = "--empty"
    # --index-output writes the result to `path` instead of the real index.
    run("git", "read-tree", "--index-output=" + path, tree)
    return path
725 def print_diff(old_tree, new_tree):
726 """Print the diff between the two trees to stdout."""
727 # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
728 # output is expected to be viewed by the user, and only the former does nice
729 # things like color and pagination.
731 # We also only print modified files since `new_tree` only contains the files
732 # that were modified, so unmodified files would show as deleted without the
# `--diff-filter=M` restriction passed below.
# `--exit-code` additionally makes `git diff` signal through its exit status
# whether the two trees differ.
734 return subprocess.run(
735 ["git", "diff", "--diff-filter=M", "--exit-code", old_tree, new_tree]
739 def print_diffstat(old_tree, new_tree):
740 """Print the diffstat between the two trees to stdout."""
741 # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
742 # output is expected to be viewed by the user, and only the former does nice
743 # things like color and pagination.
745 # We also only print modified files since `new_tree` only contains the files
746 # that were modified, so unmodified files would show as deleted without the
# NOTE(review): the sentence above is cut short and the command arguments are
# not visible here; presumably it refers to a `--diff-filter=M` restriction
# like the one print_diff() passes. Confirm.
748 return subprocess.run(
761 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
762 """Apply the changes in `new_tree` to the working directory.
764 Bails if there are local changes in those files and not `force`. If
765 `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
# `git diff-files --name-status` lists the changed files that have unstaged
# modifications; these are the local changes the docstring refers to.
781 unstaged_files = run(
782 "git", "diff-files", "--name-status", *changed_files
786 "The following files would be modified but have unstaged "
790 print(unstaged_files, file=sys.stderr)
791 print("Please commit, stage, or stash them first.", file=sys.stderr)
794 # In patch mode, we could just as well create an index from the new tree
795 # and checkout from that, but then the user will be presented with a
796 # message saying "Discard ... from worktree". Instead, we use the old
797 # tree as the index and checkout from new_tree, which gives the slightly
798 # better message, "Apply ... to index and worktree". This is not quite
799 # right, since it won't be applied to the user's index, but oh well.
800 with temporary_index_file(old_tree):
801 subprocess.run(["git", "checkout", "--patch", new_tree], check=True)
802 index_tree = old_tree
# Non-interactive path: the changed files are checked out from a temporary
# index built from `new_tree`; -f overwrites the existing files.
804 with temporary_index_file(new_tree):
805 run("git", "checkout-index", "-f", "--", *changed_files)
809 def run(*args, **kwargs):
# Helper for running an external command whose words are given in `args`.
# All three standard streams are piped, and the captured stdout and stderr
# are decoded with convert_string(). Callers in this file use the result as
# the command's stdout text.
# Recognized keyword options and their defaults: stdin="", verbose=True,
# strip=True.
810 stdin = kwargs.pop("stdin", "")
811 verbose = kwargs.pop("verbose", True)
812 strip = kwargs.pop("strip", True)
# Any keyword left over is rejected with the same message Python itself uses.
814 raise TypeError("run() got an unexpected keyword argument '%s'" % name)
815 p = subprocess.Popen(
817 stdout=subprocess.PIPE,
818 stderr=subprocess.PIPE,
819 stdin=subprocess.PIPE,
821 stdout, stderr = p.communicate(input=stdin)
823 stdout = convert_string(stdout)
824 stderr = convert_string(stderr)
826 if p.returncode == 0:
830 "`%s` printed to stderr:" % " ".join(args), file=sys.stderr
832 print(stderr.rstrip(), file=sys.stderr)
# Trailing CR/LF characters are removed from stdout (presumably only when
# `strip` is set; the guarding line is not visible here, confirm).
834 stdout = stdout.rstrip("\r\n")
838 "`%s` returned %s" % (" ".join(args), p.returncode), file=sys.stderr
841 print(stderr.rstrip(), file=sys.stderr)
846 print("error:", message, file=sys.stderr)
def to_bytes(str_input):
    """Return `str_input` as binary data.

    A value that is already `bytes` is returned unchanged; anything else is
    encoded as UTF-8."""
    if isinstance(str_input, bytes):
        return str_input
    # Encode to UTF-8 to get binary data.
    return str_input.encode("utf-8")
def to_string(bytes_input):
    """Return `bytes_input` as the native `str` type.

    A `str` is returned unchanged and `bytes` are decoded as UTF-8. Any other
    value (a Python 2 `unicode` object) is encoded as UTF-8, which yields a
    Python 2 `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Python 3: bytes has no encode(), so it must be decoded instead.
        return bytes_input.decode("utf-8")
    return bytes_input.encode("utf-8")
def convert_string(bytes_input):
    """Return `bytes_input` as text.

    The input is decoded as UTF-8 when possible. If it has no `decode`
    method (it is already a `str`) or is not valid UTF-8, `str(bytes_input)`
    is returned instead."""
    try:
        return to_string(bytes_input.decode("utf-8"))
    except (AttributeError, UnicodeError):
        # AttributeError: 'str' object has no attribute 'decode'.
        # UnicodeError: the data is not valid UTF-8.
        return str(bytes_input)
872 if __name__ == "__main__":