3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #===------------------------------------------------------------------------===#
"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""
25 from __future__ import absolute_import, division, print_function
# One-line synopsis shown by argparse.
usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
         '[--] [<file>...]')

# Long-form description shown in the help output below the synopsis.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory, or in the stage/index.

Examples:
  To format staged changes, i.e everything that's been `git add`ed:
    git clang-format

  To also format everything touched in the most recent commit:
    git clang-format HEAD~1

  If you're on a branch off main, to format everything touched on your branch:
    git clang-format main

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of
# clang-format. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'

# A run of `count` changed lines starting at line `start`, as taken from a
# diff hunk header.
Range = collections.namedtuple('Range', 'start, count')
def main():
    """Entry point: format the changed lines and apply or print the result.

    Returns the process exit status: 0 when there is nothing to format or
    clang-format changed nothing, the `git diff` exit code in --diff and
    --diffstat mode, and 1 after changes have been applied.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc',  # C++
        'ccm', 'cppm', 'cxxm', 'c++m',  # C++ Modules
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js', 'mjs', 'cjs',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
        'json',  # Json
        'sv', 'svh', 'v', 'vh',  # Verilog
        'td',  # TableGen
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--diffstat', action='store_true',
                   help='print a diffstat instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--staged', '--cached', action='store_true',
                   help='format lines in the stage instead of the working dir')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    p.add_argument('--diff_from_common_commit', action='store_true',
                   help=('diff from the last common commit for commits in '
                         'separate branches rather than the exact point of the '
                         'commits'))
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    opts = p.parse_args(argv)

    # Fold -q into -v so a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2:
        if opts.staged:
            die('--staged is not allowed when two commits are given')
        if not opts.diff:
            die('--diff is required when two commits are given')
    elif opts.diff_from_common_commit:
        die('--diff_from_common_commit is only allowed when two commits are given')

    # A binary given with a directory component must survive the chdir below.
    if os.path.dirname(opts.binary):
        opts.binary = os.path.abspath(opts.binary)

    changed_lines = compute_diff_and_extract_lines(commits,
                                                   files,
                                                   opts.staged,
                                                   opts.diff_from_common_commit)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    filter_symlinks(changed_lines)
    filter_ignored_files(changed_lines, binary=opts.binary)
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring the following files (wrong extension, symlink, or '
                  'ignored by clang-format):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)

    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return 0

    # `revision` selects where file contents are read from: a commit, the
    # index (empty string), or the working directory (None).
    if len(commits) > 1:
        old_tree = commits[1]
        revision = old_tree
    elif opts.staged:
        old_tree = create_tree_from_index(changed_lines)
        revision = ''
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        revision = None
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)

    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
        return 0

    if opts.diff:
        return print_diff(old_tree, new_tree)
    if opts.diffstat:
        return print_diffstat(old_tree, new_tree)

    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
        print('changed files:')
        for filename in changed_files:
            print(' %s' % filename)

    return 1
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to the type flag passed to `git config` (such as "--bool") when
    re-reading that option.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        if '\n' in entry:
            name, value = entry.split('\n', 1)
        else:
            # A setting with no '=' ('\n' with --null) is implicitly 'true'
            name = entry
            value = 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits. Otherwise, the arguments are
    checked from left to right if they are commits or files. If commits are
    not given, a list with `default_commit` is used.

    Note: without "--", leading commits are popped off `args` in place.
    """
    if dash_dash:
        commits = args if args else [default_commit]
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        commits = []
        # Consume leading revisions; the first non-revision starts the files.
        while args:
            if not disambiguate_revision(args[0]):
                break
            commits.append(args.pop(0))
        if not commits:
            commits = [default_commit]
        files = args
    else:
        commits = [default_commit]
        files = []
    return commits, files
def disambiguate_revision(value):
    """Return True if `value` is a revision, False if it is a file, or die."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))
def get_object_type(value):
    """Return a string description of an object's type, or None if it is not
    a valid git object.

    The type is whatever `git cat-file -t` prints for `value`; a non-zero
    exit status from git is reported as None.
    """
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())
def compute_diff_and_extract_lines(commits, files, staged, diff_common_commit):
    """Call compute_diff() followed by extract_lines().

    Returns the dict produced by extract_lines(). Exits with status 2 if the
    diff process fails.
    """
    diff_process = compute_diff(commits, files, staged, diff_common_commit)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines
def compute_diff(commits, files, staged, diff_common_commit):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory (or stage if --staged is used)
    and the first commit if a single one was specified, or the difference
    between both specified commits, filtered on `files` (if non-empty).
    Zero context lines are used in the patch.

    If `diff_common_commit` is set and two commits are given, they are passed
    to git as a single "A...B" argument.
    """
    git_tool = 'diff-index'
    extra_args = []
    if len(commits) == 2:
        git_tool = 'diff-tree'
        if diff_common_commit:
            # %-formatting rather than an f-string: the file advertises
            # Python 2.7 support and an f-string is a syntax error there.
            commits = ['%s...%s' % (commits[0], commits[1])]
    elif staged:
        extra_args += ['--cached']

    cmd = ['git', git_tool, '-p', '-U0'] + extra_args + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Nothing is written to the child, so close its stdin straight away.
    p.stdin.close()
    return p
def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    The input must have been produced with ``-U0``, meaning unidiff format
    with zero lines of context. The return value is a dict mapping filename
    to a list of line `Range`s (start line, line count).
    """
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        # "+++ b/<path>": remember which file the following hunks belong to.
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n\t')
        # "@@ -a,b +start[,count] @@": the new-side range of a hunk.
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match:
            start_line = int(match.group(1))
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            # A count of zero is widened to the single line at `start`.
            if line_count == 0:
                line_count = 1
            # Hunks whose new-side start is 0 have no line to format.
            if start_line == 0:
                continue
            matches.setdefault(filename, []).append(
                Range(start_line, line_count))
    return matches
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. Including the empty string keeps files that have
    no extension at all. `dictionary` is modified in place.
    """
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted.
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]
def filter_symlinks(dictionary):
    """Delete every key in `dictionary` that is a symlink.

    Keys are tested with os.path.islink(), so relative names are resolved
    against the current working directory. `dictionary` is modified in place.
    """
    # Iterate over a snapshot of the keys because entries are deleted.
    for filename in list(dictionary.keys()):
        if os.path.islink(filename):
            del dictionary[filename]
def filter_ignored_files(dictionary, binary):
    """Delete every key in `dictionary` that is ignored by clang-format.

    Runs `binary -list-ignored <files...>` and removes each file name it
    prints from `dictionary` (in place).
    """
    # With no files there is nothing to ask clang-format about.
    if not dictionary:
        return
    ignored_files = run(binary, '-list-ignored', *dictionary.keys())
    if not ignored_files:
        return
    # splitlines() also copes with CRLF output; pop() tolerates a name that
    # is not one of our keys instead of raising KeyError.
    for filename in ignored_files.splitlines():
        dictionary.pop(filename, None)
def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)
def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree.
    """
    return create_tree(filenames, '--stdin')
def create_tree_from_index(filenames):
    """Create a new git tree with the given files as staged in the index.

    Returns the value of create_tree() for the index entries of `filenames`.
    """
    # Copy the environment, because the files have to be read from the
    # original index.
    env = os.environ.copy()

    def index_contents_generator():
        for filename in filenames:
            git_ls_files_cmd = ['git', 'ls-files', '--stage', '-z', '--',
                                filename]
            git_ls_files = subprocess.Popen(git_ls_files_cmd, env=env,
                                            stdin=subprocess.PIPE,
                                            stdout=subprocess.PIPE)
            stdout = git_ls_files.communicate()[0]
            # With -z entries are NUL-terminated; only the first is used.
            yield convert_string(stdout.split(b'\0')[0])

    return create_tree(index_contents_generator(), '--index-info')
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the line ranges to format. `revision`
    selects the source of each file: a revision name, '' for the index, or
    None for the working directory.

    Returns the object ID (SHA-1) of the created tree.
    """
    # Copy the environment when formatting the files in the index, because the
    # files have to be read from the original index.
    env = os.environ.copy() if revision == '' else None

    def iteritems(container):
        try:
            return container.iteritems()  # Python 2
        except AttributeError:
            return container.items()  # Python 3

    def index_info_generator():
        for filename, line_ranges in iteritems(changed_lines):
            if revision is not None:
                if len(revision) > 0:
                    git_metadata_cmd = [
                        'git', 'ls-tree',
                        '%s:%s' % (revision, os.path.dirname(filename)),
                        os.path.basename(filename)]
                else:
                    git_metadata_cmd = ['git', 'ls-files', '--stage', '--',
                                        filename]
                git_metadata = subprocess.Popen(git_metadata_cmd, env=env,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                # The first whitespace-separated field is the octal mode.
                mode = oct(int(stdout.split()[0], 8))
            else:
                mode = oct(os.stat(filename).st_mode)
            # Adjust python3 octal format so that it matches what git expects
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style,
                                           env=env)
            yield '%s %s\t%s' % (mode, blob_id, filename)

    return create_tree(index_info_generator(), '--index-info')
def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames. If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other
    mode is invalid.

    Returns the output of `git write-tree` run against a temporary index.
    """
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # -z: entries are NUL-terminated.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None, env=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None. Revision can be set to an empty string to
    run clang-format on the file in the index.

    `line_ranges` is an iterable of (start_line, line_count) pairs; `env` is
    the environment passed to the `git cat-file` child process.

    Returns the object ID (SHA-1) of the created blob.
    """
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(['--style=' + style])
    clang_format_cmd.extend([
        '--lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    if revision is not None:
        # Feed the blob contents on stdin and tell clang-format the file name.
        clang_format_cmd.extend(['--assume-filename=' + filename])
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, env=env,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of the pipe feeding clang-format.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit. `tree` is forwarded to create_temporary_index().
    """
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)
def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in. Otherwise, an
    empty index is created.
    """
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path
def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout.

    Returns the exit code of `git diff --exit-code`.
    """
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    return subprocess.run(['git', 'diff', '--diff-filter=M',
                           '--exit-code', old_tree, new_tree]).returncode
def print_diffstat(old_tree, new_tree):
    """Print the diffstat between the two trees to stdout.

    Returns the exit code of `git diff --exit-code --stat`.
    """
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    return subprocess.run(['git', 'diff', '--diff-filter=M', '--exit-code',
                           '--stat', old_tree, new_tree]).returncode
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`. If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of file names that differ between the two trees.
    """
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree". This
        # is not quite right, since it won't be applied to the user's index,
        # but oh well.
        with temporary_index_file(old_tree):
            subprocess.run(['git', 'checkout', '--patch', new_tree],
                           check=True)
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-f', '--', *changed_files)
    return changed_files
def run(*args, **kwargs):
    """Run the command `args` and return its decoded stdout.

    Keyword arguments:
      stdin:   data passed to the child's standard input (default '').
      verbose: if true, prefix forwarded stderr / failures with the command.
      strip:   if true, strip trailing CR/LF from the returned stdout.

    Any stderr output is forwarded to our stderr. Exits with status 2 if the
    command returns non-zero. Raises TypeError for unknown keyword arguments.
    """
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    stdout, stderr = p.communicate(input=stdin)

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
def die(message):
    """Print `message` to stderr prefixed with 'error:' and exit with 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)
def to_bytes(str_input):
    """Return `str_input` as bytes, encoding text as UTF-8.

    Input that is already `bytes` is returned unchanged.
    """
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A native `str` is returned unchanged. On Python 3, `bytes` are decoded as
    UTF-8; any other text type (Python 2 `unicode`) is encoded as UTF-8.
    """
    if isinstance(bytes_input, str):
        return bytes_input
    # Only reachable on Python 3, where bytes is a distinct type from str;
    # calling .encode() on it would raise AttributeError.
    if isinstance(bytes_input, bytes):
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
685 def convert_string(bytes_input):
687 return to_string(bytes_input.decode('utf-8'))
688 except AttributeError: # 'str' object has no attribute 'decode'.
689 return str(bytes_input)
691 return str(bytes_input)
if __name__ == '__main__':
    # main() returns the exit status for the process.
    sys.exit(main())