3 #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #===------------------------------------------------------------------------===#
12 clang-format git integration
13 ============================
15 This file provides a clang-format integration for git. Put it somewhere in your
16 path and ensure that it is executable. Then, "git clang-format" will invoke
17 clang-format on the changes in current files or a specific commit.
19 For further details, run:
22 Requires Python 2.7 or Python 3
25 from __future__ import absolute_import, division, print_function
35 usage = ('git clang-format [OPTIONS] [<commit>] [<commit>|--staged] '
39 If zero or one commits are given, run clang-format on all lines that differ
40 between the working directory and <commit>, which defaults to HEAD. Changes are
41 only applied to the working directory, or in the stage/index.
44 To format staged changes, i.e everything that's been `git add`ed:
47 To also format everything touched in the most recent commit:
48 git clang-format HEAD~1
50 If you're on a branch off main, to format everything touched on your branch:
53 If two commits are given (requires --diff), run clang-format on all lines in the
54 second <commit> that differ from the first <commit>.
56 The following git-config settings set the default of the corresponding option:
59 clangFormat.extensions
# Name of the temporary index file in which to save the output of clang-format.
64 # This file is created within the .git directory.
65 temp_index_basename = 'clang-format-index'
68 Range = collections.namedtuple('Range', 'start, count')
72 config = load_git_config()
74 # In order to keep '--' yet allow options after positionals, we need to
75 # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
76 # nargs=argparse.REMAINDER disallows options after positionals.)
79 idx = argv.index('--')
83 dash_dash = argv[idx:]
86 default_extensions = ','.join([
87 # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
91 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx', 'inc', # C++
92 'ccm', 'cppm', 'cxxm', 'c++m', # C++ Modules
94 # Other languages that clang-format supports
95 'proto', 'protodevel', # Protocol Buffers
101 'sv', 'svh', 'v', 'vh', # Verilog
104 p = argparse.ArgumentParser(
105 usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
107 p.add_argument('--binary',
108 default=config.get('clangformat.binary', 'clang-format'),
109 help='path to clang-format'),
110 p.add_argument('--commit',
111 default=config.get('clangformat.commit', 'HEAD'),
112 help='default commit to use if none is specified'),
113 p.add_argument('--diff', action='store_true',
114 help='print a diff instead of applying the changes')
115 p.add_argument('--diffstat', action='store_true',
116 help='print a diffstat instead of applying the changes')
117 p.add_argument('--extensions',
118 default=config.get('clangformat.extensions',
120 help=('comma-separated list of file extensions to format, '
121 'excluding the period and case-insensitive')),
122 p.add_argument('-f', '--force', action='store_true',
123 help='allow changes to unstaged files')
124 p.add_argument('-p', '--patch', action='store_true',
125 help='select hunks interactively')
126 p.add_argument('-q', '--quiet', action='count', default=0,
127 help='print less information')
128 p.add_argument('--staged', '--cached', action='store_true',
129 help='format lines in the stage instead of the working dir')
130 p.add_argument('--style',
131 default=config.get('clangformat.style', None),
132 help='passed to clang-format'),
133 p.add_argument('-v', '--verbose', action='count', default=0,
134 help='print extra information')
135 # We gather all the remaining positional arguments into 'args' since we need
136 # to use some heuristics to determine whether or not <commit> was present.
137 # However, to print pretty messages, we make use of metavar and help.
138 p.add_argument('args', nargs='*', metavar='<commit>',
139 help='revision from which to compute the diff')
140 p.add_argument('ignored', nargs='*', metavar='<file>...',
141 help='if specified, only consider differences in these files')
142 opts = p.parse_args(argv)
144 opts.verbose -= opts.quiet
147 commits, files = interpret_args(opts.args, dash_dash, opts.commit)
150 die('--staged is not allowed when two commits are given')
152 die('--diff is required when two commits are given')
155 die('at most two commits allowed; %d given' % len(commits))
156 changed_lines = compute_diff_and_extract_lines(commits, files, opts.staged)
157 if opts.verbose >= 1:
158 ignored_files = set(changed_lines)
159 filter_by_extension(changed_lines, opts.extensions.lower().split(','))
160 # The computed diff outputs absolute paths, so we must cd before accessing
163 filter_symlinks(changed_lines)
164 if opts.verbose >= 1:
165 ignored_files.difference_update(changed_lines)
168 'Ignoring changes in the following files (wrong extension or symlink):')
169 for filename in ignored_files:
170 print(' %s' % filename)
172 print('Running clang-format on the following files:')
173 for filename in changed_lines:
174 print(' %s' % filename)
176 if not changed_lines:
177 if opts.verbose >= 0:
178 print('no modified files to format')
182 old_tree = commits[1]
185 old_tree = create_tree_from_index(changed_lines)
188 old_tree = create_tree_from_workdir(changed_lines)
190 new_tree = run_clang_format_and_save_to_tree(changed_lines,
194 if opts.verbose >= 1:
195 print('old tree: %s' % old_tree)
196 print('new tree: %s' % new_tree)
198 if old_tree == new_tree:
199 if opts.verbose >= 0:
200 print('clang-format did not modify any files')
204 return print_diff(old_tree, new_tree)
206 return print_diffstat(old_tree, new_tree)
208 changed_files = apply_changes(old_tree, new_tree, force=opts.force,
209 patch_mode=opts.patch)
210 if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
211 print('changed files:')
212 for filename in changed_files:
213 print(' %s' % filename)
218 def load_git_config(non_string_options=None):
219 """Return the git configuration as a dictionary.
All options are assumed to be strings unless in `non_string_options`, which
222 is a dictionary mapping option name (in lower case) to either "--bool" or
224 if non_string_options is None:
225 non_string_options = {}
227 for entry in run('git', 'config', '--list', '--null').split('\0'):
230 name, value = entry.split('\n', 1)
232 # A setting with no '=' ('\n' with --null) is implicitly 'true'
235 if name in non_string_options:
236 value = run('git', 'config', non_string_options[name], name)
241 def interpret_args(args, dash_dash, default_commit):
242 """Interpret `args` as "[commits] [--] [files]" and return (commits, files).
244 It is assumed that "--" and everything that follows has been removed from
245 args and placed in `dash_dash`.
247 If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
248 left (if present) are taken as commits. Otherwise, the arguments are checked
249 from left to right if they are commits or files. If commits are not given,
250 a list with `default_commit` is used."""
253 commits = [default_commit]
256 for commit in commits:
257 object_type = get_object_type(commit)
258 if object_type not in ('commit', 'tag'):
259 if object_type is None:
260 die("'%s' is not a commit" % commit)
262 die("'%s' is a %s, but a commit was expected" % (commit, object_type))
263 files = dash_dash[1:]
267 if not disambiguate_revision(args[0]):
269 commits.append(args.pop(0))
271 commits = [default_commit]
274 commits = [default_commit]
276 return commits, files
279 def disambiguate_revision(value):
280 """Returns True if `value` is a revision, False if it is a file, or dies."""
281 # If `value` is ambiguous (neither a commit nor a file), the following
282 # command will die with an appropriate error message.
283 run('git', 'rev-parse', value, verbose=False)
284 object_type = get_object_type(value)
285 if object_type is None:
287 if object_type in ('commit', 'tag'):
289 die('`%s` is a %s, but a commit or filename was expected' %
290 (value, object_type))
293 def get_object_type(value):
294 """Returns a string description of an object's type, or None if it is not
295 a valid git object."""
296 cmd = ['git', 'cat-file', '-t', value]
297 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
298 stdout, stderr = p.communicate()
299 if p.returncode != 0:
301 return convert_string(stdout.strip())
304 def compute_diff_and_extract_lines(commits, files, staged):
305 """Calls compute_diff() followed by extract_lines()."""
306 diff_process = compute_diff(commits, files, staged)
307 changed_lines = extract_lines(diff_process.stdout)
308 diff_process.stdout.close()
310 if diff_process.returncode != 0:
311 # Assume error was already printed to stderr.
316 def compute_diff(commits, files, staged):
317 """Return a subprocess object producing the diff from `commits`.
The return value's `stdout` file object will produce a patch with the
320 differences between the working directory (or stage if --staged is used) and
321 the first commit if a single one was specified, or the difference between
322 both specified commits, filtered on `files` (if non-empty).
323 Zero context lines are used in the patch."""
324 git_tool = 'diff-index'
327 git_tool = 'diff-tree'
329 extra_args += ['--cached']
330 cmd = ['git', git_tool, '-p', '-U0'] + extra_args + commits + ['--']
332 p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
337 def extract_lines(patch_file):
338 """Extract the changed lines in `patch_file`.
340 The return value is a dictionary mapping filename to a list of (start_line,
343 The input must have been produced with ``-U0``, meaning unidiff format with
344 zero lines of context. The return value is a dict mapping filename to a
345 list of line `Range`s."""
347 for line in patch_file:
348 line = convert_string(line)
349 match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
351 filename = match.group(1).rstrip('\r\n\t')
352 match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
354 start_line = int(match.group(1))
357 line_count = int(match.group(3))
362 matches.setdefault(filename, []).append(Range(start_line, line_count))
366 def filter_by_extension(dictionary, allowed_extensions):
367 """Delete every key in `dictionary` that doesn't have an allowed extension.
369 `allowed_extensions` must be a collection of lowercase file extensions,
370 excluding the period."""
371 allowed_extensions = frozenset(allowed_extensions)
372 for filename in list(dictionary.keys()):
373 base_ext = filename.rsplit('.', 1)
374 if len(base_ext) == 1 and '' in allowed_extensions:
376 if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
377 del dictionary[filename]
def filter_symlinks(dictionary):
    """Remove, in place, every entry of `dictionary` whose key is a symlink.

    Each key is treated as a filesystem path and tested with
    os.path.islink(); entries whose key is not a symlink are left untouched.
    Nothing is returned.
    """
    # Collect the offending keys first so the dictionary is never resized
    # while it is being iterated.
    symlinked_paths = [path for path in dictionary if os.path.islink(path)]
    for path in symlinked_paths:
        del dictionary[path]
387 def cd_to_toplevel():
388 """Change to the top level of the git repository."""
389 toplevel = run('git', 'rev-parse', '--show-toplevel')
def create_tree_from_workdir(filenames):
    """Build a new git tree from the working-directory copies of `filenames`.

    The filenames are handed to create_tree() in '--stdin' mode.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
def create_tree_from_index(filenames):
    """Build a new git tree from the index (staged) entries of `filenames`.

    For each filename, `git ls-files --stage -z` is queried and its first
    NUL-terminated record is passed on to create_tree() in '--index-info'
    mode.

    Returns the value produced by create_tree() for those entries."""
    # Snapshot the environment up front: the entries have to be read from the
    # original index, and the generator below is only consumed later, inside
    # create_tree().
    original_env = os.environ.copy()

    def staged_entries():
        for path in filenames:
            ls_files = subprocess.Popen(
                ['git', 'ls-files', '--stage', '-z', '--', path],
                env=original_env,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            raw_output = ls_files.communicate()[0]
            first_record = raw_output.split(b'\0')[0]
            yield convert_string(first_record)

    return create_tree(staged_entries(), '--index-info')
415 def run_clang_format_and_save_to_tree(changed_lines, revision=None,
416 binary='clang-format', style=None):
417 """Run clang-format on each file and save the result to a git tree.
419 Returns the object ID (SHA-1) of the created tree."""
420 # Copy the environment when formatting the files in the index, because the
421 # files have to be read from the original index.
422 env = os.environ.copy() if revision == '' else None
423 def iteritems(container):
425 return container.iteritems() # Python 2
426 except AttributeError:
427 return container.items() # Python 3
428 def index_info_generator():
429 for filename, line_ranges in iteritems(changed_lines):
430 if revision is not None:
431 if len(revision) > 0:
432 git_metadata_cmd = ['git', 'ls-tree',
433 '%s:%s' % (revision, os.path.dirname(filename)),
434 os.path.basename(filename)]
436 git_metadata_cmd = ['git', 'ls-files', '--stage', '--', filename]
437 git_metadata = subprocess.Popen(git_metadata_cmd, env=env,
438 stdin=subprocess.PIPE,
439 stdout=subprocess.PIPE)
440 stdout = git_metadata.communicate()[0]
441 mode = oct(int(stdout.split()[0], 8))
443 mode = oct(os.stat(filename).st_mode)
444 # Adjust python3 octal format so that it matches what git expects
445 if mode.startswith('0o'):
446 mode = '0' + mode[2:]
447 blob_id = clang_format_to_blob(filename, line_ranges,
452 yield '%s %s\t%s' % (mode, blob_id, filename)
453 return create_tree(index_info_generator(), '--index-info')
456 def create_tree(input_lines, mode):
457 """Create a tree object from the given input.
459 If mode is '--stdin', it must be a list of filenames. If mode is
'--index-info' it must be a list of values suitable for "git update-index
461 --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any other mode
463 assert mode in ('--stdin', '--index-info')
464 cmd = ['git', 'update-index', '--add', '-z', mode]
465 with temporary_index_file():
466 p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
467 for line in input_lines:
468 p.stdin.write(to_bytes('%s\0' % line))
471 die('`%s` failed' % ' '.join(cmd))
472 tree_id = run('git', 'write-tree')
476 def clang_format_to_blob(filename, line_ranges, revision=None,
477 binary='clang-format', style=None, env=None):
478 """Run clang-format on the given file and save the result to a git blob.
480 Runs on the file in `revision` if not None, or on the file in the working
481 directory if `revision` is None. Revision can be set to an empty string to run
482 clang-format on the file in the index.
484 Returns the object ID (SHA-1) of the created blob."""
485 clang_format_cmd = [binary]
487 clang_format_cmd.extend(['-style='+style])
488 clang_format_cmd.extend([
489 '-lines=%s:%s' % (start_line, start_line+line_count-1)
490 for start_line, line_count in line_ranges])
491 if revision is not None:
492 clang_format_cmd.extend(['-assume-filename='+filename])
493 git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
494 git_show = subprocess.Popen(git_show_cmd, env=env, stdin=subprocess.PIPE,
495 stdout=subprocess.PIPE)
496 git_show.stdin.close()
497 clang_format_stdin = git_show.stdout
499 clang_format_cmd.extend([filename])
501 clang_format_stdin = subprocess.PIPE
503 clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
504 stdout=subprocess.PIPE)
505 if clang_format_stdin == subprocess.PIPE:
506 clang_format_stdin = clang_format.stdin
508 if e.errno == errno.ENOENT:
509 die('cannot find executable "%s"' % binary)
512 clang_format_stdin.close()
513 hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
514 hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
515 stdout=subprocess.PIPE)
516 clang_format.stdout.close()
517 stdout = hash_object.communicate()[0]
518 if hash_object.returncode != 0:
519 die('`%s` failed' % ' '.join(hash_object_cmd))
520 if clang_format.wait() != 0:
521 die('`%s` failed' % ' '.join(clang_format_cmd))
522 if git_show and git_show.wait() != 0:
523 die('`%s` failed' % ' '.join(git_show_cmd))
524 return convert_string(stdout).rstrip('\r\n')
527 @contextlib.contextmanager
528 def temporary_index_file(tree=None):
529 """Context manager for setting GIT_INDEX_FILE to a temporary file and deleting
530 the file afterward."""
531 index_path = create_temporary_index(tree)
532 old_index_path = os.environ.get('GIT_INDEX_FILE')
533 os.environ['GIT_INDEX_FILE'] = index_path
537 if old_index_path is None:
538 del os.environ['GIT_INDEX_FILE']
540 os.environ['GIT_INDEX_FILE'] = old_index_path
541 os.remove(index_path)
544 def create_temporary_index(tree=None):
545 """Create a temporary index file and return the created file's path.
547 If `tree` is not None, use that as the tree to read in. Otherwise, an
548 empty index is created."""
549 gitdir = run('git', 'rev-parse', '--git-dir')
550 path = os.path.join(gitdir, temp_index_basename)
553 run('git', 'read-tree', '--index-output='+path, tree)
def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout.

    Runs `git diff --diff-filter=M --exit-code old_tree new_tree`, letting git
    write directly to the caller's stdout, and returns git's exit status.
    """
    # The porcelain 'diff' is used instead of the plumbing 'diff-tree' because
    # the output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # Only modified files are printed (--diff-filter=M): `new_tree` contains
    # just the files that were modified, so unmodified files would otherwise
    # show up as deleted.
    #
    # subprocess.call() returns the same exit status as
    # subprocess.run(...).returncode, but it also exists on Python 2.7, which
    # this script states it supports.
    return subprocess.call(['git', 'diff', '--diff-filter=M', '--exit-code',
                            old_tree, new_tree])
def print_diffstat(old_tree, new_tree):
    """Print the diffstat between the two trees to stdout.

    Runs `git diff --diff-filter=M --exit-code --stat old_tree new_tree`,
    letting git write directly to the caller's stdout, and returns git's exit
    status.
    """
    # The porcelain 'diff' is used instead of the plumbing 'diff-tree' because
    # the output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # Only modified files are printed (--diff-filter=M): `new_tree` contains
    # just the files that were modified, so unmodified files would otherwise
    # show up as deleted.
    #
    # subprocess.call() returns the same exit status as
    # subprocess.run(...).returncode, but it also exists on Python 2.7, which
    # this script states it supports.
    return subprocess.call(['git', 'diff', '--diff-filter=M', '--exit-code',
                            '--stat', old_tree, new_tree])
581 def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
582 """Apply the changes in `new_tree` to the working directory.
584 Bails if there are local changes in those files and not `force`. If
585 `patch_mode`, runs `git checkout --patch` to select hunks interactively."""
586 changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
587 '--name-only', old_tree,
588 new_tree).rstrip('\0').split('\0')
590 unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
592 print('The following files would be modified but '
593 'have unstaged changes:', file=sys.stderr)
594 print(unstaged_files, file=sys.stderr)
595 print('Please commit, stage, or stash them first.', file=sys.stderr)
598 # In patch mode, we could just as well create an index from the new tree
599 # and checkout from that, but then the user will be presented with a
600 # message saying "Discard ... from worktree". Instead, we use the old
601 # tree as the index and checkout from new_tree, which gives the slightly
602 # better message, "Apply ... to index and worktree". This is not quite
603 # right, since it won't be applied to the user's index, but oh well.
604 with temporary_index_file(old_tree):
605 subprocess.run(['git', 'checkout', '--patch', new_tree], check=True)
606 index_tree = old_tree
608 with temporary_index_file(new_tree):
609 run('git', 'checkout-index', '-f', '--', *changed_files)
613 def run(*args, **kwargs):
614 stdin = kwargs.pop('stdin', '')
615 verbose = kwargs.pop('verbose', True)
616 strip = kwargs.pop('strip', True)
618 raise TypeError("run() got an unexpected keyword argument '%s'" % name)
619 p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
620 stdin=subprocess.PIPE)
621 stdout, stderr = p.communicate(input=stdin)
623 stdout = convert_string(stdout)
624 stderr = convert_string(stderr)
626 if p.returncode == 0:
629 print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
630 print(stderr.rstrip(), file=sys.stderr)
632 stdout = stdout.rstrip('\r\n')
635 print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr)
637 print(stderr.rstrip(), file=sys.stderr)
642 print('error:', message, file=sys.stderr)
646 def to_bytes(str_input):
647 # Encode to UTF-8 to get binary data.
648 if isinstance(str_input, bytes):
650 return str_input.encode('utf-8')
653 def to_string(bytes_input):
654 if isinstance(bytes_input, str):
656 return bytes_input.encode('utf-8')
659 def convert_string(bytes_input):
661 return to_string(bytes_input.decode('utf-8'))
662 except AttributeError: # 'str' object has no attribute 'decode'.
663 return str(bytes_input)
665 return str(bytes_input)
667 if __name__ == '__main__':