2 # -*- coding: utf-8 -*-
3 #===----------------------------------------------------------------------===##
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #===----------------------------------------------------------------------===##
"""Checks for reverts of commits across a given git commit.

To clarify the meaning of 'across' with an example, if we had the following
commit history (where `a -> b` notes that `b` is a direct child of `a`):

123abc -> 223abc -> 323abc -> 423abc -> 523abc

And where 423abc is a revert of 223abc, this revert is considered to be 'across'
323abc. More generally, a revert A of an ancestor commit B is considered to be
'across' a commit C if C is an ancestor of A and B is an ancestor of C.

Please note that revert detection in general is really difficult, since merge
conflicts/etc always introduce _some_ amount of fuzziness. This script just
uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
reverts. The hope is that it'll easily catch the vast majority (>90%) of them.

This is designed to be used in one of two ways: an import in Python, or run
directly from a shell. If you want to import this, the `find_reverts`
function is the thing to look at. If you'd rather use this from a shell, here
is a usage example:

  ./revert_checker.py c47f97169 origin/main origin/release/12.x

This checks for all reverts from the tip of origin/main to c47f97169, which are
across the latter. It then does the same for origin/release/12.x to c47f97169.
Duplicate reverts discovered when walking both roots (origin/main and
origin/release/12.x) are deduplicated in output.
"""
from typing import Generator, Iterable, List, NamedTuple
# The f-strings used further down need Python 3.6 or newer.
assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.'
# People are creative with their reverts, and heuristics are a bit difficult.
# Like 90% of reverts have "This reverts commit ${full_sha}".
# Some lack that entirely, while others have many of them specified in ad-hoc
# ways, while others use short SHAs and whatever.
#
# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
# starts involving human intervention, which is probably not worth it for now.
def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
    """Extracts the SHAs that `commit_message` claims to revert.

    Two conventions are recognized:
      - `This reverts commit <sha>` anywhere in the message, where `<sha>` is
        exactly 40 lowercase hex digits, and
      - a first line of the form `Revert <sha> "...`, where `<sha>` is at
        least 6 lowercase hex digits.

    Args:
      commit_message: Full text of a commit message; may be empty.

    Returns:
      The SHAs found, possibly abbreviated and possibly repeated. Matches of
      the first convention come first, in message order. Empty if nothing
      matched.
    """
    if not commit_message:
        return []

    results = re.findall(r'This reverts commit ([a-f0-9]{40})\b',
                         commit_message)

    first_line = commit_message.splitlines()[0]
    initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
    # `re.match` gives None when the subject line isn't of the `Revert <sha>`
    # form, so only dereference it on a hit.
    if initial_revert:
        results.append(initial_revert.group(1))
    return results
def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
    """Runs `command` and lazily yields its stdout, one line at a time.

    Output is decoded as UTF-8, with undecodable bytes replaced rather than
    raising. Yielded lines keep their trailing newline. The command's exit
    status is not checked.

    Args:
      command: The argv of the process to spawn.

    Yields:
      Each line the process writes to stdout.
    """
    with subprocess.Popen(
            command, stdout=subprocess.PIPE, encoding='utf-8',
            errors='replace') as p:
        assert p.stdout is not None  # for mypy's happiness.
        yield from p.stdout
def _resolve_sha(git_dir: str, sha: str) -> str:
    """Expands a possibly-abbreviated `sha` into a full SHA.

    Args:
      git_dir: Directory of the git checkout to query.
      sha: A commit SHA, either abbreviated or full.

    Returns:
      `sha` unchanged if it is already 40 characters long; otherwise whatever
      `git rev-parse` prints for it, stripped of surrounding whitespace.

    Raises:
      subprocess.CalledProcessError: If `git rev-parse` exits non-zero, e.g.
        because the abbreviation is unknown or ambiguous.
    """
    # A 40-character SHA is already fully spelled out; don't spawn git for it.
    if len(sha) == 40:
        return sha

    return subprocess.check_output(
        ['git', '-C', git_dir, 'rev-parse', sha],
        encoding='utf-8',
        stderr=subprocess.DEVNULL,
    ).strip()
class _LogEntry(NamedTuple):
    """A single commit read from `git log` output."""
    # Full SHA of the commit.
    sha: str
    # The commit's message, with trailing whitespace removed.
    commit_message: str
def _log_stream(git_dir: str, root_sha: str,
                end_at_sha: str) -> Iterable[_LogEntry]:
    """Streams commits reachable from `root_sha` but not from `end_at_sha`.

    Commits are produced lazily, in the order `git log` prints them.

    Args:
      git_dir: Directory of the git checkout to query.
      root_sha: Commit to start logging from.
      end_at_sha: Commit whose history is excluded from the log.

    Yields:
      One `_LogEntry` per commit.
    """
    # Each commit is printed as: separator line, SHA line, message lines.
    # The separator is unlikely to appear verbatim in a commit message.
    sep = 50 * '<>'
    log_command = [
        'git',
        '-C',
        git_dir,
        'log',
        '^' + end_at_sha,
        root_sha,
        '--format=' + sep + '%n%H%n%B%n',
    ]

    stdout_stream = iter(_stream_stdout(log_command))

    # Find the next separator line. If there's nothing to log, it may not exist.
    # It might not be the first line if git feels complainy.
    found_commit_header = False
    for line in stdout_stream:
        if line.rstrip() == sep:
            found_commit_header = True
            break

    while found_commit_header:
        sha = next(stdout_stream, None)
        assert sha is not None, 'git died?'
        sha = sha.rstrip()

        commit_message = []

        # Collect lines until the next separator (another commit follows) or
        # until the stream ends (this was the last commit).
        found_commit_header = False
        for line in stdout_stream:
            line = line.rstrip()
            if line == sep:
                found_commit_header = True
                break
            commit_message.append(line)

        yield _LogEntry(sha, '\n'.join(commit_message).rstrip())
def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lists the commits in `base_ref..head_ref`, following first parents only.

    Args:
      git_dir: Directory of the git checkout to query.
      base_ref: Exclusive lower bound of the range.
      head_ref: Inclusive upper bound of the range.

    Returns:
      A lazy iterable of SHAs, as printed by `git rev-list`, with surrounding
      whitespace stripped.
    """
    rev_list = [
        'git',
        '-C',
        git_dir,
        'rev-list',
        '--first-parent',
        f'{base_ref}..{head_ref}',
    ]
    return (x.strip() for x in _stream_stdout(rev_list))
def _rev_parse(git_dir: str, ref: str) -> str:
    """Resolves `ref` with `git rev-parse` in `git_dir`.

    Args:
      git_dir: Directory of the git checkout to query.
      ref: Any revision that `git rev-parse` accepts.

    Returns:
      The output of `git rev-parse`, stripped of surrounding whitespace.

    Raises:
      subprocess.CalledProcessError: If git exits non-zero.
    """
    return subprocess.check_output(
        ['git', '-C', git_dir, 'rev-parse', ref],
        encoding='utf-8',
    ).strip()
class Revert(NamedTuple):
    """A claim that one commit reverts another."""
    # SHA of the commit doing the reverting.
    sha: str
    # SHA of the commit that `sha` claims to revert.
    reverted_sha: str
def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`.

    Args:
      git_dir: Directory of the git checkout to query.
      ref_a: First revision.
      ref_b: Second revision.

    Returns:
      The output of `git merge-base ref_a ref_b`, stripped of surrounding
      whitespace.

    Raises:
      subprocess.CalledProcessError: If git exits non-zero.
    """
    return subprocess.check_output(
        ['git', '-C', git_dir, 'merge-base', ref_a, ref_b],
        encoding='utf-8',
    ).strip()
def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
      git_dir: Directory of the git checkout to query.
      across_ref: Ref or SHA that a revert must span to be reported.
      root: Ref or SHA to start walking history from.

    Returns:
      A `Revert` for every commit between `across_ref` and `root` whose
      message claims to revert a commit that is not itself between them.

    Raises:
      ValueError: If `across_ref` is not an ancestor of `root`.
      subprocess.CalledProcessError: If git fails to resolve `across_ref`,
        `root`, their merge base, or an abbreviated SHA found in a commit
        message.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both halves need the `f` prefix; adjacent literals are concatenated
        # but each one is formatted on its own.
        raise ValueError(f"{across_sha} isn't an ancestor of {root_sha} "
                         f'(common ancestor: {common_ancestor})')

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug('%d commits appear between %s and %s',
                  len(intermediate_commits), across_sha, root_sha)

    all_reverts = []
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts = _try_parse_reverts_from_commit_message(commit_message)
        if not reverts:
            continue

        resolved_reverts = sorted(
            set(_resolve_sha(git_dir, x) for x in reverts))
        for reverted_sha in resolved_reverts:
            # A revert of something that landed after `across_sha` doesn't
            # span it, so it isn't reported.
            if reverted_sha in intermediate_commits:
                logging.debug('Commit %s reverts %s, which happened after %s',
                              sha, reverted_sha, across_sha)
                continue

            try:
                object_type = subprocess.check_output(
                    ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha],
                    encoding='utf-8',
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    'Failed to resolve reverted object %s (claimed to be reverted '
                    'by sha %s)', reverted_sha, sha)
                continue

            if object_type == 'commit':
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert %s -- which isn't a commit -- %s", sha,
                object_type, reverted_sha)

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts
def _main() -> None:
    """Command-line entry point.

    Parses arguments, finds reverts across `base_ref` for every `root` given,
    and prints one line per distinct revert to stdout.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'base_ref', help='Git ref or sha to check for reverts around.')
    parser.add_argument(
        '-C', '--git_dir', default='.', help='Git directory to use.')
    parser.add_argument(
        'root', nargs='+', help='Root(s) to search for commits from.')
    parser.add_argument('--debug', action='store_true')
    opts = parser.parse_args()

    logging.basicConfig(
        format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that keeps the order in which reverts were
    # found, so duplicates are dropped with a set while a list keeps the order.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    for revert in all_reverts:
        print(f'{revert.sha} claims to revert {revert.reverted_sha}')


if __name__ == '__main__':
    _main()