[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / utils / revert_checker.py
blobef0c06bd1b5a49d156fa1f5db9be3faa2254ea46
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 #===----------------------------------------------------------------------===##
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 #===----------------------------------------------------------------------===##
10 """Checks for reverts of commits across a given git commit.
12 To clarify the meaning of 'across' with an example, if we had the following
13 commit history (where `a -> b` notes that `b` is a direct child of `a`):
15 123abc -> 223abc -> 323abc -> 423abc -> 523abc
17 And where 423abc is a revert of 223abc, this revert is considered to be 'across'
18 323abc. More generally, a revert A of a parent commit B is considered to be
19 'across' a commit C if C is a parent of A and B is a parent of C.
21 Please note that revert detection in general is really difficult, since merge
22 conflicts/etc always introduce _some_ amount of fuzziness. This script just
23 uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
24 reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
25 though.
27 This is designed to be used in one of two ways: an import in Python, or run
28 directly from a shell. If you want to import this, the `find_reverts`
29 function is the thing to look at. If you'd rather use this from a shell, have a
30 usage example:
32 ```
33 ./revert_checker.py c47f97169 origin/main origin/release/12.x
34 ```
36 This checks for all reverts from the tip of origin/main to c47f97169, which are
37 across the latter. It then does the same for origin/release/12.x to c47f97169.
38 Duplicate reverts discovered when walking both roots (origin/main and
39 origin/release/12.x) are deduplicated in output.
40 """
42 import argparse
43 import collections
44 import logging
45 import re
46 import subprocess
47 import sys
48 from typing import Generator, List, NamedTuple, Iterable
50 assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.'
52 # People are creative with their reverts, and heuristics are a bit difficult.
53 # Like 90% of reverts have "This reverts commit ${full_sha}".
54 # Some lack that entirely, while others have many of them specified in ad-hoc
55 # ways, while others use short SHAs and whatever.
57 # The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
58 # starts involving human intervention, which is probably not worth it for now.
61 def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
62 if not commit_message:
63 return []
65 results = re.findall(r'This reverts commit ([a-f0-9]{40})\b', commit_message)
67 first_line = commit_message.splitlines()[0]
68 initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
69 if initial_revert:
70 results.append(initial_revert.group(1))
71 return results
74 def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
75 with subprocess.Popen(
76 command, stdout=subprocess.PIPE, encoding='utf-8', errors='replace') as p:
77 assert p.stdout is not None # for mypy's happiness.
78 yield from p.stdout
81 def _resolve_sha(git_dir: str, sha: str) -> str:
82 if len(sha) == 40:
83 return sha
85 return subprocess.check_output(
86 ['git', '-C', git_dir, 'rev-parse', sha],
87 encoding='utf-8',
88 stderr=subprocess.DEVNULL,
89 ).strip()
92 _LogEntry = NamedTuple('_LogEntry', [
93 ('sha', str),
94 ('commit_message', str),
def _log_stream(git_dir: str, root_sha: str,
                end_at_sha: str) -> Iterable[_LogEntry]:
    """Yields a `_LogEntry` per commit printed by `git log ^end_at_sha root_sha`.

    Each commit is printed as a separator line, then its full hash (`%H`),
    then its raw body (`%B`). Entries are yielded in the order git prints
    them. Message lines have trailing whitespace stripped, as does the message
    as a whole.
    """
    # A line that is vanishingly unlikely to appear in a real commit message.
    sep = 50 * '<>'
    log_command = [
        'git',
        '-C',
        git_dir,
        'log',
        '^' + end_at_sha,
        root_sha,
        '--format=' + sep + '%n%H%n%B%n',
    ]

    stdout_stream = iter(_stream_stdout(log_command))

    # Find the next separator line. If there's nothing to log, it may not exist.
    # It might not be the first line if git feels complainy.
    found_commit_header = False
    for line in stdout_stream:
        if line.rstrip() == sep:
            found_commit_header = True
            break

    while found_commit_header:
        # The line right after a separator is always the commit hash.
        sha = next(stdout_stream, None)
        assert sha is not None, 'git died?'
        sha = sha.rstrip()

        commit_message = []

        # Everything up to the next separator (or EOF) is this commit's message.
        found_commit_header = False
        for line in stdout_stream:
            line = line.rstrip()
            if line == sep:
                found_commit_header = True
                break
            commit_message.append(line)

        yield _LogEntry(sha, '\n'.join(commit_message).rstrip())
def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lazily yields the SHAs `git rev-list --first-parent base_ref..head_ref`
    prints in `git_dir`, one per output line, whitespace-stripped.
    """
    rev_list = [
        'git',
        '-C',
        git_dir,
        'rev-list',
        '--first-parent',
        f'{base_ref}..{head_ref}',
    ]
    return (x.strip() for x in _stream_stdout(rev_list))
def _rev_parse(git_dir: str, ref: str) -> str:
    """Returns the whitespace-stripped output of `git rev-parse ref`, run in
    `git_dir`.

    Raises subprocess.CalledProcessError if git exits with a nonzero status.
    """
    rev_parse = ['git', '-C', git_dir, 'rev-parse', ref]
    output = subprocess.check_output(rev_parse, encoding='utf-8')
    return output.strip()
# A detected revert: commit `sha` claims to revert commit `reverted_sha`.
Revert = NamedTuple('Revert', [
    ('sha', str),  # The commit performing the revert.
    ('reverted_sha', str),  # The commit being reverted.
])
def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
    """Finds the closest common parent commit between `ref_a` and `ref_b`.

    This is the whitespace-stripped output of `git merge-base ref_a ref_b`,
    run in `git_dir`. Raises subprocess.CalledProcessError if git exits with a
    nonzero status.
    """
    merge_base = ['git', '-C', git_dir, 'merge-base', ref_a, ref_b]
    output = subprocess.check_output(merge_base, encoding='utf-8')
    return output.strip()
def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Raises:
        ValueError: if the merge base of `across_ref` and `root` is not
            `across_ref` itself.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both fragments must be f-strings, or `{common_ancestor}` would be
        # emitted literally.
        raise ValueError(f"{across_sha} isn't an ancestor of {root_sha} "
                         f'(common ancestor: {common_ancestor})')

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug('%d commits appear between %s and %s',
                  len(intermediate_commits), across_sha, root_sha)

    all_reverts = []
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts = _try_parse_reverts_from_commit_message(commit_message)
        if not reverts:
            continue

        # Dedupe (a message may name the same commit in several ways) and sort
        # so output order within one reverting commit is deterministic.
        resolved_reverts = sorted({_resolve_sha(git_dir, x) for x in reverts})
        for reverted_sha in resolved_reverts:
            # A revert of something that landed after `across_sha` doesn't
            # cross it, so it's of no interest here.
            if reverted_sha in intermediate_commits:
                logging.debug('Commit %s reverts %s, which happened after %s',
                              sha, reverted_sha, across_sha)
                continue

            try:
                object_type = subprocess.check_output(
                    ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha],
                    encoding='utf-8',
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    'Failed to resolve reverted object %s (claimed to be '
                    'reverted by sha %s)', reverted_sha, sha)
                continue

            if object_type == 'commit':
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            # The middle placeholder is the object *type* (e.g. blob, tree),
            # hence "a %s".
            logging.error(
                "%s claims to revert a %s -- which isn't a commit -- %s", sha,
                object_type, reverted_sha)

    # Since `all_reverts` contains reverts in log order (e.g., newer comes
    # before older), we need to reverse this to keep with our guarantee of
    # older = earlier in the result.
    all_reverts.reverse()
    return all_reverts
def _main() -> None:
    """Command-line entry point.

    Parses arguments, collects the reverts across `base_ref` for every given
    root, and prints one `<sha> claims to revert <sha>` line per unique revert.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'base_ref', help='Git ref or sha to check for reverts around.')
    parser.add_argument(
        '-C', '--git_dir', default='.', help='Git directory to use.')
    parser.add_argument(
        'root', nargs='+', help='Root(s) to search for commits from.')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument(
        '-u', '--review_url', action='store_true',
        help='Format SHAs as llvm review URLs')
    opts = parser.parse_args()

    logging.basicConfig(
        format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's
    # way easier to reason about output that comes in an order that's
    # meaningful to git.
    seen_reverts = set()
    all_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert not in seen_reverts:
                seen_reverts.add(revert)
                all_reverts.append(revert)

    for revert in all_reverts:
        sha_fmt = (f'https://reviews.llvm.org/rG{revert.sha}'
                   if opts.review_url else revert.sha)
        reverted_sha_fmt = (f'https://reviews.llvm.org/rG{revert.reverted_sha}'
                            if opts.review_url else revert.reverted_sha)
        print(f'{sha_fmt} claims to revert {reverted_sha_fmt}')
# Only run the command-line interface when executed as a script, so that
# importing this module doesn't parse arguments or print anything.
if __name__ == '__main__':
    _main()