Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / utils / revert_checker.py
blob34395a6fe50572cc6c4171aea50d00f21a5d0fa5
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 # ===----------------------------------------------------------------------===##
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 # ===----------------------------------------------------------------------===##
10 """Checks for reverts of commits across a given git commit.
12 To clarify the meaning of 'across' with an example, if we had the following
13 commit history (where `a -> b` notes that `b` is a direct child of `a`):
15 123abc -> 223abc -> 323abc -> 423abc -> 523abc
17 And where 423abc is a revert of 223abc, this revert is considered to be 'across'
18 323abc. More generally, a revert A of a parent commit B is considered to be
19 'across' a commit C if C is a parent of A and B is a parent of C.
21 Please note that revert detection in general is really difficult, since merge
22 conflicts/etc always introduce _some_ amount of fuzziness. This script just
23 uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
24 reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
25 though.
27 This is designed to be used in one of two ways: an import in Python, or run
28 directly from a shell. If you want to import this, the `find_reverts`
29 function is the thing to look at. If you'd rather use this from a shell, have a
30 usage example:
32 ```
33 ./revert_checker.py c47f97169 origin/main origin/release/12.x
34 ```
36 This checks for all reverts from the tip of origin/main to c47f97169, which are
37 across the latter. It then does the same for origin/release/12.x to c47f97169.
38 Duplicate reverts discovered when walking both roots (origin/main and
39 origin/release/12.x) are deduplicated in output.
40 """
42 import argparse
43 import collections
44 import logging
45 import re
46 import subprocess
47 import sys
48 from typing import Generator, List, NamedTuple, Iterable
50 assert sys.version_info >= (3, 6), "Only Python 3.6+ is supported."
52 # People are creative with their reverts, and heuristics are a bit difficult.
53 # Like 90% of reverts have "This reverts commit ${full_sha}".
54 # Some lack that entirely, while others have many of them specified in ad-hoc
55 # ways, while others use short SHAs and whatever.
57 # The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
58 # starts involving human intervention, which is probably not worth it for now.
61 def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
62 if not commit_message:
63 return []
65 results = re.findall(r"This reverts commit ([a-f0-9]{40})\b", commit_message)
67 first_line = commit_message.splitlines()[0]
68 initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
69 if initial_revert:
70 results.append(initial_revert.group(1))
71 return results
74 def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
75 with subprocess.Popen(
76 command, stdout=subprocess.PIPE, encoding="utf-8", errors="replace"
77 ) as p:
78 assert p.stdout is not None # for mypy's happiness.
79 yield from p.stdout
82 def _resolve_sha(git_dir: str, sha: str) -> str:
83 if len(sha) == 40:
84 return sha
86 return subprocess.check_output(
87 ["git", "-C", git_dir, "rev-parse", sha],
88 encoding="utf-8",
89 stderr=subprocess.DEVNULL,
90 ).strip()
93 _LogEntry = NamedTuple(
94 "_LogEntry",
96 ("sha", str),
97 ("commit_message", str),
def _log_stream(git_dir: str, root_sha: str, end_at_sha: str) -> Iterable[_LogEntry]:
    """Yields a `_LogEntry` per commit printed by `git log ^end_at_sha root_sha`.

    Each commit is emitted by git as a separator line, then the full hash
    (`%H`), then the raw message body (`%B`). Entries are yielded in the order
    git prints them. Trailing whitespace is stripped from every message line
    and from the assembled message.
    """
    separator = 50 * "<>"
    git_log = [
        "git",
        "-C",
        git_dir,
        "log",
        "^" + end_at_sha,
        root_sha,
        "--format=" + separator + "%n%H%n%B%n",
    ]
    lines = iter(_stream_stdout(git_log))

    # Skip ahead to the first separator. There may be none at all if there is
    # nothing to log, and it may not be the very first line if git printed
    # something else beforehand. `any` stops consuming at the first match.
    at_commit = any(raw.rstrip() == separator for raw in lines)

    while at_commit:
        # The line right after a separator is always the commit hash.
        sha_line = next(lines, None)
        assert sha_line is not None, "git died?"

        # Everything up to the next separator (or end of output) is the message.
        message_lines = []
        at_commit = False
        for raw in lines:
            text = raw.rstrip()
            if text == separator:
                at_commit = True
                break
            message_lines.append(text)

        yield _LogEntry(sha_line.rstrip(), "\n".join(message_lines).rstrip())
def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
    """Lazily yields what `git rev-list --first-parent base_ref..head_ref` prints.

    Each output line is stripped of surrounding whitespace before being
    yielded.
    """
    commit_range = f"{base_ref}..{head_ref}"
    rev_list = ["git", "-C", git_dir, "rev-list", "--first-parent", commit_range]
    return map(str.strip, _stream_stdout(rev_list))
154 def _rev_parse(git_dir: str, ref: str) -> str:
155 return subprocess.check_output(
156 ["git", "-C", git_dir, "rev-parse", ref],
157 encoding="utf-8",
158 ).strip()
class Revert(NamedTuple):
    """A detected revert: commit `sha` claims to revert commit `reverted_sha`."""

    # The commit that performs the revert.
    sha: str
    # The commit that is being reverted.
    reverted_sha: str
170 def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
171 """Finds the closest common parent commit between `ref_a` and `ref_b`."""
172 return subprocess.check_output(
173 ["git", "-C", git_dir, "merge-base", ref_a, ref_b],
174 encoding="utf-8",
175 ).strip()
def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
    """Finds reverts across `across_ref` in `git_dir`, starting from `root`.

    These reverts are returned in order of oldest reverts first.

    Args:
        git_dir: directory passed to `git -C`.
        across_ref: ref or SHA that a revert must span to be reported; the
            reverted commit must not lie between it and `root`.
        root: ref or SHA to start walking history from.

    Returns:
        A `Revert` for each (reverting commit, reverted commit) pair found in
        the commits logged between `across_ref` and `root` whose reverted
        commit is not itself one of the first-parent commits in that range.

    Raises:
        ValueError: if the merge base of `across_ref` and `root` is not
            `across_ref` itself.
        subprocess.CalledProcessError: if resolving `across_ref`/`root` or
            computing their merge base fails.
    """
    across_sha = _rev_parse(git_dir, across_ref)
    root_sha = _rev_parse(git_dir, root)

    common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
    if common_ancestor != across_sha:
        # Both literals need the `f` prefix; adjacent string literals are
        # concatenated, but each one is formatted independently.
        raise ValueError(
            f"{across_sha} isn't an ancestor of {root_sha} "
            f"(common ancestor: {common_ancestor})"
        )

    intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
    assert across_sha not in intermediate_commits

    logging.debug(
        "%d commits appear between %s and %s",
        len(intermediate_commits),
        across_sha,
        root_sha,
    )

    all_reverts = []
    for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
        reverts = _try_parse_reverts_from_commit_message(commit_message)
        if not reverts:
            continue

        # An abbreviated SHA scraped from a commit subject may be ambiguous or
        # simply not exist. Treat that like any other unresolvable claim (warn
        # and move on) instead of aborting the whole scan.
        resolved_reverts = set()
        for claimed_sha in reverts:
            try:
                resolved_reverts.add(_resolve_sha(git_dir, claimed_sha))
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    claimed_sha,
                    sha,
                )

        # Sorted so the output order for a single commit is deterministic.
        for reverted_sha in sorted(resolved_reverts):
            if reverted_sha in intermediate_commits:
                # Both the commit and its revert landed after `across_sha`, so
                # the revert does not go across it.
                logging.debug(
                    "Commit %s reverts %s, which happened after %s",
                    sha,
                    reverted_sha,
                    across_sha,
                )
                continue

            try:
                object_type = subprocess.check_output(
                    ["git", "-C", git_dir, "cat-file", "-t", reverted_sha],
                    encoding="utf-8",
                    stderr=subprocess.DEVNULL,
                ).strip()
            except subprocess.CalledProcessError:
                logging.warning(
                    "Failed to resolve reverted object %s (claimed to be reverted "
                    "by sha %s)",
                    reverted_sha,
                    sha,
                )
                continue

            if object_type == "commit":
                all_reverts.append(Revert(sha, reverted_sha))
                continue

            logging.error(
                "%s claims to revert %s -- which isn't a commit -- %s",
                sha,
                object_type,
                reverted_sha,
            )

    # Since `all_reverts` contains reverts in log order (e.g., newer comes before
    # older), we need to reverse this to keep with our guarantee of older =
    # earlier in the result.
    all_reverts.reverse()
    return all_reverts
def _main() -> None:
    """Command-line entry point.

    Parses arguments, collects reverts across `base_ref` for every given root
    (dropping duplicates while keeping first-seen order), and prints one line
    per revert to stdout.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("base_ref", help="Git ref or sha to check for reverts around.")
    parser.add_argument("-C", "--git_dir", default=".", help="Git directory to use.")
    parser.add_argument("root", nargs="+", help="Root(s) to search for commits from.")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument(
        "-u",
        "--review_url",
        action="store_true",
        help="Format SHAs as llvm review URLs",
    )
    opts = parser.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=log_level,
    )

    def describe(sha: str) -> str:
        """Renders `sha` either bare or as a review URL, per `--review_url`."""
        if opts.review_url:
            return f"https://reviews.llvm.org/rG{sha}"
        return sha

    # `root`s can have related history, so we want to filter duplicate commits
    # out. The overwhelmingly common case is also to have one root, and it's way
    # easier to reason about output that comes in an order that's meaningful to
    # git.
    already_reported = set()
    unique_reverts = []
    for root in opts.root:
        for revert in find_reverts(opts.git_dir, opts.base_ref, root):
            if revert in already_reported:
                continue
            already_reported.add(revert)
            unique_reverts.append(revert)

    # Printing only starts once every root has been processed.
    for revert in unique_reverts:
        sha_fmt = describe(revert.sha)
        reverted_sha_fmt = describe(revert.reverted_sha)
        print(f"{sha_fmt} claims to revert {reverted_sha_fmt}")


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    _main()