[clang-format] Add `TT_CompoundRequirementLBrace` for better annotation (#121539)
[llvm-project.git] / llvm / utils / git / code-format-helper.py
blob48a338aca9c8e6167c39cd8189e22d1e89e80e05
1 #!/usr/bin/env python3
3 # ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
5 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 # See https://llvm.org/LICENSE.txt for license information.
7 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9 # ==--------------------------------------------------------------------------------------==#
11 import argparse
12 import os
13 import re
14 import shlex
15 import subprocess
16 import sys
17 from typing import List, Optional
19 """
20 This script is run by GitHub Actions to ensure that the code in PRs conforms to
21 the coding style of LLVM. It can also be installed as a pre-commit git hook to
22 check the coding style before submitting it. The canonical source of this script
23 is in the LLVM source tree under llvm/utils/git.
25 For C/C++ code it uses clang-format and for Python code it uses darker (which
26 in turn invokes black).
28 You can learn more about the LLVM coding style on llvm.org:
29 https://llvm.org/docs/CodingStandards.html
31 You can install this script as a git hook by symlinking it to the .git/hooks
32 directory:
34 ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit
36 You can control the exact path to clang-format or darker with the following
37 environment variables: $CLANG_FORMAT_PATH and $DARKER_FORMAT_PATH.
38 """
class FormatArgs:
    """Options shared by every formatter run.

    An instance is either built from a parsed command line (``args`` given) or
    created empty and filled in attribute by attribute by the caller.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Annotation only: the list itself is created per instance in __init__.
    # A class-level ``[]`` default would be one list shared (and mutated) by
    # every FormatArgs object.
    changed_files: List[str]
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: bool = False

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Copy the known options out of ``args`` when one is supplied.

        Args:
            args: Parsed command line, or None to keep every default.
                ``verbose`` is never read from ``args``; it keeps its default.
        """
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            # Stored exactly as the namespace holds it; no parsing happens here.
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file
class FormatHelper:
    """Base class for one formatter or check that is run over the changed files.

    Subclasses provide ``name``, ``friendly_name`` and implement
    ``instructions``, ``has_tool`` and ``format_run``. ``run`` drives a check
    and, when a token and repository are configured, reports the outcome on
    the pull request.
    """

    COMMENT_TAG = "<!--LLVM CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Filled in by update_pr() when args.write_comment_to_file is set: a dict
    # with the comment "body" and, if a previous comment was found, its "id".
    comment: Optional[dict] = None

    @property
    def comment_tag(self) -> str:
        """Hidden marker that identifies this formatter's PR comment."""
        # Plain text substitution of "fmt": the surrounding braces stay in the
        # tag, which is what find_comment() looks for in comment bodies.
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command line a user can run locally; provided by subclasses."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Report whether the underlying tool can be run; provided by subclasses."""
        raise NotImplementedError()

    def format_run(
        self, changed_files: List[str], args: "FormatArgs"
    ) -> Optional[str]:
        """Run the check; provided by subclasses.

        Returns:
            None when there is nothing to report, otherwise the text to show
            the user. run() treats an empty string as "the tool failed
            without producing a diff".
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the markdown comment body shown when ``diff`` is non-empty."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: any) -> any:
        """Return the first comment on ``pr`` carrying this formatter's tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(
        self, comment_text: str, args: "FormatArgs", create_new: bool
    ) -> None:
        """Publish ``comment_text`` for this formatter on the pull request.

        An existing tagged comment is edited in place. A new comment is only
        created when ``create_new`` is true. When
        ``args.write_comment_to_file`` is set nothing is posted; the comment
        is stored in ``self.comment`` instead.
        """
        # Imported here so the github module is only required when a pull
        # request is actually being updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: "FormatArgs") -> bool:
        """Run the check on ``changed_files`` and report the result.

        Paths containing "third-party" are dropped before the check runs. The
        pull request is only touched when both ``args.token`` and
        ``args.repo`` are set.

        Returns:
            True when format_run() returned None (nothing to fix), False when
            it returned a diff or failed without producing one.
        """
        changed_files = [arg for arg in changed_files if "third-party" not in arg]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True

        if len(diff) > 0:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False

        # The formatter failed but didn't output a diff (e.g. some sort of
        # infrastructure failure). Only talk to GitHub when credentials are
        # configured; without them there is no PR to update.
        if should_update_gh:
            comment_text = (
                f":warning: The {self.friendly_name} failed without printing "
                "a diff. Check the logs for stderr output. :warning:"
            )
            self.update_pr(comment_text, args, create_new=False)
        return False
class ClangFormatHelper(FormatHelper):
    """Checks C/C++ changes by running git-clang-format in ``--diff`` mode."""

    name = "clang-format"
    friendly_name = "C/C++ code formatter"

    @property
    def instructions(self) -> str:
        """The command of the most recent format_run(), joined with spaces."""
        # cf_cmd only exists once format_run() has built a command.
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Extensionless files are only checked under ``libcxx/include``."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep the paths this helper checks, preserving their order."""
        cpp_extensions = (
            ".cpp",
            ".c",
            ".h",
            ".hpp",
            ".hxx",
            ".cxx",
            ".inc",
            ".cppm",
        )
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in cpp_extensions:
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Tool to invoke: $CLANG_FORMAT_PATH if set, else ``git-clang-format``."""
        return os.environ.get("CLANG_FORMAT_PATH", "git-clang-format")

    def has_tool(self) -> bool:
        """Return True when ``<tool> -h`` can be started and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # The tool is missing, not executable, or the path is malformed.
            # Anything else (e.g. Ctrl-C) is deliberately left to propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run git-clang-format over the C/C++ files in ``changed_files``.

        Returns:
            None when no file qualifies or the tool exits with 0; otherwise
            the tool's stdout (which may be empty if it failed outright).
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [self.clang_fmt_path, "--diff"]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        # Gather the extension of all modified files and pass them explicitly to git-clang-format.
        # This prevents git-clang-format from applying its own filtering rules on top of ours.
        # Periods are stripped since git-clang-format takes extensions without them.
        extensions = {os.path.splitext(path)[1].strip(".") for path in cpp_files}
        cf_cmd.append("--extensions")
        # Sorted so the command (also shown to users via `instructions`) does
        # not depend on set iteration order.
        cf_cmd.append(",".join(sorted(extensions)))

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode == 0:
            return None

        # formatting needed, or the command otherwise failed
        diff = proc.stdout.decode("utf-8")
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(diff)
        return diff
class DarkerFormatHelper(FormatHelper):
    """Checks Python changes by running darker with ``--check --diff``."""

    name = "darker"
    friendly_name = "Python code formatter"

    @property
    def instructions(self) -> str:
        """The command of the most recent format_run(), joined with spaces."""
        # darker_cmd only exists once format_run() has built a command.
        return " ".join(self.darker_cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the ``.py`` paths, preserving their order."""
        return [path for path in changed_files if os.path.splitext(path)[1] == ".py"]

    @property
    def darker_fmt_path(self) -> str:
        """Tool to invoke: $DARKER_FORMAT_PATH if set, else ``darker``."""
        return os.environ.get("DARKER_FORMAT_PATH", "darker")

    def has_tool(self) -> bool:
        """Return True when ``<tool> --version`` can be started and exits with 0."""
        cmd = [self.darker_fmt_path, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # The tool is missing, not executable, or the path is malformed.
            # Anything else (e.g. Ctrl-C) is deliberately left to propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run darker over the Python files in ``changed_files``.

        Returns:
            None when no file qualifies or the tool exits with 0; otherwise
            the tool's stdout (which may be empty if it failed outright).
        """
        py_files = self.filter_changed_files(changed_files)
        if not py_files:
            return None

        darker_cmd = [
            self.darker_fmt_path,
            "--check",
            "--diff",
        ]
        if args.start_rev and args.end_rev:
            darker_cmd += ["-r", f"{args.start_rev}...{args.end_rev}"]
        darker_cmd += py_files
        if args.verbose:
            print(f"Running: {' '.join(darker_cmd)}")
        self.darker_cmd = darker_cmd
        proc = subprocess.run(
            darker_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        if args.verbose:
            sys.stdout.write(proc.stderr.decode("utf-8"))

        output = proc.stdout.decode("utf-8")
        if proc.returncode == 0:
            sys.stdout.write(output)
            return None

        # formatting needed, or the command otherwise failed
        if args.verbose:
            print(f"error: {self.name} exited with code {proc.returncode}")
            # Print the diff in the log so that it is viewable there
            print(output)
        return output
class UndefGetFormatHelper(FormatHelper):
    """Flags changes that add new uses of ``undef`` / ``UndefValue::get``."""

    name = "undef deprecator"
    friendly_name = "undef deprecator"

    @property
    def instructions(self) -> str:
        """The shell-quoted command of the most recent format_run()."""
        # cmd only exists once format_run() has built a command.
        return " ".join(shlex.quote(c) for c in self.cmd)

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep the C/C++ and ``.ll`` paths, preserving their order."""
        checked_extensions = (
            ".cpp",
            ".c",
            ".h",
            ".hpp",
            ".hxx",
            ".cxx",
            ".inc",
            ".cppm",
            ".ll",
        )
        return [
            path
            for path in changed_files
            if os.path.splitext(path)[1] in checked_extensions
        ]

    def has_tool(self) -> bool:
        """Always True: has_tool() performs no availability probe for this check."""
        return True

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Build the markdown comment body; ``diff`` is the report from format_run()."""
        return f"""
:warning: {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

{diff}
"""

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Look for added lines that introduce ``undef`` in the changed files.

        Returns:
            None when no relevant file changed or no new use was found,
            otherwise a markdown report listing the offending files.
        """
        files = self.filter_changed_files(changed_files)
        if not files:
            # With no pathspec `git diff` would inspect every file in the
            # range, including ones this check is not meant to cover.
            return None

        # Use git to find files that have had a change in the number of undefs
        regex = "([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)"
        cmd = ["git", "diff", "-U0", "--pickaxe-regex", "-S", regex]

        if args.start_rev and args.end_rev:
            cmd.append(args.start_rev)
            cmd.append(args.end_rev)

        cmd += files
        self.cmd = cmd

        if args.verbose:
            print(f"Running: {self.instructions}")

        proc = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8"
        )
        sys.stdout.write(proc.stderr)
        stdout = proc.stdout

        files = []
        # Split the diff so we have one array entry per file.
        # Each file is prefixed like:
        # diff --git a/file b/file
        for file_diff in re.split("^diff --git ", stdout, flags=re.MULTILINE):
            # search for additions of undef
            if not re.search(
                r"^[+](?!\s*#\s*).*(\bundef\b|UndefValue::get)",
                file_diff,
                re.MULTILINE,
            ):
                continue
            header = file_diff.splitlines()[0]
            match = re.match("a/([^ ]+)", header)
            # Fall back to the raw header line if it is not in the usual
            # "a/<path> b/<path>" form, rather than failing on it.
            path = match[1] if match else header
            # We skip checking in MIR files as undef is a valid token and not
            # going away. The test is on the file name, not on the diff text.
            if path.endswith(".mir"):
                continue
            files.append(path)

        if not files:
            return None

        files = "\n".join(" - " + f for f in files)
        report = f"""
The following files introduce new uses of undef:
{files}

[Undef](https://llvm.org/docs/LangRef.html#undefined-values) is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. You should use `poison` values for placeholders instead.

In tests, avoid using `undef` and having tests that trigger undefined behavior. If you need an operand with some unimportant value, you can add a new argument to the function and use that instead.

For example, this is considered a bad practice:
```llvm
define void @fn() {{
  ...
  br i1 undef, ...
}}
```

Please use the following instead:
```llvm
define void @fn(i1 %cond) {{
  ...
  br i1 %cond, ...
}}
```

Please refer to the [Undefined Behavior Manual](https://llvm.org/docs/UndefinedBehavior.html) for more information.
"""
        if args.verbose:
            print(f"error: {self.name} failed")
            print(report)
        return report
# One shared instance of each check; both entry points iterate this tuple in order.
ALL_FORMATTERS = (
    DarkerFormatHelper(),
    ClangFormatHelper(),
    UndefGetFormatHelper(),
)
def hook_main():
    """Entry point used when the script runs as a git pre-commit hook.

    Runs every available formatter over the staged files and exits the
    process with status 1 if any of them reports a problem, 0 otherwise.
    """
    # fill out args
    args = FormatArgs()
    # Environment values are strings, so "0" or "false" would be truthy if
    # used directly; treat the usual "off" spellings as disabled.
    verbose_env = os.getenv("FORMAT_HOOK_VERBOSE", "")
    args.verbose = verbose_env.strip().lower() not in ("", "0", "false", "no", "off")

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a fresh list instead of appending, so a list shared through the
    # FormatArgs class is never mutated.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    # No PR comments are gathered here: this function has no list to put them
    # in, and it never enables args.write_comment_to_file.
    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if not fmt.has_tool():
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())
            continue
        if not fmt.run(args.changed_files, args):
            failed_fmts.append(fmt.name)

    if failed_fmts:
        print(
            "Pre-commit format hook failed, rerun with FORMAT_HOOK_VERBOSE=1 environment for verbose output"
        )
        sys.exit(1)

    sys.exit(0)
if __name__ == "__main__":
    # When installed (e.g. symlinked) under .git/hooks the script acts as a
    # pre-commit hook and takes no command line arguments. Separators are
    # normalised so the check also matches backslash-separated paths.
    script_path = os.path.abspath(__file__)
    if ".git/hooks" in script_path.replace(os.sep, "/"):
        hook_main()
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token", type=str, required=True, help="GitHub authentication token"
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev", type=str, required=True, help="Compute changes to this revision"
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        action="store_true",
        help="Don't post comments on the PR, instead write the comments and metadata to a file called 'comments'",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as one comma separated string (or not at all).
    changed_files = []
    if args.changed_files:
        changed_files = args.changed_files.split(",")

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        # Only populated when --write-comment-to-file is in effect.
        if fmt.comment:
            comments.append(fmt.comment)

    if comments:
        import json

        with open("comments", "w") as f:
            json.dump(comments, f)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)