[NFC][Py Reformat] Reformat python files in llvm
[llvm-project.git] / llvm / utils / filecheck_lint / filecheck_lint.py
blobdc054ab76a098e2f3e20e4eb2f83dd6bf082921f
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
"""A linter that detects potential typos in FileCheck directive names.

Consider a broken test foo.cpp:

// RUN: clang -cc1 -ast-dump %s | FileCheck %s --check-prefix=NEW
// RUN: clang -cc1 -ast-dump %s -std=c++98 | FileCheck %s --check-prefix=OLD
auto x = 42;
// NEWW: auto is a c++11 extension
// ODL-NOT: auto is a c++11 extension

We first detect the locally valid FileCheck directive prefixes by parsing the
--check-prefix flags. Here we get {CHECK, NEW, OLD}, so our directive names are
{CHECK, NEW, OLD, CHECK-NOT, NEW-NOT, ...}.

Then we look for lines that look like directives. These are of the form 'FOO:',
usually at the beginning of a line or a comment. If any of these are a
"near-miss" for a directive name, then we suspect this is a typo and report it.

Usage: filecheck_lint path/to/test/file/1 ... path/to/test/file/n
"""
29 import itertools
30 import logging
31 import pathlib
32 import re
33 import sys
34 from typing import Generator, Sequence, Tuple
# Maximum (inclusive) edit distance passed by main() to find_directive_typos().
_distance_threshold = 3
# Check prefixes that are always considered valid, in addition to any custom
# prefixes parsed from the file being linted.
_prefixes = {"CHECK"}
# Suffixes that are combined with every prefix to build the directive names.
_suffixes = {"-DAG", "-COUNT", "-EMPTY", "-LABEL", "-NEXT", "-NOT", "-SAME"}
# Directive names that are matched against but never suggested as a fix.
_lit_directives = {
    "RUN",
    "REQUIRES",
    "UNSUPPORTED",
    "XFAIL",
    "DEFINE",
    "REDEFINE",
}
# 'COM' and 'RUN' are default comment prefixes for FileCheck.
_comment_prefixes = {"COM", "RUN"}
# 'NOTE' and 'TODO' are not directives, but are likely to be false positives
# if encountered and to generate noise as a result. We filter them out also to
# avoid this.
_ignore = _lit_directives.union(_comment_prefixes).union({"NOTE", "TODO"})
def levenshtein(s1: str, s2: str) -> int:  # pylint: disable=g-doc-args
    """Computes the edit distance between two strings.

    Additions, deletions, and substitutions all count as a single operation.

    Args:
      s1: the first string
      s2: the second string

    Returns:
      The minimum number of single-character edits turning `s1` into `s2`.
    """
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)

    # Only the previous row of the dynamic-programming table is kept:
    # distances[j] is the edit distance between the prefix of s1 processed so
    # far and s2[:j].
    distances = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        new_distances = [i + 1]
        for j, c2 in enumerate(s2):
            cost = min(
                distances[j] + int(c1 != c2),  # substitution (free on match)
                distances[j + 1] + 1,  # deletion of c1
                new_distances[-1] + 1,  # insertion of c2
            )
            new_distances.append(cost)
        distances = new_distances
    return distances[-1]
class FileRange:
    """Stores the coordinates of a span on a single line within a file.

    Lines and columns are 1-based.

    Attributes:
      line: the line number
      start_column: the (inclusive) column where the span starts
      end_column: the (inclusive) column where the span ends
    """

    line: int
    start_column: int
    end_column: int

    def __init__(
        self, content: str, start_byte: int, end_byte: int
    ):  # pylint: disable=g-doc-args
        """Derives a span's coordinates based on a string and start/end bytes.

        `start_byte` and `end_byte` are assumed to be on the same line. Despite
        their names, they are used as indices into the string `content`, with
        `start_byte` inclusive and `end_byte` exclusive.
        """
        # Bounded count/rfind look only at content[:start_byte] without copying
        # that prefix for every span.
        self.line = content.count("\n", 0, start_byte) + 1
        # rfind returns -1 when the span is on the first line, which makes the
        # column 1-based in that case as well.
        self.start_column = start_byte - content.rfind("\n", 0, start_byte)
        self.end_column = self.start_column + (end_byte - start_byte - 1)

    def __str__(self) -> str:
        """Formats the span as 'line:start_column-end_column'."""
        return f"{self.line}:{self.start_column}-{self.end_column}"
class Diagnostic:
    """Stores information about one typo and a suggested fix.

    Attributes:
      filepath: the path to the file in which the typo was found
      filerange: the position at which the typo was found in the file
      typo: the typo
      fix: a suggested fix
    """

    filepath: pathlib.Path
    filerange: FileRange
    typo: str
    fix: str

    def __init__(
        self,
        filepath: pathlib.Path,
        filerange: FileRange,
        typo: str,
        fix: str,  # pylint: disable=redefined-outer-name
    ):
        """Records where a typo was found and the suggested replacement."""
        self.filepath = filepath
        self.filerange = filerange
        self.typo = typo
        self.fix = fix

    def __str__(self) -> str:
        """Formats the diagnostic as 'filepath:filerange: summary'."""
        return f"{self.filepath}:{self.filerange}: {self.summary()}"

    def summary(self) -> str:
        """Returns the human-readable message, without the file location."""
        return (
            f'Found potentially misspelled directive "{self.typo}". Did you mean '
            f'"{self.fix}"?'
        )
def find_potential_directives(
    content: str,
) -> Generator[Tuple[FileRange, str], None, None]:
    """Extracts all the potential FileCheck directives from a string.

    What constitutes a potential directive is loosely defined---we err on the side
    of capturing more strings than is necessary, rather than missing any.

    Args:
      content: the string in which to look for directives

    Yields:
      Tuples (p, d) where p is the span where the potential directive occurs
      within the string and d is the potential directive.
    """
    # A candidate starts at the beginning of a line or after a '//', ';' or '#'
    # comment marker, and runs up to the next ':'. Any leading characters that
    # are not word characters or '-' are skipped. The captured text may contain
    # whitespace.
    directive_pattern = re.compile(
        r"(?:^|//|;|#)[^\d\w\-_]*([\d\w\-_][\s\d\w\-_]*):", re.MULTILINE
    )
    for match in directive_pattern.finditer(content):
        start, end = match.span(1)
        yield (FileRange(content, start, end), match.group(1))
# TODO(bchetioui): also parse comment prefixes to ignore.
def parse_custom_prefixes(
    content: str,
) -> Generator[str, None, None]:  # pylint: disable=g-doc-args
    """Parses custom prefixes defined in the string provided.

    For example, given the following file content:
      RUN: something | FileCheck %s -check-prefixes CHECK1,CHECK2
      RUN: something_else | FileCheck %s -check-prefix 'CHECK3'

    the custom prefixes are CHECK1, CHECK2, and CHECK3.

    Args:
      content: the string in which to look for check-prefix flags

    Yields:
      Each non-empty prefix, in order of appearance. Prefixes given several
      times are yielded several times.
    """
    # The flag value is either a quoted string or a run of characters that
    # contains no quote and no whitespace.
    param_re = r"|".join([r"'[^']*'", r'"[^"]*"', r'[^\'"\s]+'])
    flag_re = rf"-check-prefix(?:es)?(?:\s+|=)({param_re})"
    for m in re.finditer(flag_re, content):
        prefixes = m.group(1)
        if prefixes.startswith(("'", '"')):
            prefixes = prefixes[1:-1]
        for prefix in prefixes.split(","):
            # 'A,,B', a trailing comma or an empty quoted value produce empty
            # items. An empty prefix is not a usable directive name, so skip
            # it rather than hand it to callers.
            if prefix:
                yield prefix
def find_directive_typos(
    content: str,
    filepath: pathlib.Path,
    threshold: int = 3,
) -> Generator[Diagnostic, None, None]:
    """Detects potential typos in FileCheck directives.

    Args:
      content: the content of the file
      filepath: the path to the file to check for typos in directives
      threshold: the (inclusive) maximum edit distance between a potential
        directive and an actual directive, such that the potential directive is
        classified as a typo

    Yields:
      Diagnostics, in order from the top of the file.
    """
    all_prefixes = _prefixes.union(set(parse_custom_prefixes(content)))
    # The sources are sets of strings, whose iteration order can differ from one
    # run to the next. Sorting within each group keeps the original priority
    # between groups (suffixed directives, then ignored names, then bare
    # prefixes) while making ties between equally distant candidates resolve
    # the same way on every run.
    all_directives = (
        sorted(
            f"{prefix}{suffix}"
            for prefix, suffix in itertools.product(all_prefixes, _suffixes)
        )
        + sorted(_ignore)
        + sorted(all_prefixes)
    )
    # Loop invariants, computed once rather than for every candidate.
    max_directive_length = max(map(len, all_directives))
    count_prefixes = tuple(f"{prefix}-COUNT-" for prefix in all_prefixes)

    def find_best_match(typo):
        # The leading sentinel scores above the threshold, so it wins whenever
        # no directive is close enough in length to be worth comparing.
        return min(
            [(threshold + 1, typo)]
            + [
                (levenshtein(typo, d), d)
                for d in all_directives
                if abs(len(d) - len(typo)) <= threshold
            ],
            key=lambda tup: tup[0],
        )

    for filerange, potential_directive in find_potential_directives(content):
        # TODO(bchetioui): match count directives more finely. We skip directives
        # starting with 'CHECK-COUNT-' for the moment as they require more complex
        # logic to be handled correctly.
        if potential_directive.startswith(count_prefixes):
            continue

        # Ignoring potential typos that will not be matched later due to a too low
        # threshold, in order to avoid potentially long computation times.
        if len(potential_directive) > max_directive_length + threshold:
            continue

        score, best_match = find_best_match(potential_directive)
        if score == 0:  # This is an actual directive, ignore.
            continue
        if score <= threshold and best_match not in _ignore:
            yield Diagnostic(filepath, filerange, potential_directive, best_match)
def main(argv: Sequence[str]):
    """Lints every file named on the command line and prints the diagnostics.

    Args:
      argv: the program name followed by the paths of the files to check

    Raises:
      SystemExit: with exit code 1 if no file path is provided.
    """
    if len(argv) < 2:
        # Fall back to a fixed name so an empty argv still gets the usage
        # message instead of an IndexError.
        program = argv[0] if argv else "filecheck_lint"
        print(f"Usage: {program} path/to/file/1 ... path/to/file/n")
        # sys.exit rather than the exit() helper, which is only installed by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    for filepath in argv[1:]:
        logging.info("Checking %s", filepath)
        # Decode explicitly so the result does not depend on the locale, and
        # replace undecodable bytes so that one stray byte in a test file does
        # not abort the whole run.
        with open(filepath, "rt", encoding="utf-8", errors="replace") as f:
            content = f.read()
        for diagnostic in find_directive_typos(
            content,
            pathlib.Path(filepath),
            threshold=_distance_threshold,
        ):
            print(diagnostic)


if __name__ == "__main__":
    main(sys.argv)