[Clang] prevent errors for deduction guides using deduced type aliases (#117450)
[llvm-project.git] / llvm / utils / filecheck_lint / filecheck_lint.py
blob12f8299b83610ca587923b604ce6a88a698b4b5a
1 #!/usr/bin/env python3
2 # ===----------------------------------------------------------------------===##
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 # ===----------------------------------------------------------------------===##
9 """A linter that detects potential typos in FileCheck directive names.
11 Consider a broken test foo.cpp:
13 // RUN: clang -cc1 -ast-dump %s | FileCheck %s --check-prefix=NEW
14 // RUN: clang -cc1 -ast-dump %s -std=c++98 | FileCheck %s --check-prefix=OLD
15 auto x = 42;
16 // NEWW: auto is a c++11 extension
17 // ODL-NOT: auto is a c++11 extension
19 We first detect the locally valid FileCheck directive prefixes by parsing the
20 --check-prefix flags. Here we get {CHECK, NEW, OLD}, so our directive names are
21 {CHECK, NEW, OLD, CHECK-NOT, NEW-NOT, ...}.
23 Then we look for lines that look like directives. These are of the form 'FOO:',
24 usually at the beginning of a line or a comment. If any of these are a
25 "near-miss" for a directive name, then we suspect this is a typo and report it.
27 Usage: filecheck_lint path/to/test/file/1 ... path/to/test/file/n
28 """
30 import itertools
31 import logging
32 import pathlib
33 import re
34 import sys
35 from typing import Generator, Sequence, Tuple
37 _distance_threshold = 3
38 _prefixes = {"CHECK"}
39 _suffixes = {"-DAG", "-COUNT", "-EMPTY", "-LABEL", "-NEXT", "-NOT", "-SAME"}
40 # 'NOTE' and 'TODO' are not directives, but are likely to be false positives
41 # if encountered and to generate noise as a result. We filter them out also to
42 # avoid this.
43 _lit_directives = {
44 "RUN",
45 "REQUIRES",
46 "UNSUPPORTED",
47 "XFAIL",
48 "DEFINE",
49 "REDEFINE",
51 # 'COM' and 'RUN' are default comment prefixes for FileCheck.
52 _comment_prefixes = {"COM", "RUN"}
53 _ignore = _lit_directives.union(_comment_prefixes).union({"NOTE", "TODO"})
def levenshtein(s1: str, s2: str) -> int:  # pylint: disable=g-doc-args
    """Computes the edit distance between two strings.

    Additions, deletions, and substitutions all count as a single operation.
    """
    # An empty string is len(other) single-character insertions away.
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)

    # Single-row dynamic programming: distances[j] holds the edit distance
    # between the prefix of s1 processed so far and s2[:j].
    distances = range(len(s2) + 1)
    for i in range(len(s1)):
        new_distances = [i + 1]
        for j in range(len(s2)):
            cost = min(
                distances[j] + int(s1[i] != s2[j]),  # substitution (free on match)
                distances[j + 1] + 1,  # deletion from s1
                new_distances[-1] + 1,  # insertion into s1
            )
            new_distances.append(cost)
        distances = new_distances
    return distances[-1]
80 class FileRange:
81 """Stores the coordinates of a span on a single line within a file.
83 Attributes:
84 content: line str
85 start_byte: the (inclusive) byte offset the span starts
86 end_byte: the (inclusive) byte offset the span ends
87 """
89 content: str
90 start_byte: int
91 end_byte: int
93 def __init__(
94 self, content: str, start_byte: int, end_byte: int
95 ): # pylint: disable=g-doc-args
96 """
97 Stores the coordinates of a span based on a string and start/end bytes.
99 `start_byte` and `end_byte` are assumed to be on the same line.
101 self.content = content
102 self.start_byte = start_byte
103 self.end_byte = end_byte
105 def as_str(self):
107 Derives span from line and coordinates.
109 start_column: the (inclusive) column where the span starts
110 end_column: the (inclusive) column where the span ends
112 content_before_span = self.content[: self.start_byte]
113 line = content_before_span.count("\n") + 1
114 start_column = self.start_byte - content_before_span.rfind("\n")
115 end_column = start_column + (self.end_byte - self.start_byte - 1)
117 return f"{line}:{start_column}-{end_column}"
class Diagnostic:
    """Stores information about one typo and a suggested fix.

    Attributes:
      filepath: the path to the file in which the typo was found
      filerange: the position at which the typo was found in the file
      typo: the typo
      fix: a suggested fix
    """

    filepath: pathlib.Path
    filerange: FileRange
    typo: str
    fix: str

    def __init__(
        self,
        filepath: pathlib.Path,
        filerange: FileRange,
        typo: str,
        fix: str,  # pylint: disable=redefined-outer-name
    ):
        self.filepath = filepath
        self.filerange = filerange
        self.typo = typo
        self.fix = fix

    def __str__(self) -> str:
        """Formats as '<filepath>:<line>:<start>-<end>: <summary>'."""
        return f"{self.filepath}:" + self.filerange.as_str() + f": {self.summary()}"

    def summary(self) -> str:
        """Returns the human-readable, position-independent message for this typo."""
        return (
            f'Found potentially misspelled directive "{self.typo}". Did you mean '
            f'"{self.fix}"?'
        )
def find_potential_directives(
    content: str,
) -> Generator[Tuple[FileRange, str], None, None]:
    """Extracts all the potential FileCheck directives from a string.

    What constitutes a potential directive is loosely defined---we err on the side
    of capturing more strings than is necessary, rather than missing any.

    Args:
      content: the string in which to look for directives

    Yields:
      Tuples (p, d) where p is the span where the potential directive occurs
      within the string and d is the potential directive.
    """
    # A potential directive is a word (letters, digits, dashes, underscores,
    # possibly with inner whitespace) followed by ':', appearing at the start
    # of a line or after a '//', ';' or '#' comment marker.
    directive_pattern = re.compile(
        r"(?:^|//|;|#)[^\d\w\-_]*([\d\w\-_][\s\d\w\-_]*):", re.MULTILINE
    )
    for match in re.finditer(directive_pattern, content):
        potential_directive, span = match.group(1), match.span(1)
        yield (FileRange(content, span[0], span[1]), potential_directive)
# TODO(bchetioui): also parse comment prefixes to ignore.
def parse_custom_prefixes(
    content: str,
) -> Generator[str, None, None]:  # pylint: disable=g-doc-args
    """Parses custom prefixes defined in the string provided.

    For example, given the following file content:
      RUN: something | FileCheck %s -check-prefixes CHECK1,CHECK2
      RUN: something_else | FileCheck %s -check-prefix 'CHECK3'

    the custom prefixes are CHECK1, CHECK2, and CHECK3.
    """
    # The flag argument is either single-quoted, double-quoted, or a bare
    # token containing no whitespace or quotes.
    param_re = r"|".join([r"'[^']*'", r'"[^"]*"', r'[^\'"\s]+'])
    for m in re.finditer(
        r"-check-prefix(?:es)?(?:\s+|=)({})".format(param_re), content
    ):
        prefixes = m.group(1)
        # Strip surrounding quotes, if any.
        if prefixes.startswith("'") or prefixes.startswith('"'):
            prefixes = prefixes[1:-1]
        for prefix in prefixes.split(","):
            yield prefix
def find_directive_typos(
    content: str,
    filepath: pathlib.Path,
    threshold: int = 3,
) -> Generator[Diagnostic, None, None]:
    """Detects potential typos in FileCheck directives.

    Args:
      content: the content of the file
      filepath: the path to the file to check for typos in directives
      threshold: the (inclusive) maximum edit distance between a potential
        directive and an actual directive, such that the potential directive is
        classified as a typo

    Yields:
      Diagnostics, in order from the top of the file.
    """
    all_prefixes = _prefixes.union(set(parse_custom_prefixes(content)))
    all_directives = (
        [
            f"{prefix}{suffix}"
            for prefix, suffix in itertools.product(all_prefixes, _suffixes)
        ]
        + list(_ignore)
        + list(all_prefixes)
    )

    def find_best_match(typo):
        # Returns (score, directive) for the closest known directive; the
        # sentinel (threshold + 1, typo) wins when nothing is close enough.
        return min(
            [(threshold + 1, typo)]
            + [
                (levenshtein(typo, d), d)
                for d in all_directives
                if abs(len(d) - len(typo)) <= threshold
            ],
            key=lambda tup: tup[0],
        )

    potential_directives = find_potential_directives(content)
    # Loop-invariant: longest known directive, used to cheaply prune
    # potential directives that are too long to ever match.
    max_directive_len = max(map(len, all_directives))
    # Cache score and best_match to skip recalculating.
    score_and_best_match_for_potential_directive = dict()
    for filerange, potential_directive in potential_directives:
        # TODO(bchetioui): match count directives more finely. We skip directives
        # starting with 'CHECK-COUNT-' for the moment as they require more complex
        # logic to be handled correctly.
        if any(
            potential_directive.startswith(f"{prefix}-COUNT-")
            for prefix in all_prefixes
        ):
            continue

        # Ignoring potential typos that will not be matched later due to a too low
        # threshold, in order to avoid potentially long computation times.
        if len(potential_directive) > max_directive_len + threshold:
            continue

        if potential_directive not in score_and_best_match_for_potential_directive:
            score, best_match = find_best_match(potential_directive)
            score_and_best_match_for_potential_directive[potential_directive] = (
                score,
                best_match,
            )
        else:
            score, best_match = score_and_best_match_for_potential_directive[
                potential_directive
            ]
        if score == 0:  # This is an actual directive, ignore.
            continue
        elif score <= threshold and best_match not in _ignore:
            yield Diagnostic(filepath, filerange, potential_directive, best_match)
def main(argv: Sequence[str]):
    """Lints each file named in argv, printing diagnostics to stdout.

    Exits with status 1 when no file arguments are given.
    """
    if len(argv) < 2:
        print(f"Usage: {argv[0]} path/to/file/1 ... path/to/file/n")
        # sys.exit instead of the interactive-only `exit` helper.
        sys.exit(1)

    for filepath in argv[1:]:
        logging.info("Checking %s", filepath)
        with open(filepath, "rt") as f:
            content = f.read()
        for diagnostic in find_directive_typos(
            content,
            pathlib.Path(filepath),
            threshold=_distance_threshold,
        ):
            print(diagnostic)


if __name__ == "__main__":
    main(sys.argv)