2 # ===----------------------------------------------------------------------===##
4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 # See https://llvm.org/LICENSE.txt for license information.
6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 # ===----------------------------------------------------------------------===##
9 """A linter that detects potential typos in FileCheck directive names.
11 Consider a broken test foo.cpp:
13 // RUN: clang -cc1 -ast-dump %s | FileCheck %s --check-prefix=NEW
14 // RUN: clang -cc1 -ast-dump %s -std=c++98 | FileCheck %s --check-prefix=OLD
16 // NEWW: auto is a c++11 extension
17 // ODL-NOT: auto is a c++11 extension
19 We first detect the locally valid FileCheck directive prefixes by parsing the
20 --check-prefix flags. Here we get {CHECK, NEW, OLD}, so our directive names are
21 {CHECK, NEW, OLD, CHECK-NOT, NEW-NOT, ...}.
23 Then we look for lines that look like directives. These are of the form 'FOO:',
24 usually at the beginning of a line or a comment. If any of these are a
25 "near-miss" for a directive name, then we suspect this is a typo and report it.
27 Usage: filecheck_lint path/to/test/file/1 ... path/to/test/file/n
import itertools
import logging
import pathlib
import re
import sys
from typing import Generator, Sequence, Tuple
37 _distance_threshold
= 3
39 _suffixes
= {"-DAG", "-COUNT", "-EMPTY", "-LABEL", "-NEXT", "-NOT", "-SAME"}
40 # 'NOTE' and 'TODO' are not directives, but are likely to be false positives
41 # if encountered and to generate noise as a result. We filter them out also to
51 # 'COM' and 'RUN' are default comment prefixes for FileCheck.
52 _comment_prefixes
= {"COM", "RUN"}
53 _ignore
= _lit_directives
.union(_comment_prefixes
).union({"NOTE", "TODO"})
def levenshtein(s1: str, s2: str) -> int:  # pylint: disable=g-doc-args
    """Computes the edit distance between two strings.

    Additions, deletions, and substitutions all count as a single operation.
    Runs in O(len(s1) * len(s2)) time with two rolling rows of the classic
    dynamic-programming table.
    """
    # distances[j] holds the edit distance between s1[:i] and s2[:j] for the
    # previous row i; the initial row is the distance from "" to each prefix.
    distances = range(len(s2) + 1)
    for i in range(len(s1)):
        new_distances = [i + 1]
        for j in range(len(s2)):
            cost = min(
                distances[j] + int(s1[i] != s2[j]),  # substitution (free on match)
                new_distances[-1] + 1,  # insertion
                distances[j + 1] + 1,  # deletion
            )
            new_distances.append(cost)
        distances = new_distances
    return distances[-1]
81 """Stores the coordinates of a span on a single line within a file.
85 start_byte: the (inclusive) byte offset the span starts
86 end_byte: the (inclusive) byte offset the span ends
94 self
, content
: str, start_byte
: int, end_byte
: int
95 ): # pylint: disable=g-doc-args
97 Stores the coordinates of a span based on a string and start/end bytes.
99 `start_byte` and `end_byte` are assumed to be on the same line.
101 self
.content
= content
102 self
.start_byte
= start_byte
103 self
.end_byte
= end_byte
107 Derives span from line and coordinates.
109 start_column: the (inclusive) column where the span starts
110 end_column: the (inclusive) column where the span ends
112 content_before_span
= self
.content
[: self
.start_byte
]
113 line
= content_before_span
.count("\n") + 1
114 start_column
= self
.start_byte
- content_before_span
.rfind("\n")
115 end_column
= start_column
+ (self
.end_byte
- self
.start_byte
- 1)
117 return f
"{line}:{start_column}-{end_column}"
121 """Stores information about one typo and a suggested fix.
124 filepath: the path to the file in which the typo was found
125 filerange: the position at which the typo was found in the file
130 filepath
: pathlib
.Path
137 filepath
: pathlib
.Path
,
138 filerange
: FileRange
,
140 fix
: str, # pylint: disable=redefined-outer-name
142 self
.filepath
= filepath
143 self
.filerange
= filerange
147 def __str__(self
) -> str:
148 return f
"{self.filepath}:" + self
.filerange
.as_str() + f
": {self.summary()}"
150 def summary(self
) -> str:
152 f
'Found potentially misspelled directive "{self.typo}". Did you mean '
def find_potential_directives(
    content: str,
) -> Generator[Tuple["FileRange", str], None, None]:
    """Extracts all the potential FileCheck directives from a string.

    What constitutes a potential directive is loosely defined---we err on the side
    of capturing more strings than is necessary, rather than missing any.

    Args:
      content: the string in which to look for directives

    Yields:
      Tuples (p, d) where p is the span where the potential directive occurs
      within the string and d is the potential directive.
    """
    # A potential directive is a name (word chars, dashes, underscores) followed
    # by a colon, at the start of a line or of a //, ;, or # comment.
    directive_pattern = re.compile(
        r"(?:^|//|;|#)[^\d\w\-_]*([\d\w\-_][\s\d\w\-_]*):", re.MULTILINE
    )
    for match in re.finditer(directive_pattern, content):
        potential_directive, span = match.group(1), match.span(1)
        yield (FileRange(content, span[0], span[1]), potential_directive)
# TODO(bchetioui): also parse comment prefixes to ignore.
def parse_custom_prefixes(
    content: str,
) -> Generator[str, None, None]:  # pylint: disable=g-doc-args
    """Parses custom prefixes defined in the string provided.

    For example, given the following file content:
      RUN: something | FileCheck %s -check-prefixes CHECK1,CHECK2
      RUN: something_else | FileCheck %s -check-prefix 'CHECK3'
    the custom prefixes are CHECK1, CHECK2, and CHECK3.
    """
    # A parameter value may be single-quoted, double-quoted, or a bare token.
    param_re = r"|".join([r"'[^']*'", r'"[^"]*"', r'[^\'"\s]+'])
    for m in re.finditer(
        r"-check-prefix(?:es)?(?:\s+|=)({})".format(param_re), content
    ):
        prefixes = m.group(1)
        # Strip quotes before splitting the comma-separated prefix list.
        if prefixes.startswith("'") or prefixes.startswith('"'):
            prefixes = prefixes[1:-1]
        for prefix in prefixes.split(","):
            yield prefix
def find_directive_typos(
    content: str,
    filepath: pathlib.Path,
    threshold: int = 3,
) -> Generator["Diagnostic", None, None]:
    """Detects potential typos in FileCheck directives.

    Args:
      content: the content of the file
      filepath: the path to the file to check for typos in directives
      threshold: the (inclusive) maximum edit distance between a potential
        directive and an actual directive, such that the potential directive is
        classified as a typo

    Yields:
      Diagnostics, in order from the top of the file.
    """
    all_prefixes = _prefixes.union(set(parse_custom_prefixes(content)))
    # NOTE(review): reconstructed assembly — all valid directive names are each
    # prefix with each suffix, plus the bare prefixes and the ignore list.
    all_directives = (
        [
            f"{prefix}{suffix}"
            for prefix, suffix in itertools.product(all_prefixes, _suffixes)
        ]
        + list(all_prefixes)
        + list(_ignore)
    )

    def find_best_match(typo):
        # Seed with (threshold + 1, typo) so "no close directive" loses to any
        # directive within the threshold; length pre-filter skips hopeless pairs.
        return min(
            [(threshold + 1, typo)]
            + [
                (levenshtein(typo, d), d)
                for d in all_directives
                if abs(len(d) - len(typo)) <= threshold
            ],
            key=lambda tup: tup[0],
        )

    potential_directives = find_potential_directives(content)
    # Cache score and best_match to skip recalculating.
    score_and_best_match_for_potential_directive = dict()
    for filerange, potential_directive in potential_directives:
        # TODO(bchetioui): match count directives more finely. We skip directives
        # starting with 'CHECK-COUNT-' for the moment as they require more complex
        # logic to be handled correctly.
        if any(
            potential_directive.startswith(f"{prefix}-COUNT-")
            for prefix in all_prefixes
        ):
            continue

        # Ignoring potential typos that will not be matched later due to a too low
        # threshold, in order to avoid potentially long computation times.
        if len(potential_directive) > max(map(len, all_directives)) + threshold:
            continue

        if potential_directive not in score_and_best_match_for_potential_directive:
            score, best_match = find_best_match(potential_directive)
            score_and_best_match_for_potential_directive[potential_directive] = (
                score,
                best_match,
            )
        else:
            score, best_match = score_and_best_match_for_potential_directive[
                potential_directive
            ]
        if score == 0:  # This is an actual directive, ignore.
            continue
        elif score <= threshold and best_match not in _ignore:
            yield Diagnostic(filepath, filerange, potential_directive, best_match)
def main(argv: Sequence[str]):
    """Runs the linter over every file path given on the command line."""
    if len(argv) < 2:
        print(f"Usage: {argv[0]} path/to/file/1 ... path/to/file/n")
        # sys.exit rather than the site-injected `exit` builtin, which is only
        # guaranteed to exist in interactive sessions.
        sys.exit(1)

    for filepath in argv[1:]:
        logging.info("Checking %s", filepath)
        with open(filepath, "rt") as f:
            content = f.read()
        for diagnostic in find_directive_typos(
            content,
            pathlib.Path(filepath),
            threshold=_distance_threshold,
        ):
            print(diagnostic)
292 if __name__ == "__main__
":