1 # ===----------------------------------------------------------------------===##
3 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 # See https://llvm.org/LICENSE.txt for license information.
5 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 # ===----------------------------------------------------------------------===##
8 """A linter that detects potential typos in FileCheck directive names.
10 Consider a broken test foo.cpp:
12 // RUN: clang -cc1 -ast-dump %s | FileCheck %s --check-prefix=NEW
13 // RUN: clang -cc1 -ast-dump %s -std=c++98 | FileCheck %s --check-prefix=OLD
15 // NEWW: auto is a c++11 extension
16 // ODL-NOT: auto is a c++11 extension
18 We first detect the locally valid FileCheck directive prefixes by parsing the
19 --check-prefix flags. Here we get {CHECK, NEW, OLD}, so our directive names are
20 {CHECK, NEW, OLD, CHECK-NOT, NEW-NOT, ...}.
22 Then we look for lines that look like directives. These are of the form 'FOO:',
23 usually at the beginning of a line or a comment. If any of these are a
24 "near-miss" for a directive name, then we suspect this is a typo and report it.
26 Usage: filecheck_lint path/to/test/file/1 ... path/to/test/file/n
34 from typing
import Generator
, Sequence
, Tuple
# Default (inclusive) maximum edit distance for a string to be reported as a
# typo of a directive.
_distance_threshold = 3
# NOTE(review): the definition of `_prefixes` is not visible in this chunk. The
# module docstring lists CHECK as the prefix that is valid without any
# --check-prefix flag, so it is reconstructed as such here -- confirm.
_prefixes = {"CHECK"}
_suffixes = {"-DAG", "-COUNT", "-EMPTY", "-LABEL", "-NEXT", "-NOT", "-SAME"}
# NOTE(review): the definition of `_lit_directives` is not visible in this
# chunk. It is reconstructed from the keywords documented for lit -- confirm
# against the original list.
_lit_directives = {
    "RUN",
    "REQUIRES",
    "UNSUPPORTED",
    "XFAIL",
    "DEFINE",
    "REDEFINE",
}
# 'COM' and 'RUN' are default comment prefixes for FileCheck.
_comment_prefixes = {"COM", "RUN"}
# 'NOTE' and 'TODO' are not directives, but are likely to be false positives
# if encountered and to generate noise as a result. We filter them out also to
# avoid that noise.
_ignore = _lit_directives.union(_comment_prefixes).union({"NOTE", "TODO"})
def levenshtein(s1: str, s2: str) -> int:  # pylint: disable=g-doc-args
    """Computes the edit distance between two strings.

    Additions, deletions, and substitutions all count as a single operation.

    Args:
      s1: the first string
      s2: the second string

    Returns:
      The minimum number of single-character additions, deletions, and
      substitutions needed to turn `s1` into `s2`.
    """
    # When one string is empty, the distance is the length of the other one.
    if not s1:
        return len(s2)
    if not s2:
        return len(s1)

    # `distances[j]` holds the distance between the prefix of `s1` processed so
    # far and `s2[:j]`; only the previous row of the table is kept.
    distances = list(range(len(s2) + 1))
    for i, char1 in enumerate(s1):
        new_distances = [i + 1]
        for j, char2 in enumerate(s2):
            cost = min(
                distances[j] + int(char1 != char2),  # substitution or match
                distances[j + 1] + 1,  # deletion
                new_distances[-1] + 1,  # addition
            )
            new_distances.append(cost)
        distances = new_distances
    return distances[-1]
class FileRange:
    """Stores the coordinates of a span on a single line within a file.

    Attributes:
      line:         the (1-based) line on which the span lies
      start_column: the (inclusive) column where the span starts
      end_column:   the (inclusive) column where the span ends
    """

    line: int
    start_column: int
    end_column: int

    def __init__(
        self, content: str, start_byte: int, end_byte: int
    ):  # pylint: disable=g-doc-args
        """Derives a span's coordinates based on a string and start/end bytes.

        `start_byte` and `end_byte` are assumed to be on the same line.

        Args:
          content: the string in which the span lies
          start_byte: the (inclusive) offset in `content` where the span starts
          end_byte: the (exclusive) offset in `content` where the span ends
        """
        content_before_span = content[:start_byte]
        # Lines are counted from 1: one more than the newlines before the span.
        self.line = content_before_span.count("\n") + 1
        # `rfind` returns -1 when the span is on the first line, which makes
        # columns count from 1 there as well.
        self.start_column = start_byte - content_before_span.rfind("\n")
        self.end_column = self.start_column + (end_byte - start_byte - 1)

    def __str__(self) -> str:
        """Formats the span as 'line:start_column-end_column'."""
        return f"{self.line}:{self.start_column}-{self.end_column}"
class Diagnostic:
    """Stores information about one typo and a suggested fix.

    Attributes:
      filepath:   the path to the file in which the typo was found
      filerange:  the position at which the typo was found in the file
      typo:       the typo
      fix:        a suggested fix
    """

    filepath: pathlib.Path
    filerange: "FileRange"
    typo: str
    fix: str

    def __init__(
        self,
        filepath: pathlib.Path,
        filerange: "FileRange",
        typo: str,
        fix: str,  # pylint: disable=redefined-outer-name
    ):
        """Records where a typo was found and which directive it resembles."""
        self.filepath = filepath
        self.filerange = filerange
        self.typo = typo
        self.fix = fix

    def __str__(self) -> str:
        """Formats the diagnostic as '<filepath>:<filerange>: <summary>'."""
        return f"{self.filepath}:" + str(self.filerange) + f": {self.summary()}"

    def summary(self) -> str:
        """Returns a one-line description of the typo and its suggested fix."""
        # NOTE(review): the end of this message is not visible in this chunk;
        # it is reconstructed as the quoted fix followed by '?' -- confirm.
        return (
            f'Found potentially misspelled directive "{self.typo}". Did you mean '
            f'"{self.fix}"?'
        )
def find_potential_directives(
    content: str,
) -> Generator[Tuple[FileRange, str], None, None]:
    """Extracts all the potential FileCheck directives from a string.

    What constitutes a potential directive is loosely defined---we err on the side
    of capturing more strings than is necessary, rather than missing any.

    Args:
      content: the string in which to look for directives

    Yields:
      Tuples (p, d) where p is the span where the potential directive occurs
      within the string and d is the potential directive.
    """
    # A candidate starts at the beginning of a line or after a comment marker
    # ('//', ';' or '#'), optionally preceded by other non-word characters, and
    # runs up to the next ':'. Only the candidate itself is captured.
    directive_pattern = re.compile(
        r"(?:^|//|;|#)[^\d\w\-_]*([\d\w\-_][\s\d\w\-_]*):", re.MULTILINE
    )
    for match in directive_pattern.finditer(content):
        potential_directive = match.group(1)
        start, end = match.span(1)
        yield (FileRange(content, start, end), potential_directive)
168 # TODO(bchetioui): also parse comment prefixes to ignore.
def parse_custom_prefixes(
    content: str,
) -> Generator[str, None, None]:  # pylint: disable=g-doc-args
    """Parses custom prefixes defined in the string provided.

    For example, given the following file content:
      RUN: something | FileCheck %s -check-prefixes CHECK1,CHECK2
      RUN: something_else | FileCheck %s -check-prefix 'CHECK3'

    the custom prefixes are CHECK1, CHECK2, and CHECK3.

    Args:
      content: the string in which to look for -check-prefix(es) flags

    Yields:
      Each prefix, in order of appearance in `content`.
    """
    # A flag value is a single-quoted string, a double-quoted string, or a run
    # of characters that are neither quotes nor whitespace.
    param_re = r"|".join([r"'[^']*'", r'"[^"]*"', r'[^\'"\s]+'])
    # The value follows the flag after whitespace or '='.
    flag_pattern = re.compile(
        r"-check-prefix(?:es)?(?:\s+|=)({})".format(param_re)
    )
    for m in flag_pattern.finditer(content):
        prefixes = m.group(1)
        # Drop the surrounding quotes, if any.
        if prefixes.startswith("'") or prefixes.startswith('"'):
            prefixes = prefixes[1:-1]
        # One flag may carry several comma-separated prefixes.
        for prefix in prefixes.split(","):
            yield prefix
def find_directive_typos(
    content: str,
    filepath: pathlib.Path,
    threshold: int = 3,
) -> Generator[Diagnostic, None, None]:
    """Detects potential typos in FileCheck directives.

    Args:
      content: the content of the file
      filepath: the path to the file to check for typos in directives
      threshold: the (inclusive) maximum edit distance between a potential
        directive and an actual directive, such that the potential directive is
        reported as a typo

    Yields:
      Diagnostics, in order from the top of the file.
    """
    # NOTE(review): the default of `threshold` and the exact construction of
    # `all_directives` are not visible in this chunk; both are reconstructed
    # from how they are used below -- confirm.
    all_prefixes = _prefixes.union(set(parse_custom_prefixes(content)))
    # Every name that must not be reported: each prefix with each suffix, the
    # bare prefixes, and the names that are deliberately ignored.
    all_directives = (
        [
            f"{prefix}{suffix}"
            for prefix, suffix in itertools.product(all_prefixes, _suffixes)
        ]
        + list(_ignore)
        + list(all_prefixes)
    )
    longest_directive = max(map(len, all_directives))

    def find_best_match(typo):
        """Returns (distance, directive) for the directive closest to `typo`."""
        return min(
            # Sentinel that scores above the threshold, used when no directive
            # has a length close enough to be worth comparing.
            [(threshold + 1, typo)]
            + [
                (levenshtein(typo, d), d)
                for d in all_directives
                if abs(len(d) - len(typo)) <= threshold
            ],
            key=lambda tup: tup[0],
        )

    potential_directives = find_potential_directives(content)

    for filerange, potential_directive in potential_directives:
        # TODO(bchetioui): match count directives more finely. We skip directives
        # starting with 'CHECK-COUNT-' for the moment as they require more complex
        # logic to be handled correctly.
        if any(
            potential_directive.startswith(f"{prefix}-COUNT-")
            for prefix in all_prefixes
        ):
            continue

        # Ignoring potential typos that will not be matched later due to a too low
        # threshold, in order to avoid potentially long computation times.
        if len(potential_directive) > longest_directive + threshold:
            continue

        score, best_match = find_best_match(potential_directive)
        if score == 0:  # This is an actual directive, ignore.
            continue
        elif score <= threshold and best_match not in _ignore:
            yield Diagnostic(filepath, filerange, potential_directive, best_match)
def main(argv: Sequence[str]):
    """Checks every file named on the command line and prints the typos found.

    Args:
      argv: the program name followed by the paths of the files to check
    """
    # NOTE(review): the condition guarding the usage message and what follows
    # it are not visible in this chunk; a non-zero exit is assumed -- confirm.
    if len(argv) < 2:
        program = argv[0] if argv else "filecheck_lint"
        print(f"Usage: {program} path/to/file/1 ... path/to/file/n")
        raise SystemExit(1)

    for filepath in argv[1:]:
        logging.info("Checking %s", filepath)
        with open(filepath, "rt") as f:
            content = f.read()
        # NOTE(review): how each diagnostic is reported is not visible in this
        # chunk; printing it (via Diagnostic.__str__) is assumed -- confirm.
        for diagnostic in find_directive_typos(
            content,
            pathlib.Path(filepath),
            threshold=_distance_threshold,
        ):
            print(diagnostic)
270 if __name__ == "__main__
":