[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / utils / creduce-clang-crash.py
blob27361bb8850581845230a6b0c953ee7f4c837248
1 #!/usr/bin/env python3
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
4 Output files:
5 *.reduced.sh -- crash reproducer with minimal arguments
6 *.reduced.cpp -- the reduced file
7 *.test.sh -- interestingness test for C-Reduce
8 """
10 from __future__ import print_function
11 from argparse import ArgumentParser, RawTextHelpFormatter
12 import os
13 import re
14 import shutil
15 import stat
16 import sys
17 import subprocess
18 import pipes
19 import shlex
20 import tempfile
21 import shutil
22 import multiprocessing
# Module-level state, assigned in main() from the parsed command line.
# Gates verbose_print(); set from the -v/--verbose flag.
verbose = False
# Path to the creduce executable, resolved by check_cmd() in main().
creduce_cmd = None
# Path to the clang executable, resolved by check_cmd() in main().
clang_cmd = None
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only when `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
def check_file(fname):
    """Return the normalized path of `fname`.

    Exits the program with an error message when the normalized path is not
    an existing regular file.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Locate an executable and return the path reported by shutil.which.

    When cmd_path is given, it is made absolute and looked up directly;
    otherwise cmd_name is searched for in cmd_dir (or in the default search
    path when cmd_dir is empty). Exits with an error message if the lookup
    finds nothing.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        absolute_path = os.path.abspath(cmd_path)
        resolved = shutil.which(absolute_path)
        if not resolved:
            sys.exit("ERROR: executable `%s` not found" % (absolute_path))
        return resolved

    found = shutil.which(cmd_name, path=cmd_dir)
    if not found:
        searched = cmd_dir if cmd_dir else "$PATH"
        sys.exit("ERROR: `%s` not found in %s" % (cmd_name, searched))
    return found
def quote_cmd(cmd):
    """Return the argument list `cmd` as a single shell-escaped string.

    Each argument is escaped with shlex.quote and the results are joined with
    single spaces. shlex.quote is used rather than pipes.quote because the
    `pipes` module is deprecated and no longer available in Python 3.13+.
    """
    return " ".join(shlex.quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write `text` to `filename`, then add the owner-execute permission bit."""
    with open(filename, "w") as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
class Reduce(object):
    """Reduce a clang crash: simplify the command line and run C-Reduce.

    The constructor copies the input file to `<name>.reduced<ext>`, reads the
    clang arguments from the crash script and records which output identifies
    the crash. The remaining public methods are the individual pipeline steps
    that main() calls in order.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """Set up output file names and capture the crash signature.

        crash_script: script whose first non-comment line is the clang call.
        file_to_reduce: source file that triggers the crash.
        core_number: value passed to C-Reduce's `--n` option.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Keep both flags: the list must be built in one go so that "--tidy"
        # is not overwritten by the core-count option.
        self.creduce_flags = ["--tidy", "--n", str(core_number)]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the crash command as a list: [cmd] + args + [filename].

        Omitted pieces default to self.clang, self.clang_args and
        self.file_to_reduce. An explicitly empty `args` list is honored
        (only None selects the default), so callers can test a command with
        no extra arguments.
        """
        if not cmd:
            cmd = self.clang
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def _crash_output(self, args=None, filename=None):
        """Run the crash command and return its combined stdout/stderr as text."""
        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        raw_output, _ = p.communicate()
        # Undecodable bytes are replaced instead of raising UnicodeDecodeError.
        return raw_output.decode("utf-8", errors="replace")

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into self.clang_args.

        The clang executable (first word) and the last occurrence of
        `filename` are removed from the command. Exits if no command line is
        found in the script.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            cmd = []
            for line in f:
                stripped = line.strip()
                # Skip blank lines as well as comments so that an empty line
                # is not mistaken for the command.
                if not stripped or stripped.startswith("#"):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in self.expected_output either the first matching known error
        message or, failing that, up to five stack trace function names (and
        sets self.needs_stack_trace). Exits if neither is found.
        """
        print("\nGetting expected crash output...")
        crash_output = self._crash_output()
        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(ansi_escape, "", crash_output)

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still produces every expected message.

        `args` and `filename` default to the current clang arguments and the
        file being reduced; an empty `args` list is tested as given.
        """
        crash_output = self._crash_output(args=args, filename=filename)
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce interestingness script and sanity-check it."""
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        # The script fails (exit 1) when clang succeeds or when any expected
        # message is missing from the captured log.
        script_lines = [
            "#!/bin/bash",
            disable_symbolization,
            "if %s >& t.log ; then" % quote_cmd(self.get_crash_cmd()),
            "  exit 1",
            "fi",
        ]
        script_lines.extend(
            "grep -F %s t.log || exit 1" % shlex.quote(msg)
            for msg in self.expected_output
        )
        output = "\n".join(script_lines) + "\n"

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Verify the test passes on the original file and fails on an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, "w") as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if it still crashes.

        Tries preprocessing without line markers (-P) first, then with them;
        keeps the current source if neither variant reproduces the crash or
        if preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a copy of `args` with the selected options removed.

        opts_equal: arguments dropped on exact match.
        opts_startswith: arguments dropped when they start with any prefix.
        opts_one_arg_startswith: like opts_startswith, but the following
            argument is dropped too (the option's value).
        """
        equal = tuple(opts_equal)
        prefixes = tuple(opts_startswith)
        one_arg_prefixes = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in equal:
                continue
            if arg.startswith(prefixes):
                continue
            if arg.startswith(one_arg_prefixes):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` and keep the result only if the crash still reproduces.

        `kwargs` are passed to filter_args. If `extra_arg` is given it is
        moved to (or added at) the end of the filtered list. Returns the new
        list on success, otherwise the unchanged `args`.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at `index` (and its value, if any).

        Returns (new_args, index) when the crash still reproduces without the
        argument, otherwise (args, index + 1), so the caller can loop with
        the returned index.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file with the generated interestingness test.

        A Ctrl-C while C-Reduce is running kills it and returns normally so
        the caller can continue with whatever was reduced so far.
        """
        print("\nRunning C-Reduce...")
        # Start the process before entering the try block so that `p` is
        # always bound when the KeyboardInterrupt handler runs.
        p = subprocess.Popen(
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
            # Reap the killed process so it does not linger as a zombie.
            p.wait()
def main():
    """Parse the command line, locate the tools and run the reduction steps.

    Sets the module globals `verbose`, `creduce_cmd` and `clang_cmd`, then
    builds a Reduce object and runs its steps in order: simplify arguments,
    write the interestingness test, preprocess, run C-Reduce, and finally
    minimize the clang command.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    default_cores = max(4, multiprocessing.cpu_count() // 2)

    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    parser.add_argument(
        "file_to_reduce",
        type=str,
        nargs=1,
        help="Name of the file to be reduced.",
    )
    parser.add_argument(
        "--llvm-bin",
        dest="llvm_bin",
        type=str,
        help="Path to the LLVM bin directory.",
    )
    parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    parser.add_argument(
        "--n",
        dest="core_number",
        type=int,
        default=default_cores,
        help="Number of cores to use.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    options = parser.parse_args()

    verbose = options.verbose
    llvm_bin = None
    if options.llvm_bin:
        llvm_bin = os.path.abspath(options.llvm_bin)
    creduce_cmd = check_cmd("creduce", None, options.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, options.clang)

    # Positional arguments use nargs=1, so each value is a one-element list.
    crash_script = check_file(options.crash_script[0])
    file_to_reduce = check_file(options.file_to_reduce[0])

    reducer = Reduce(crash_script, file_to_reduce, options.core_number)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()
# Run the reduction only when this file is executed as a script.
if __name__ == "__main__":
    main()