2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
5 *.reduced.sh -- crash reproducer with minimal arguments
6 *.reduced.cpp -- the reduced file
7 *.test.sh -- interestingness test for C-Reduce
from __future__ import print_function

from argparse import ArgumentParser, RawTextHelpFormatter
import multiprocessing
import os
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile
def verbose_print(*args, **kwargs):
    """Forward all positional and keyword arguments to ``print``.

    NOTE(review): the name suggests the output should be gated on a
    verbosity flag, but no such check is visible in this block -- confirm
    the intended behavior before relying on this being silent by default.
    """
    print(*args, **kwargs)
def check_file(fname):
    """Normalize ``fname`` and verify that it names an existing regular file.

    Returns the normalized path. Exits the process with an error message
    (``SystemExit``) when the path is not an existing file.
    """
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    # Callers assign the result, so hand back the normalized path.
    return fname
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    When ``cmd_dir`` is None, ``shutil.which`` falls back to searching the
    PATH environment variable. Exits the process (``SystemExit``) with an
    error message if no executable is found.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = shutil.which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = shutil.which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    # Without an explicit directory the search covered PATH; say so instead
    # of printing "None" in the error message.
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir or "$PATH"))
def quote_cmd(cmd):
    """Join the argument list ``cmd`` into a single shell-safe command string.

    Each argument is quoted individually, so the result can be pasted into a
    shell script and will split back into the same arguments.
    """
    # shlex.quote replaces pipes.quote: the pipes module was removed in
    # Python 3.13 and its quote() was only an alias of this function.
    return " ".join(shlex.quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write ``text`` to ``filename`` and add the owner-executable bit.

    The file is created or truncated. Its existing mode bits are kept and
    ``stat.S_IEXEC`` is OR-ed in.
    """
    with open(filename, "w") as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
def __init__(self, crash_script, file_to_reduce, core_number):
    """Derive the output file names and capture the crash command and output.

    crash_script   -- path of the script that reproduces the crash
    file_to_reduce -- path of the source file to be reduced
    core_number    -- value passed to C-Reduce's ``--n`` option

    NOTE(review): this takes ``self`` but the enclosing class header is not
    visible in this view.
    """
    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

    self.testfile = file_reduce_name + ".test.sh"
    self.crash_script = crash_script_name + ".reduced" + crash_script_ext
    self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
    # The ".reduced" copy is the file recorded in self.file_to_reduce.
    shutil.copy(file_to_reduce, self.file_to_reduce)

    self.clang = clang_cmd
    self.clang_args = []
    self.expected_output = []
    self.needs_stack_trace = False
    # Build the flag list in one go: two consecutive assignments would drop
    # "--tidy" because the second one overwrites the first.
    self.creduce_flags = ["--tidy", "--n", str(core_number)]

    self.read_clang_args(crash_script, file_to_reduce)
    self.read_expected_output()
def get_crash_cmd(self, cmd=None, args=None, filename=None):
    """Build the command line as a list: ``[cmd] + args + [filename]``.

    Any parameter left as None falls back to the stored value
    (``self.clang``, ``self.clang_args``, ``self.file_to_reduce``).
    """
    if cmd is None:
        cmd = self.clang
    # Compare against None rather than truthiness: an explicitly empty
    # argument list must be honored, not replaced by the stored arguments.
    if args is None:
        args = self.clang_args
    if filename is None:
        filename = self.file_to_reduce

    return [cmd] + args + [filename]
def read_clang_args(self, crash_script, filename):
    """Extract the clang arguments from ``crash_script`` into ``self.clang_args``.

    The command is taken from the first line that is neither blank nor a
    comment. Its first word (the compiler) and the last occurrence of
    ``filename`` are removed; what remains is stored. Exits the process if
    no command line is found.
    """
    print("\nReading arguments from crash script...")
    cmd = []
    with open(crash_script) as f:
        # Assume clang call is the first non comment line.
        for line in f:
            stripped = line.strip()
            # Blank lines carry no command; keep looking instead of giving up.
            if stripped and not stripped.startswith("#"):
                cmd = shlex.split(line)
                break
    if not cmd:
        sys.exit("Could not find command in the crash script.")

    # Remove clang and filename from the command
    # Assume the last occurrence of the filename is the clang input file
    del cmd[0]
    for i in range(len(cmd) - 1, -1, -1):
        if cmd[i] == filename:
            del cmd[i]
            break
    self.clang_args = cmd
    verbose_print("Clang arguments:", quote_cmd(self.clang_args))
125 def read_expected_output(self
):
126 print("\nGetting expected crash output...")
127 p
= subprocess
.Popen(
128 self
.get_crash_cmd(), stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
130 crash_output
, _
= p
.communicate()
134 ansi_escape
= r
"\x1b\[[0-?]*m"
135 crash_output
= re
.sub(ansi_escape
, "", crash_output
.decode("utf-8"))
137 # Look for specific error messages
139 r
"Assertion .+ failed", # Linux assert()
140 r
"Assertion failed: .+,", # FreeBSD/Mac assert()
141 r
"fatal error: error in backend: .+",
143 r
"UNREACHABLE executed at .+?!",
144 r
"LLVM IR generation of declaration '.+'",
145 r
"Generating code for declaration '.+'",
146 r
"\*\*\* Bad machine code: .+ \*\*\*",
147 r
"ERROR: .*Sanitizer: [^ ]+ ",
149 for msg_re
in regexes
:
150 match
= re
.search(msg_re
, crash_output
)
154 print("Found message:", msg
)
157 # If no message was found, use the top five stack trace functions,
158 # ignoring some common functions
159 # Five is a somewhat arbitrary number; the goal is to get a small number
160 # of identifying functions with some leeway for common functions
162 self
.needs_stack_trace
= True
163 stacktrace_re
= r
"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
175 def skip_function(func_name
):
176 return any(name
in func_name
for name
in filters
)
178 matches
= re
.findall(stacktrace_re
, crash_output
)
179 result
= [x
for x
in matches
if x
and not skip_function(x
)][:5]
181 print("Found stack trace function:", msg
)
184 print("ERROR: no crash was found")
185 print("The crash output was:\n========\n%s========" % crash_output
)
188 self
.expected_output
= result
def check_expected_output(self, args=None, filename=None):
    """Run the crash command and report whether it still shows the crash.

    Returns True when every string in ``self.expected_output`` occurs in
    the command's combined stdout/stderr. ``args`` and ``filename`` default
    to the stored ``self.clang_args`` and ``self.file_to_reduce``.
    """
    if args is None:
        args = self.clang_args
    if filename is None:
        filename = self.file_to_reduce

    p = subprocess.Popen(
        self.get_crash_cmd(args=args, filename=filename),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    crash_output, _ = p.communicate()
    # Decode once, not once per expected message; undecodable bytes are
    # replaced so unusual output cannot abort the check.
    output_text = crash_output.decode("utf-8", errors="replace")
    return all(msg in output_text for msg in self.expected_output)
204 def write_interestingness_test(self
):
205 print("\nCreating the interestingness test...")
207 # Disable symbolization if it's not required to avoid slow symbolization.
208 disable_symbolization
= ""
209 if not self
.needs_stack_trace
:
210 disable_symbolization
= "export LLVM_DISABLE_SYMBOLIZATION=1"
212 output
= """#!/bin/bash
214 if %s >& t.log ; then
218 disable_symbolization
,
219 quote_cmd(self
.get_crash_cmd()),
222 for msg
in self
.expected_output
:
223 output
+= "grep -F %s t.log || exit 1\n" % pipes
.quote(msg
)
225 write_to_script(output
, self
.testfile
)
226 self
.check_interestingness()
def check_interestingness(self):
    """Sanity-check the generated interestingness test.

    Exits the process if the test script returns non-zero for the original
    file, or if the expected crash output is also produced for an empty
    input file.
    """
    testfile = os.path.abspath(self.testfile)

    # Check that the test considers the original file interesting
    returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL)
    if returncode:
        sys.exit("The interestingness test does not pass for the original file.")

    # Check that an empty file is not interesting
    # Instead of modifying the filename in the test file, just run the command
    with tempfile.NamedTemporaryFile() as empty_file:
        is_interesting = self.check_expected_output(filename=empty_file.name)
    if is_interesting:
        sys.exit("The interestingness test passes for an empty file.")
def clang_preprocess(self):
    """Try to replace the file being reduced with its preprocessed form.

    The crash command is run with ``-E`` into a temporary file, first with
    ``-P`` added and then without it. The first variant whose output still
    passes ``check_expected_output`` is copied over ``self.file_to_reduce``.
    If neither does, or the preprocessing command fails, the file is left
    unchanged.
    """
    print("\nTrying to preprocess the source file...")
    with tempfile.NamedTemporaryFile() as tmpfile:
        cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
        cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
        try:
            subprocess.check_call(cmd_preprocess_no_lines)
            if self.check_expected_output(filename=tmpfile.name):
                print("Successfully preprocessed with line markers removed")
                shutil.copy(tmpfile.name, self.file_to_reduce)
            else:
                subprocess.check_call(cmd_preprocess)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed without removing line markers")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    print(
                        "No longer crashes after preprocessing -- "
                        "using original source"
                    )
        except subprocess.CalledProcessError:
            print("Preprocessing failed")
269 args
, opts_equal
=[], opts_startswith
=[], opts_one_arg_startswith
=[]
277 if any(arg
== a
for a
in opts_equal
):
279 if any(arg
.startswith(a
) for a
in opts_startswith
):
281 if any(arg
.startswith(a
) for a
in opts_one_arg_startswith
):
def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
    """Try a filtered argument list and keep it only if the crash remains.

    ``kwargs`` are forwarded to ``self.filter_args``. If ``extra_arg`` is
    given it is moved to, or added at, the end of the filtered list.
    Returns the new list when it differs from ``args`` and still passes
    ``check_expected_output``; otherwise returns ``args`` unchanged.
    ``msg``, if given, is reported on success.
    """
    # Copy so the remove/append below can never touch the caller's list.
    new_args = list(self.filter_args(args, **kwargs))

    if extra_arg:
        if extra_arg in new_args:
            new_args.remove(extra_arg)
        new_args.append(extra_arg)

    if new_args != args and self.check_expected_output(args=new_args):
        if msg:
            verbose_print(msg)
        return new_args
    return args
def try_remove_arg_by_index(self, args, index):
    """Try dropping the argument at ``index`` and report where to continue.

    Returns ``(new_args, index)`` when the shortened list still passes
    ``check_expected_output`` (the next candidate has moved into ``index``),
    otherwise ``(args, index + 1)``.
    """
    new_args = args[:index] + args[index + 1 :]
    removed_arg = args[index]

    # Heuristic for grouping arguments:
    # remove next argument if it doesn't start with "-"
    if index < len(new_args) and not new_args[index].startswith("-"):
        del new_args[index]
        removed_arg += " " + args[index + 1]

    if self.check_expected_output(args=new_args):
        verbose_print("Removed", removed_arg)
        return new_args, index
    return args, index + 1
316 def simplify_clang_args(self
):
317 """Simplify clang arguments before running C-Reduce to reduce the time the
318 interestingness test takes to run.
320 print("\nSimplifying the clang command...")
322 # Remove some clang arguments to speed up the interestingness test
323 new_args
= self
.clang_args
324 new_args
= self
.try_remove_args(
326 msg
="Removed debug info options",
327 opts_startswith
=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
330 new_args
= self
.try_remove_args(
331 new_args
, msg
="Removed --show-includes", opts_startswith
=["--show-includes"]
333 # Not suppressing warnings (-w) sometimes prevents the crash from occurring
334 # after preprocessing
335 new_args
= self
.try_remove_args(
337 msg
="Replaced -W options with -w",
339 opts_startswith
=["-W"],
341 new_args
= self
.try_remove_args(
343 msg
="Replaced optimization level with -O0",
345 opts_startswith
=["-O"],
348 # Try to remove compilation steps
349 new_args
= self
.try_remove_args(
350 new_args
, msg
="Added -emit-llvm", extra_arg
="-emit-llvm"
352 new_args
= self
.try_remove_args(
353 new_args
, msg
="Added -fsyntax-only", extra_arg
="-fsyntax-only"
356 # Try to make implicit int an error for more sensible test output
357 new_args
= self
.try_remove_args(
359 msg
="Added -Werror=implicit-int",
361 extra_arg
="-Werror=implicit-int",
364 self
.clang_args
= new_args
365 verbose_print("Simplified command:", quote_cmd(self
.get_crash_cmd()))
def reduce_clang_args(self):
    """Minimize the clang arguments after running C-Reduce, to get the smallest
    command that reproduces the crash on the reduced file.

    The result is stored in ``self.clang_args`` and the quoted command is
    written to ``self.crash_script``.
    """
    print("\nReducing the clang crash command...")

    new_args = self.clang_args

    # Remove some often occurring args. Order matters: each step starts
    # from the result of the previous one.
    bulk_removals = (
        ("Removed -D options", {"opts_startswith": ["-D"]}),
        ("Removed -D options", {"opts_one_arg_startswith": ["-D"]}),
        ("Removed -I options", {"opts_startswith": ["-I"]}),
        ("Removed -I options", {"opts_one_arg_startswith": ["-I"]}),
        ("Removed -W options", {"opts_startswith": ["-W"]}),
        # Remove other cases that aren't covered by the heuristic
        ("Removed -mllvm", {"opts_one_arg_startswith": ["-mllvm"]}),
    )
    for msg, opts in bulk_removals:
        new_args = self.try_remove_args(new_args, msg=msg, **opts)

    # Then try every remaining argument one position at a time; the helper
    # returns the index to examine next.
    i = 0
    while i < len(new_args):
        new_args, i = self.try_remove_arg_by_index(new_args, i)

    self.clang_args = new_args

    reduced_cmd = quote_cmd(self.get_crash_cmd())
    write_to_script(reduced_cmd, self.crash_script)
    print("Reduced command:", reduced_cmd)
407 def run_creduce(self
):
408 print("\nRunning C-Reduce...")
410 p
= subprocess
.Popen(
413 + [self
.testfile
, self
.file_to_reduce
]
416 except KeyboardInterrupt:
417 # Hack to kill C-Reduce because it jumps into its own pgid
418 print("\n\nctrl-c detected, killed creduce")
427 parser
= ArgumentParser(description
=__doc__
, formatter_class
=RawTextHelpFormatter
)
432 help="Name of the script that generates the crash.",
435 "file_to_reduce", type=str, nargs
=1, help="Name of the file to be reduced."
438 "--llvm-bin", dest
="llvm_bin", type=str, help="Path to the LLVM bin directory."
444 help="The path to the `clang` executable. "
445 "By default uses the llvm-bin directory.",
451 help="The path to the `creduce` executable. "
452 "Required if `creduce` is not in PATH environment.",
458 default
=max(4, multiprocessing
.cpu_count() // 2),
459 help="Number of cores to use.",
461 parser
.add_argument("-v", "--verbose", action
="store_true")
462 args
= parser
.parse_args()
464 verbose
= args
.verbose
465 llvm_bin
= os
.path
.abspath(args
.llvm_bin
) if args
.llvm_bin
else None
466 creduce_cmd
= check_cmd("creduce", None, args
.creduce
)
467 clang_cmd
= check_cmd("clang", llvm_bin
, args
.clang
)
468 core_number
= args
.core_number
470 crash_script
= check_file(args
.crash_script
[0])
471 file_to_reduce
= check_file(args
.file_to_reduce
[0])
473 r
= Reduce(crash_script
, file_to_reduce
, core_number
)
475 r
.simplify_clang_args()
476 r
.write_interestingness_test()
479 r
.reduce_clang_args()
482 if __name__
== "__main__":