2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
Unknown arguments are treated as creduce options.
6 *.reduced.sh -- crash reproducer with minimal arguments
7 *.reduced.cpp -- the reduced file
8 *.test.sh -- interestingness test for C-Reduce
11 from __future__
import print_function
12 from argparse
import ArgumentParser
, RawTextHelpFormatter
22 import multiprocessing
def verbose_print(*args, **kwargs):
    """Print like ``print()``, but only when verbose output is enabled.

    Reads the module-level ``verbose`` flag (set from ``--verbose`` in the
    command-line handling) at call time; all arguments are forwarded to
    ``print`` unchanged.
    """
    if verbose:
        print(*args, **kwargs)
def check_file(fname):
    """Return the normalized form of ``fname``, exiting if it is not a file.

    Args:
        fname: Path to an input file given on the command line.

    Returns:
        ``os.path.normpath(fname)``.

    Raises:
        SystemExit: If the normalized path is not an existing regular file.
    """
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    # Callers use the normalized path, so it must be handed back.
    return fname
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """Locate an executable and return its path, exiting if it is missing.

    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    Args:
        cmd_name: Executable name to search for when ``cmd_path`` is not given.
        cmd_dir: Directory to search for ``cmd_name``; ``None`` searches the
            ``PATH`` environment variable.
        cmd_path: Explicit path to the executable; takes precedence.

    Raises:
        SystemExit: If the executable cannot be found.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = shutil.which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = shutil.which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    # With no explicit directory the search used PATH; say so instead of
    # reporting "not found in None".
    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
def quote_cmd(cmd):
    """Return the argument list ``cmd`` as one shell-safe command string.

    Each argument is quoted with ``shlex.quote`` and the results are joined
    with single spaces, so the string can be pasted into a shell script.
    """
    return " ".join(shlex.quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write ``text`` to ``filename`` and make the file owner-executable.

    Args:
        text: Full contents of the script.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w") as f:
        f.write(text)
    # Add the owner-execute bit on top of whatever mode the file already has.
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
class Reduce(object):
    """Drive the reduction of one clang crash.

    Holds the clang executable, its arguments, the file being reduced and
    the strings that identify the crash in clang's output, and provides the
    steps that simplify the command, build the interestingness test, run
    C-Reduce and finally minimize the command line.
    """

    def __init__(self, crash_script, file_to_reduce, creduce_flags):
        """Set up output file names and capture the reference crash.

        Args:
            crash_script: Script whose first non-comment line is the clang
                invocation that crashes.
            file_to_reduce: Source file passed to clang in that invocation.
            creduce_flags: Extra command-line flags for C-Reduce.

        Copies ``file_to_reduce`` to ``<name>.reduced<ext>`` and runs clang
        once to record the expected crash output.
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        self.creduce_flags = ["--tidy"] + creduce_flags

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang command as a list: ``[cmd] + args + [filename]``.

        Any parameter left as ``None`` falls back to the stored clang path,
        arguments or file to reduce. An explicit empty ``args`` list is
        honoured, so a command with every argument removed can be built.
        """
        if not cmd:
            cmd = self.clang
        # `is None`, not truthiness: [] is a valid, fully reduced argument list.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from ``crash_script``.

        Stores in ``self.clang_args`` the first non-comment command with the
        executable and the last occurrence of ``filename`` removed.

        Raises:
            SystemExit: If the script contains no command.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            cmd = []
            for line in f:
                if not line.lstrip().startswith("#"):
                    cmd = shlex.split(line)
                    break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record strings that identify the crash.

        Prefers a known error message; otherwise falls back to up to five
        stack-trace function names and sets ``self.needs_stack_trace``.

        Raises:
            SystemExit: If neither a message nor a stack trace is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(
            self.get_crash_cmd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        # Crash dumps may contain bytes that are not valid UTF-8; replace them
        # rather than aborting the whole reduction.
        crash_output = re.sub(
            ansi_escape, "", crash_output.decode("utf-8", errors="replace")
        )

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if the command still produces every expected string.

        Args:
            args: Clang arguments to try; ``None`` uses the stored arguments.
                An empty list is tried as-is.
            filename: Input file to compile; defaults to the file to reduce.
        """
        # `is None`, not truthiness: an empty candidate list must be tested
        # itself, not silently swapped for the stored (known-crashing) one.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        crash_output, _ = p.communicate()
        decoded = crash_output.decode("utf-8", errors="replace")
        return all(msg in decoded for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce interestingness script and sanity-check it.

        The script fails if clang succeeds, and otherwise requires every
        expected string to appear in the captured output.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        for msg in self.expected_output:
            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Verify the test passes for the original file but not an empty one.

        Raises:
            SystemExit: If either check fails.
        """
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, "w") as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if possible.

        Tries preprocessing without line markers first, then with them, and
        keeps the original source if the crash no longer reproduces or the
        preprocessing command fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a copy of ``args`` without the matching options.

        Args:
            args: Argument list to filter; not modified.
            opts_equal: Options dropped when an argument equals one of them.
            opts_startswith: Prefixes; arguments starting with one are dropped.
            opts_one_arg_startswith: Prefixes of options that take a separate
                value; the option and the argument after it are both dropped.
        """
        # str.startswith needs a tuple; callers pass lists.
        prefixes = tuple(opts_startswith)
        prefixes_with_value = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in opts_equal:
                continue
            if arg.startswith(prefixes):
                continue
            if arg.startswith(prefixes_with_value):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter ``args`` and keep the result only if the crash still occurs.

        Args:
            args: Current argument list.
            msg: Message printed in verbose mode when the change is kept.
            extra_arg: Argument moved to (or added at) the end of the list.
            **kwargs: Filter options forwarded to ``filter_args``.

        Returns:
            The new argument list if it differs and still reproduces the
            crash, otherwise ``args`` unchanged.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at ``index`` (and its value, if any).

        Returns:
            ``(new_args, index)`` if the crash still reproduces without the
            argument, otherwise ``(args, index + 1)``.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")
        new_args = self.clang_args

        # Remove the color diagnostics flag to make it easier to match error
        # text.
        new_args = self.try_remove_args(
            new_args,
            msg="Removed -fcolor-diagnostics",
            opts_equal=["-fcolor-diagnostics"],
        )

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        # Walk the remaining arguments one at a time; the helper advances the
        # index only when the argument has to stay.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file to reduce using the interestingness test."""
        full_creduce_cmd = (
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        print("\nRunning C-Reduce...")
        verbose_print(quote_cmd(full_creduce_cmd))
        # Start the process outside the try so `p` is always bound when the
        # interrupt handler runs.
        p = subprocess.Popen(full_creduce_cmd)
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
def main():
    """Parse the command line and run the full crash-reduction pipeline.

    Unrecognized options are passed through to C-Reduce. Sets the
    module-level ``verbose``, ``creduce_cmd`` and ``clang_cmd`` values that
    the rest of the script reads.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    parser.add_argument(
        "file_to_reduce", type=str, nargs=1, help="Name of the file to be reduced."
    )
    parser.add_argument(
        "--llvm-bin", dest="llvm_bin", type=str, help="Path to the LLVM bin directory."
    )
    parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    # Anything argparse does not recognize is forwarded to C-Reduce.
    args, creduce_flags = parser.parse_known_args()
    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd("creduce", None, args.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, args.clang)

    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    # Default the C-Reduce job count to half the cores, but at least four.
    if "--n" not in creduce_flags:
        creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))]

    r = Reduce(crash_script, file_to_reduce, creduce_flags)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == "__main__":
    main()