2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
5 *.reduced.sh -- crash reproducer with minimal arguments
6 *.reduced.cpp -- the reduced file
7 *.test.sh -- interestingness test for C-Reduce
10 from __future__
import print_function
11 from argparse
import ArgumentParser
, RawTextHelpFormatter
21 from distutils
.spawn
import find_executable
22 import multiprocessing
def verbose_print(*args, **kwargs):
    """Print like the built-in ``print``, but only in verbose mode.

    All positional and keyword arguments are forwarded to ``print`` unchanged.
    The module-level ``verbose`` flag is assigned by the command-line entry
    point; if it has not been assigned yet it is treated as off.
    """
    if globals().get("verbose", False):
        print(*args, **kwargs)
def check_file(fname):
    """Normalize ``fname`` and make sure it names an existing regular file.

    Returns the normalized path so callers can keep using it.
    Exits the process with an error message if the file does not exist.
    """
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    # Callers assign the result, so the normalized path must be handed back.
    return fname
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    When ``cmd_dir`` is None the executable is looked up on the default
    search path. Exits the process with an error message if no executable
    is found.
    """
    # distutils is gone from recent Python releases; prefer shutil.which and
    # only fall back to distutils on interpreters that lack it.
    try:
        from shutil import which as find_cmd
    except ImportError:
        from distutils.spawn import find_executable as find_cmd

    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = find_cmd(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = find_cmd(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    # With no explicit directory the lookup used the environment's search
    # path; say so instead of printing "None".
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir or "$PATH"))
def quote_cmd(cmd):
    """Join a list of command arguments into one shell-safe string.

    Each argument is shell-quoted individually and the results are joined
    with single spaces.
    """
    # The pipes module is gone from recent Python releases; shlex.quote is
    # the same function. Fall back to pipes only where shlex lacks it.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    return ' '.join(quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write ``text`` to ``filename`` and mark the file executable.

    The file is overwritten if it exists. A trailing newline is appended so
    the script ends with a complete line. The owner-execute bit is added to
    whatever mode the file already has.
    """
    with open(filename, 'w') as f:
        f.write(text + '\n')
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
class Reduce(object):
    """Reduce one clang crash: shrink the source file and the clang command.

    Uses the module-level ``clang_cmd`` and ``creduce_cmd`` executable paths
    and the module-level helpers ``verbose_print``, ``quote_cmd`` and
    ``write_to_script``.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """Derive output file names, copy the input and read the crash.

        crash_script   -- script whose first non-comment line is the clang call
        file_to_reduce -- source file passed to clang in that call
        core_number    -- value passed to C-Reduce's ``--n`` option
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # Work on a copy so the caller's file is left untouched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Build the flag list in one go: assigning it twice would silently
        # discard "--tidy". The core count is forced to an int so a float
        # default never reaches the command line as e.g. "8.0".
        self.creduce_flags = ["--tidy", "--n", str(int(core_number))]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the crash command as a list: [cmd] + args + [filename].

        Omitted parts default to the stored clang path, clang arguments and
        file being reduced.
        """
        if not cmd:
            cmd = self.clang
        # An explicitly empty argument list is a valid request ("no extra
        # arguments"), so only None selects the stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from the crash script.

        Stores the command's arguments, without the executable and without
        the input file, in ``self.clang_args``. Exits if the script contains
        no command.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                if not line.lstrip().startswith('#'):
                    cmd = shlex.split(line)
                    break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Stores in ``self.expected_output`` either one known error message or,
        failing that, up to five function names from the stack trace. Exits
        with status 1 if neither is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Strip terminal color sequences so the patterns below can match.
        ansi_escape = r'\x1b\[[0-?]*m'
        crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))

        # Look for specific error messages
        regexes = [r"Assertion .+ failed",  # Linux assert()
                   r"Assertion failed: .+,",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: .+",
                   r"UNREACHABLE executed at .+?!",
                   r"LLVM IR generation of declaration '.+'",
                   r"Generating code for declaration '.+'",
                   r"\*\*\* Bad machine code: .+ \*\*\*",
                   r"ERROR: .*Sanitizer: [^ ]+ "]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
                       "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if the command still produces every expected message.

        ``args`` and ``filename`` default to the stored clang arguments and
        the file being reduced.
        """
        # Only None means "use the stored arguments"; an empty list is a
        # real candidate that must be checked as given.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        # Decode once instead of once per expected message.
        text = crash_output.decode('utf-8')
        return all(msg in text for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the shell test C-Reduce runs, then sanity-check it.

        The test fails (exit 1) if the crash command succeeds or if any
        expected message is missing from its output.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ''
        if not self.needs_stack_trace:
            disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test passes on the original and fails on an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file being reduced with its preprocessed form if possible.

        Tries preprocessing without line markers first, then with them. The
        file is only replaced when the preprocessed output still reproduces
        the expected crash output.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a new list with the matching options removed from ``args``.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop such arguments and the argument that
                                   follows each of them
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter ``args`` and keep the result only if the crash still occurs.

        ``kwargs`` are passed to ``filter_args``. If ``extra_arg`` is given it
        is moved to (or added at) the end of the filtered list. Returns the
        new list on success, otherwise the original ``args``.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at ``index`` (and its value, if any).

        Returns ``(new_args, index)`` if the crash still occurs without it,
        otherwise ``(args, index + 1)`` so the caller moves on.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg="-w",
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the interestingness test and the file being reduced."""
        print("\nRunning C-Reduce...")
        # Bound before the try so the handler never sees an undefined name
        # if the interrupt arrives before the process is created.
        p = None
        try:
            p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                                 [self.testfile, self.file_to_reduce])
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            if p is not None:
                p.kill()
def main():
    """Parse the command line and run the whole reduction.

    Sets the module-level ``verbose``, ``creduce_cmd`` and ``clang_cmd``
    values that the helpers and the ``Reduce`` class read, then simplifies
    the clang command, writes the interestingness test, preprocesses the
    source, runs C-Reduce and finally minimizes the clang command.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    # Floor division keeps the default an int: argparse does not run
    # non-string defaults through ``type``, so a float would leak through.
    parser.add_argument('--n', dest='core_number', type=int,
                        default=max(4, multiprocessing.cpu_count() // 2),
                        help="Number of cores to use.")
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)
    core_number = args.core_number

    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce, core_number)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == '__main__':
    main()