[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / clang / utils / creduce-clang-crash.py
blob08056e52b8264f9296f781830db6d477ae06f196
1 #!/usr/bin/env python
2 """Calls C-Reduce to create a minimal reproducer for clang crashes.
4 Output files:
5 *.reduced.sh -- crash reproducer with minimal arguments
6 *.reduced.cpp -- the reduced file
7 *.test.sh -- interestingness test for C-Reduce
8 """
10 from __future__ import print_function
11 from argparse import ArgumentParser, RawTextHelpFormatter
12 import os
13 import re
14 import stat
15 import sys
16 import subprocess
17 import pipes
18 import shlex
19 import tempfile
20 import shutil
21 from distutils.spawn import find_executable
22 import multiprocessing
# Module-wide settings. These are placeholders; main() rebinds all three
# after parsing the command line.
verbose = False  # When true, verbose_print() forwards to print().
creduce_cmd = clang_cmd = None  # Paths to the creduce / clang executables.
def verbose_print(*args, **kwargs):
    """Forward all arguments to print(), but only if the global `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
def check_file(fname):
    """Return the normalized form of `fname`.

    Exits the program with an error message if the normalized path does not
    name an existing regular file.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or the path to cmd_name found in cmd_dir.

    cmd_name -- executable name to look up when cmd_path is not given.
    cmd_dir  -- directory (or os.pathsep-separated list of directories) to
                search for cmd_name; a false value means search $PATH.
    cmd_path -- optional explicit path to the executable.

    Exits the program with an error message if no executable is found.
    """
    # Prefer shutil.which: distutils is deprecated and no longer ships with
    # Python 3.12+. Fall back to distutils only where shutil.which does not
    # exist (Python 2). Unlike find_executable, shutil.which also verifies
    # that the file is executable, which is what the callers need.
    try:
        from shutil import which
    except ImportError:
        from distutils.spawn import find_executable as which

    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    # Only used to make the error message readable.
    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
def quote_cmd(cmd):
    """Return the argument list `cmd` as a single shell-escaped command line.

    Each argument is quoted individually and the results are joined with
    single spaces; an empty list yields an empty string.
    """
    # shlex.quote is the supported API; the `pipes` module is deprecated and
    # no longer ships with Python 3.13+. Fall back to pipes.quote only where
    # shlex.quote does not exist (Python 2).
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    return ' '.join(quote(arg) for arg in cmd)
def write_to_script(text, filename):
    """Write `text` to `filename` (overwriting it) and set its owner-execute bit."""
    with open(filename, 'w') as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
class Reduce(object):
    """Drives the reduction of one clang crash.

    Construction copies the input file to `<name>.reduced<ext>`, reads the
    clang arguments from the crash script and runs clang once to learn which
    output identifies the crash. The remaining methods simplify the command,
    write the interestingness test, preprocess the source, run C-Reduce and
    finally minimize the command line.

    Attributes:
      testfile          -- path of the generated interestingness test script
      crash_script      -- path the reduced crash command is written to
      file_to_reduce    -- path of the working copy that gets reduced
      clang             -- clang executable (taken from the global clang_cmd)
      clang_args        -- current clang arguments, without clang and the input
      expected_output   -- strings that must all appear in the crash output
      needs_stack_trace -- True if expected_output was taken from a stack trace
      creduce_flags     -- extra flags passed to C-Reduce
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """Set up output paths and read the command and expected crash output.

        crash_script   -- script whose first command line is the clang call
        file_to_reduce -- source file that triggers the crash
        core_number    -- value passed to C-Reduce's --n flag
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # Work on a copy so the original input file is never modified.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Both flags go into one list (previously the second assignment
        # silently discarded "--tidy"). int() keeps a float core count from
        # being rendered as e.g. "8.0".
        self.creduce_flags = ["--tidy", "--n", str(int(core_number))]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the crash command as a list: [cmd] + args + [filename].

        Each parameter left unset defaults to the corresponding attribute
        (self.clang, self.clang_args, self.file_to_reduce). An explicitly
        empty `args` list is honoured and means "no arguments".
        """
        if not cmd:
            cmd = self.clang
        # Compare against None: an empty list is a legitimate candidate when
        # every argument has been removed.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + list(args) + [filename]

    def read_clang_args(self, crash_script, filename):
        """Load self.clang_args from the first command line of `crash_script`.

        Blank lines and lines starting with '#' are skipped. The first word
        (clang itself) and the last occurrence of `filename` are dropped.
        Exits the program if the script contains no command.
        """
        print("\nReading arguments from crash script...")
        cmd = []
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command and record what identifies the crash.

        Sets self.expected_output to either one recognized error message or
        up to five stack trace function names (in which case
        self.needs_stack_trace is set). Exits the program if neither is found.
        """
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        # 'replace' keeps undecodable bytes in the compiler output from
        # aborting the run with a UnicodeDecodeError.
        crash_output = re.sub(ansi_escape, '',
                              crash_output.decode('utf-8', 'replace'))

        # Look for specific error messages
        regexes = [r"Assertion .+ failed",  # Linux assert()
                   r"Assertion failed: .+,",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: .+",
                   r"LLVM ERROR: .+",
                   r"UNREACHABLE executed at .+?!",
                   r"LLVM IR generation of declaration '.+'",
                   r"Generating code for declaration '.+'",
                   r"\*\*\* Bad machine code: .+ \*\*\*",
                   r"ERROR: .*Sanitizer: [^ ]+ "]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
                       "HandleCrash", "SignalHandler", "__restore_rt", "gsignal",
                       "abort"]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still produces every expected string.

        `args` and `filename` default to self.clang_args and
        self.file_to_reduce; an explicitly empty `args` list is honoured.
        """
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        # Decode once, tolerating bytes that are not valid UTF-8.
        crash_output = crash_output.decode('utf-8', 'replace')
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the bash interestingness test to self.testfile and validate it.

        The script fails (exit 1) if the crash command succeeds or if any of
        the expected strings is missing from the command's output.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ''
        if not self.needs_stack_trace:
            disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

        lines = [
            '#!/bin/bash',
            disable_symbolization,
            'if %s >& t.log ; then' % quote_cmd(self.get_crash_cmd()),
            '  exit 1',
            'fi',
        ]
        # quote_cmd() on a one-element list shell-escapes that single string.
        lines.extend('grep -F %s t.log || exit 1' % quote_cmd([msg])
                     for msg in self.expected_output)

        write_to_script('\n'.join(lines) + '\n', self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Sanity-check the interestingness test; exit the program on failure.

        The test script must pass for the current file, and the crash must
        not reproduce on an empty file.
        """
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the working file by its preprocessed form if it still crashes.

        Tries -E -P (no line markers) first, then plain -E. The working file
        is left unchanged if neither preprocessed form reproduces the crash
        or if preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a new list with the matching options removed from `args`.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        # str.startswith accepts a tuple of prefixes; an empty tuple never
        # matches.
        prefixes = tuple(opts_startswith)
        one_arg_prefixes = tuple(opts_one_arg_startswith)

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in opts_equal or arg.startswith(prefixes):
                continue
            if arg.startswith(one_arg_prefixes):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return `args` filtered by filter_args(**kwargs) if the crash persists.

        If `extra_arg` is given it is moved to (or added at) the end of the
        candidate list. The candidate is returned only when it differs from
        `args` and still reproduces the crash; otherwise `args` is returned.
        `msg` is printed in verbose mode when the candidate is accepted.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try to drop args[index]; return (argument list, next index to try).

        On success the shortened list and the same index are returned (the
        next candidate has moved into that position); on failure the original
        list and index + 1.
        """
        new_args = args[:index] + args[index+1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index+1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index+1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced optimization level with -O0",
                                        extra_arg="-O0",
                                        opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.

        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the working file using the interestingness test.

        Ctrl-C kills C-Reduce and returns normally instead of propagating.
        """
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so `p` is always bound in
        # the KeyboardInterrupt handler.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()
def build_arg_parser():
    """Return the ArgumentParser describing this script's command line."""
    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    # Floor division keeps the default an int; with "/" Python 3 produces a
    # float, which would later be passed to creduce as e.g. "--n 8.0".
    parser.add_argument('--n', dest='core_number', type=int,
                        default=max(4, multiprocessing.cpu_count() // 2),
                        help="Number of cores to use.")
    parser.add_argument('-v', '--verbose', action='store_true')
    return parser


def main():
    """Parse the command line and run the whole reduction pipeline.

    Sets the module globals `verbose`, `creduce_cmd` and `clang_cmd`, then
    simplifies the clang command, writes the interestingness test,
    preprocesses the source, runs C-Reduce and minimizes the final command.
    """
    global verbose
    global creduce_cmd
    global clang_cmd

    args = build_arg_parser().parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)
    core_number = args.core_number

    # nargs=1 makes argparse store each positional as a one-element list.
    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce, core_number)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()
# Run the reducer only when this file is executed as a script.
if __name__ == "__main__":
    main()