[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / llvm / utils / collect_and_build_with_pgo.py
blob0851b91b0283cad63aefa78a82914f4f752bc724
1 #!/usr/bin/env python3
2 """
3 This script:
4 - Builds clang with user-defined flags
5 - Uses that clang to build an instrumented clang, which can be used to collect
6 PGO samples
7 - Builds a user-defined set of sources (default: clang) to act as a
8 "benchmark" to generate a PGO profile
9 - Builds clang once more with the PGO profile generated above
11 This is a total of four clean builds of clang (by default). This may take a
12 while. :)
14 This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
15 Eventually, it will be updated to instead call the cmake cache mentioned there.
16 """
18 import argparse
19 import collections
20 import multiprocessing
21 import os
22 import shlex
23 import shutil
24 import subprocess
25 import sys
27 ### User configuration
30 # If you want to use a different 'benchmark' than building clang, make this
31 # function do what you want. out_dir is the build directory for clang, so all
32 # of the clang binaries will live under "${out_dir}/bin/". Using clang in
33 # ${out_dir} will magically have the profiles go to the right place.
35 # You may assume that out_dir is a freshly-built directory that you can reach
36 # in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Exercise the instrumented clang in `out_dir` to generate profile data.

    Runs the `check-llvm` and `check-clang` targets inside `out_dir`, then
    configures a fresh build tree in the 'instrumentation_run' output
    subdirectory that uses the compiler from `out_dir`, and builds `all` there.

    Args:
        env: the Env for this run.
        out_dir: build directory of the instrumented clang.
        include_debug_info: if true, the benchmark build is configured with
            CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    # The test suites are a cheap way to widen coverage: `check-llvm` touches
    # the non-x86 backends a bit (if they are configured), and `check-clang`
    # feeds us more C and walks the diagnostic paths a fair amount. The
    # `if (stuff_is_broken) { diag() ... }` branches should still be weighted
    # heavily towards not-taken, since all of LLVM/etc gets built below.
    _build_things_in(env, out_dir, what=['check-llvm', 'check-clang'])

    bench_dir = env.output_subdir('instrumentation_run')

    # Building tblgen gets us coverage, so don't skip it. (out_dir may not
    # have the tablegen binaries anyway, but that's less of an issue.)
    bench_cmake = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False)
    if include_debug_info:
        bench_cmake.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo')
    _run_fresh_cmake(env, bench_cmake, bench_dir)

    # Build everything: the more data we collect, the better.
    _build_things_in(env, bench_dir, what=['all'])
62 ### Script
class CmakeInvocation:
    """Accumulates the arguments of one `cmake` command line.

    Settings are stored as -D definitions. The compiler- and linker-flag
    variables are list-valued and only ever grow; every other variable holds a
    single string that may or may not be overwritten, depending on which
    method is used to set it.
    """

    _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS']
    _ldflags = [
        'CMAKE_EXE_LINKER_FLAGS',
        'CMAKE_MODULE_LINKER_FLAGS',
        'CMAKE_SHARED_LINKER_FLAGS',
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G maker cmake_dir`."""
        self._prefix = [cmake, '-G', maker, cmake_dir]

        # Map of str -> (list|str). The flag-list variables are preloaded
        # with empty lists so that they can always be extended in place.
        self._flags = {
            name: []
            for name in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Like add_flag, but refuses to replace an existing scalar value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: if `key` already holds a scalar value and
                `allow_overwrites` is false.
        """
        if key in self._flags:
            current = self._flags[key]
            if isinstance(current, list):
                current.append(value)
                return
            if not allow_overwrites:
                raise ValueError('Invalid overwrite of %s requested' % key)
        self._flags[key] = value

    def add_cflags(self, flags):
        """Append each of `flags` to both the C and the C++ compiler flags."""
        # A bare string would be extended character by character, i.e. '-O2'
        # would turn into ['-', 'O', '2']. Nobody wants that.
        assert not isinstance(flags, str)
        for name in CmakeInvocation._cflags:
            self._flags[name].extend(flags)

    def add_ldflags(self, flags):
        """Append each of `flags` to the exe, module and shared linker flags."""
        assert not isinstance(flags, str)
        for name in CmakeInvocation._ldflags:
            self._flags[name].extend(flags)

    def to_args(self):
        """Return the full argv list, with -D definitions sorted by key."""
        argv = list(self._prefix)
        for key in sorted(self._flags):
            value = self._flags[key]
            if isinstance(value, list):
                # The list-valued variables always exist; leave out the ones
                # that nothing was ever added to.
                if not value:
                    continue
                value = ' '.join(value)

            if value == '':
                argv.append('-D' + key)
            else:
                argv.append('-D' + key + '=' + value)
        return argv
class Env:
    """Settings shared by every step of a run, plus a command runner."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args,
                 dry_run):
        """Store the settings; `default_cmake_args` is copied, not aliased."""
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs of the default cmake settings."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching the chosen build tool."""
        return 'Unix Makefiles' if self.use_make else 'Ninja'

    def get_make_command(self):
        """Return the argv prefix that invokes the chosen build tool."""
        if not self.use_make:
            return ['ninja']
        return ['make', '-j{}'.format(multiprocessing.cpu_count())]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Report whether subproject `name` sits next to the LLVM directory.

        Raises:
            ValueError: if `name` is neither 'compiler-rt' nor 'clang'.
        """
        if name == 'compiler-rt':
            sibling = '../compiler-rt'
        elif name == 'clang':
            sibling = '../clang'
        else:
            raise ValueError('Unknown subproject: %s' % name)
        return os.path.isdir(os.path.join(self.llvm_dir, sibling))

    # Capturing stdout/stderr for the caller is deliberately unsupported;
    # that keeps dry runs trivial to implement.
    def run_command(self,
                    cmd,
                    cwd=None,
                    check=False,
                    silent_unless_error=False):
        """Announce `cmd`, then run it unless this is a dry run.

        Args:
            cmd: argv list to execute.
            cwd: directory to run in; None means the current directory.
            check: raise if the command exits with a non-zero status.
            silent_unless_error: collect the command's output and only print
                it if the command fails.

        Raises:
            subprocess.CalledProcessError: if `check` is set and the command
                exits with a non-zero status.
        """
        print(
            'Running `%s` in %s' % (cmd, shlex.quote(cwd or os.getcwd())))

        if self.dry_run:
            return

        if silent_unless_error:
            out_target, err_target = subprocess.PIPE, subprocess.STDOUT
        else:
            out_target = err_target = None

        # subprocess.run is >= py3.5 only, and Popen gets us the same thing
        # with very little extra effort.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=out_target,
            stderr=err_target,
            cwd=cwd)
        captured, _ = proc.communicate()
        exit_status = proc.wait(timeout=0)

        if exit_status:
            if silent_unless_error:
                print(captured.decode('utf-8', 'ignore'))
            if check:
                raise subprocess.CalledProcessError(
                    returncode=exit_status,
                    cmd=cmd,
                    output=captured,
                    stderr=None)
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation for env's LLVM tree with its default -D args."""
    invocation = CmakeInvocation(
        cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir)
    for name, setting in env.get_default_cmake_args_kv():
        invocation.add_new_flag(name, setting)
    return invocation
def _get_cmake_invocation_for_bootstrap_from(env, out_dir,
                                             skip_tablegens=True):
    """Return a CmakeInvocation that compiles with the clang in `out_dir`.

    Args:
        env: the Env for this run.
        out_dir: an existing build directory; its bin/clang and bin/clang++
            become the C and C++ compilers.
        skip_tablegens: if true, point the new build at the llvm-tblgen and
            clang-tblgen binaries in `out_dir` (when present) instead of
            having it build its own.
    """
    bin_dir = os.path.join(out_dir, 'bin')
    clang = os.path.join(bin_dir, 'clang')

    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag('CMAKE_C_COMPILER', clang)
    cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++')

    # We often get no value out of building new tblgens; the previous build
    # should have them. Building them anyway is still correct, just slower.
    if skip_tablegens:
        prebuilt = (
            ('LLVM_TABLEGEN', 'llvm-tblgen'),
            ('CLANG_TABLEGEN', 'clang-tblgen'),
        )
        for key, binary in prebuilt:
            path = os.path.join(bin_dir, binary)
            # The user may have supplied their own stage1 directory (which is
            # generally where everything is sourced from), so check that the
            # binary is really there. Dry runs just hope for the best.
            if env.dry_run or os.path.exists(path):
                cmake.add_new_flag(key, path)

    return cmake
def _build_things_in(env, target_dir, what):
    """Build the targets listed in `what` inside `target_dir`."""
    env.run_command(
        env.get_make_command() + what, cwd=target_dir, check=True)
def _run_fresh_cmake(env, cmake, target_dir):
    """Configure `cmake` in `target_dir`, starting from an empty directory.

    Unless this is a dry run, any existing `target_dir` is deleted and then
    recreated before cmake is invoked in it.
    """
    really_run = not env.dry_run
    if really_run:
        # A directory that was never created is exactly as good as a removed
        # one.
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            pass
        os.makedirs(target_dir, mode=0o755)

    env.run_command(
        cmake.to_args(),
        cwd=target_dir,
        check=True,
        silent_unless_error=True)
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    Builds the `clang`, `llvm-profdata` and `profile` targets in the 'stage1'
    output subdirectory, using env's default cmake settings.
    """
    stage1_dir = env.output_subdir('stage1')
    _run_fresh_cmake(env, _get_default_cmake_invocation(env), stage1_dir)
    _build_things_in(
        env, stage1_dir, what=['clang', 'llvm-profdata', 'profile'])
    return stage1_dir
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir,
                                         output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Uses the llvm-profdata binary from `stage1_dir`. In a dry run the
    directory is not read; a '*.profraw' placeholder stands in for the inputs.
    """
    merge_cmd = [
        os.path.join(stage1_dir, 'bin', 'llvm-profdata'),
        'merge',
        '-output=' + output_file,
    ]

    if env.dry_run:
        raw_profiles = [os.path.join(profile_dir, '*.profraw')]
    else:
        raw_profiles = []
        for entry in os.listdir(profile_dir):
            if entry.endswith('.profraw'):
                raw_profiles.append(os.path.join(profile_dir, entry))

    env.run_command(merge_cmd + raw_profiles, check=True)
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 compiler.

    Args:
        env: the Env for this run.
        stage1_dir: absolute path of the stage1 build directory.

    Returns:
        (build_dir, profiles_dir): the instrumented build directory and the
        'profiles' directory inside it.
    """
    assert os.path.isabs(stage1_dir)

    build_dir = os.path.join(env.output_dir, 'instrumented')
    profiles_dir = os.path.join(build_dir, 'profiles')

    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag('LLVM_BUILD_INSTRUMENTED', 'IR')

    # libcxx's configure step messes with our link order: libclang_rt.profile
    # ends up linked after libgcc, yet it needs atexit from libgcc, so the
    # configure checks fail.
    #
    # We need neither libcxx nor compiler-rt here, so simply disable them.
    cmake.add_new_flag('LLVM_BUILD_RUNTIME', 'No')

    _run_fresh_cmake(env, cmake, build_dir)
    _build_things_in(env, build_dir, what=['clang', 'lld'])

    return build_dir, profiles_dir
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile applied; return the build directory.

    Args:
        env: the Env for this run.
        stage1_dir: build directory whose clang compiles the optimized build.
        profdata_file: merged profile to hand to the build.

    Raises:
        ValueError: if `profdata_file` does not exist (not checked in dry
            runs).
    """
    profile_missing = not os.path.exists(profdata_file)
    if not env.dry_run and profile_missing:
        raise ValueError('Looks like the profdata file at %s doesn\'t exist' %
                         profdata_file)

    build_dir = os.path.join(env.output_dir, 'optimized')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag('LLVM_PROFDATA_FILE', os.path.abspath(profdata_file))

    # Hash mismatches in `main` across tools/etc. produce complaints; silence
    # them.
    cmake.add_cflags(['-Wno-backend-plugin'])

    _run_fresh_cmake(env, cmake, build_dir)
    _build_things_in(env, build_dir, what=['clang'])
    return build_dir
# Options from the command line that do not belong on Env.
Args = collections.namedtuple(
    'Args',
    (
        'do_optimized_build',
        'include_debug_info',
        'profile_location',
        'stage1_dir',
    ),
)
def _parse_args():
    """Parse the command line into an Env and an Args tuple.

    Returns:
        (env, args): `env` is the Env holding the LLVM checkout location, the
        output directory, the build tool choice, the default cmake -D
        settings and the dry-run switch; `args` is an Args tuple with the
        remaining options.

    Raises:
        ValueError: if a --cmake-extra-arg value starts with '-' without
            being a -D argument.
    """
    parser = argparse.ArgumentParser(
        description='Builds LLVM and Clang with instrumentation, collects '
        'instrumentation profiles for them, and (optionally) builds things '
        'with these PGO profiles. By default, it\'s assumed that you\'re '
        'running this from your LLVM root, and all build artifacts will be '
        'saved to $PWD/out.')
    parser.add_argument(
        '--cmake-extra-arg',
        action='append',
        default=[],
        help='an extra arg to pass to all cmake invocations. Note that this '
        'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will '
        'be passed as -DFOO=BAR. This may be specified multiple times.')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='print commands instead of running them')
    parser.add_argument(
        '--llvm-dir',
        default='.',
        help='directory containing an LLVM checkout (default: $PWD)')
    parser.add_argument(
        '--no-optimized-build',
        action='store_true',
        help='disable the final, PGO-optimized build')
    parser.add_argument(
        '--out-dir',
        help='directory to write artifacts to (default: $llvm_dir/out)')
    parser.add_argument(
        '--profile-output',
        help='where to output the profile (default is $out/pgo_profile.prof)')
    parser.add_argument(
        '--stage1-dir',
        help='instead of having an initial build of everything, use the given '
        'directory. It is expected that this directory will have clang, '
        'llvm-profdata, and the appropriate libclang_rt.profile already built')
    # The help text used to describe the opposite of what the flag does:
    # passing it switches the benchmark build *to* RelWithDebInfo.
    parser.add_argument(
        '--use-debug-info-in-benchmark',
        action='store_true',
        help='use RelWithDebInfo instead of a regular build in the benchmark. '
        'This increases benchmark execution time and disk space requirements, '
        'but gives more coverage over debuginfo bits in LLVM and clang.')
    parser.add_argument(
        '--use-make',
        action='store_true',
        # Fall back to make automatically when ninja is not on PATH.
        default=shutil.which('ninja') is None,
        help='use Makefiles instead of ninja')

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, 'out')
    else:
        output_dir = os.path.abspath(args.out_dir)

    # Later steps assert that the stage1 directory is absolute, so normalize
    # a user-supplied one the same way as the other paths.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    # Defaults first; user-supplied -D settings may override them.
    extra_args = {'CMAKE_BUILD_TYPE': 'Release',
                  'LLVM_ENABLE_PROJECTS': 'clang;compiler-rt;lld'}
    for arg in args.cmake_extra_arg:
        if arg.startswith('-D'):
            arg = arg[2:]
        elif arg.startswith('-'):
            raise ValueError('Unknown not- -D arg encountered; you may need '
                             'to tweak the source...')
        # KEY=VALUE sets a value; a bare KEY is passed on as plain -DKEY.
        key, _, val = arg.partition('=')
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, 'pgo_profile.prof')

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Errs on the side of false-positives. A path that cannot be listed at all
    (missing, not a directory, unreadable) is reported as not being an LLVM
    directory rather than raising.
    """
    try:
        contents = set(os.listdir(directory))
    except OSError:
        # Let the caller print its usual "isn't an LLVM directory" message
        # instead of dying with a traceback on a mistyped path.
        return False

    expected_contents = {
        'CODE_OWNERS.TXT',
        'cmake',
        'docs',
        'include',
        'utils',
    }
    if not expected_contents <= contents:
        return False

    try:
        include_listing = os.listdir(os.path.join(directory, 'include'))
    except OSError:
        # `include` exists but is not a listable directory.
        return False

    return 'llvm' in include_listing
def _die(*args, **kwargs):
    """Print a message to stderr, print()-style, and exit with status 1."""
    # Whatever `file` the caller passed is overridden: errors go to stderr.
    print(*args, **dict(kwargs, file=sys.stderr))
    sys.exit(1)
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, PGO.

    Steps, in order:
      1. Build a stage1 clang, unless --stage1-dir supplied one.
      2. Build an instrumented clang with it.
      3. Run the benchmark with the instrumented clang.
      4. Merge the resulting raw profiles into the profile file.
      5. Unless disabled, build clang once more using that profile.

    Progress messages go to stderr; the final profile location and build
    directory are printed to stdout. Exits with status 1 if the LLVM checkout
    does not look usable.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die('Looks like %s isn\'t an LLVM directory; please see --help' %
             env.llvm_dir)
    # The subprojects are looked up as siblings of the LLVM directory, so the
    # messages name those locations.
    if not env.has_llvm_subproject('clang'):
        _die('Need a clang checkout at ../clang (relative to the LLVM '
             'directory)')
    if not env.has_llvm_subproject('compiler-rt'):
        _die('Need a compiler-rt checkout at ../compiler-rt (relative to the '
             'LLVM directory)')

    def status(*message):
        print(*message, file=sys.stderr)

    if args.stage1_dir is None:
        status('*** Building stage1 clang...')
        stage1_out = _build_stage1_clang(env)
    else:
        # The instrumented build asserts that this path is absolute; a
        # relative --stage1-dir must not trip that assertion.
        stage1_out = os.path.abspath(args.stage1_dir)

    status('*** Building instrumented clang...')
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status('*** Running profdata benchmarks...')
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status('*** Generating profile...')
    _generate_instrumented_clang_profile(env, stage1_out, profile_dir,
                                         args.profile_location)

    print('Final profile:', args.profile_location)
    if args.do_optimized_build:
        status('*** Building PGO-optimized binaries...')
        optimized_out = _build_optimized_clang(env, stage1_out,
                                               args.profile_location)
        print('Final build directory:', optimized_out)
# Only run the pipeline when executed as a script, not when imported.
if __name__ == '__main__':
    _main()