4 - Builds clang with user-defined flags
5 - Uses that clang to build an instrumented clang, which can be used to collect
7 - Builds a user-defined set of sources (default: clang) to act as a
8 "benchmark" to generate a PGO profile
9 - Builds clang once more with the PGO profile generated above
11 This is a total of four clean builds of clang (by default). This may take a
This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
15 Eventually, it will be updated to instead call the cmake cache mentioned there.
import argparse
import collections
import multiprocessing
import os
import shlex
import shutil
import subprocess
import sys
27 ### User configuration
30 # If you want to use a different 'benchmark' than building clang, make this
31 # function do what you want. out_dir is the build directory for clang, so all
32 # of the clang binaries will live under "${out_dir}/bin/". Using clang in
33 # ${out_dir} will magically have the profiles go to the right place.
35 # You may assume that out_dir is a freshly-built directory that you can reach
36 # in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Run the workload whose execution produces the PGO profile data.

    Args:
        env: build environment; supplies the output directory layout and is
            handed to every build helper.
        out_dir: build directory of the instrumented clang.
        include_debug_info: when true, the benchmark build is configured as
            `RelWithDebInfo`.
    """
    benchmark_dir = env.output_subdir('instrumentation_run')

    # Running the LLVM and clang test suites is a cheap coverage boost:
    # `check-llvm` touches the non-x86 backends (if configured), and
    # `check-clang` feeds more C through the compiler. The latter also walks
    # diagnostic paths a fair amount, but since all of LLVM/etc. was built, the
    # `if (stuff_is_broken) { diag() ... }` branches should still be weighted
    # heavily toward not-taken.
    test_targets = ['check-llvm', 'check-clang']
    _build_things_in(env, out_dir, what=test_targets)

    # Do not skip the tablegens: building them is extra coverage. (out_dir
    # might not contain them anyway, but that's less of an issue.)
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False)
    if include_debug_info:
        bootstrap.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo')

    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # Build everything; more profile data is better.
    _build_things_in(env, benchmark_dir, what=['all'])
class CmakeInvocation:
    """Accumulates the pieces of a single `cmake` command line.

    Flags are stored as a map of name -> value. The compiler and linker flag
    variables are pre-seeded with empty lists so several callers can append to
    them; every other flag holds a plain string.
    """

    _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS']
    _ldflags = [
        'CMAKE_EXE_LINKER_FLAGS',
        'CMAKE_MODULE_LINKER_FLAGS',
        'CMAKE_SHARED_LINKER_FLAGS',
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G <maker> <cmake_dir>`."""
        self._prefix = [cmake, '-G', maker, cmake_dir]

        # Map of str -> (list|str).
        self._flags = {}
        for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags:
            self._flags[flag] = []

    def add_new_flag(self, key, value):
        """Set `key`, raising ValueError if a string value is already set."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: `key` already holds a string and `allow_overwrites`
                is false.
        """
        if key not in self._flags:
            self._flags[key] = value
            return

        existing_value = self._flags[key]
        if isinstance(existing_value, list):
            # List-valued flags accumulate rather than overwrite.
            existing_value.append(value)
            return

        if not allow_overwrites:
            raise ValueError('Invalid overwrite of %s requested' % key)

        self._flags[key] = value

    def add_cflags(self, flags):
        """Append each entry of `flags` to every C/C++ compiler flag list."""
        # No, I didn't intend to append ['-', 'O', '2'] to my flags, thanks :)
        assert not isinstance(flags, str)
        for f in CmakeInvocation._cflags:
            self._flags[f].extend(flags)

    def add_ldflags(self, flags):
        """Append each entry of `flags` to every linker flag list."""
        assert not isinstance(flags, str)
        for f in CmakeInvocation._ldflags:
            self._flags[f].extend(flags)

    def to_args(self):
        """Return the argv list: the prefix plus one `-D` entry per flag.

        Flags are emitted in sorted key order. Empty list-valued flags are
        omitted; a flag whose value is the empty string is emitted as a bare
        `-DKEY`.
        """
        args = self._prefix.copy()
        for key, value in sorted(self._flags.items()):
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = ' '.join(value)

            arg = '-D' + key
            if value != '':
                arg += '=' + value
            args.append(arg)
        return args
class Env:
    """Settings shared by every build step, plus the command runner."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args,
                 dry_run):
        """Record the build configuration.

        Args:
            llvm_dir: path of the LLVM source directory handed to cmake.
            use_make: generate/build with Makefiles instead of ninja.
            output_dir: directory under which all build trees are created.
            default_cmake_args: dict of cmake -D flags applied to every
                invocation; copied so later caller mutations don't leak in.
            dry_run: when true, commands are printed but not executed.
        """
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs of the default cmake flags."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching `use_make`."""
        return 'Ninja' if not self.use_make else 'Unix Makefiles'

    def get_make_command(self):
        """Return the argv prefix of the build tool; targets get appended."""
        if self.use_make:
            return ['make', '-j{}'.format(multiprocessing.cpu_count())]
        return ['ninja']

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether the named subproject directory exists.

        Subprojects are looked up as siblings of `llvm_dir`.

        Raises:
            ValueError: `name` is not a subproject this script knows about.
        """
        if name == 'compiler-rt':
            subdir = '../compiler-rt'
        elif name == 'clang':
            subdir = '../clang'
        else:
            raise ValueError('Unknown subproject: %s' % name)

        return os.path.isdir(os.path.join(self.llvm_dir, subdir))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self,
                    cmd,
                    cwd=None,
                    check=False,
                    silent_unless_error=False):
        """Announce `cmd` and, unless this is a dry run, execute it.

        Args:
            cmd: argv list of the command.
            cwd: working directory; defaults to the current directory.
            check: raise subprocess.CalledProcessError on a nonzero exit.
            silent_unless_error: capture the command's combined output and
                print it only if the command fails.
        """
        print(
            'Running `%s` in %s' % (cmd, shlex.quote(cwd or os.getcwd())))

        if self.dry_run:
            return

        if silent_unless_error:
            stdout, stderr = subprocess.PIPE, subprocess.STDOUT
        else:
            stdout, stderr = None, None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        popen = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=stdout,
            stderr=stderr,
            cwd=cwd)
        stdout, _ = popen.communicate()
        return_code = popen.wait(timeout=0)

        if not return_code:
            return

        if silent_unless_error:
            print(stdout.decode('utf-8', 'ignore'))

        if check:
            raise subprocess.CalledProcessError(
                returncode=return_code, cmd=cmd, output=stdout, stderr=None)
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation preloaded with the environment's defaults.

    The invocation targets `env.llvm_dir` with the generator chosen by `env`,
    and carries every default cmake flag registered on `env`.
    """
    inv = CmakeInvocation(
        cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir)
    for key, value in env.get_default_cmake_args_kv():
        inv.add_new_flag(key, value)
    # Callers go on to add flags to (and run) the invocation, so hand it back.
    return inv
def _get_cmake_invocation_for_bootstrap_from(env, out_dir,
                                             skip_tablegens=True):
    """Return a cmake invocation that builds with the clang in `out_dir`.

    Args:
        env: build environment providing the default cmake flags.
        out_dir: an existing build directory; its `bin/clang` and
            `bin/clang++` become the C and C++ compilers.
        skip_tablegens: when true, point the new build at the tablegen
            binaries in `out_dir` (if present) instead of rebuilding them.
    """
    clang = os.path.join(out_dir, 'bin', 'clang')
    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag('CMAKE_C_COMPILER', clang)
    cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++')

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    def add_tablegen(key, binary):
        path = os.path.join(out_dir, 'bin', binary)

        # Check that this exists, since the user's allowed to specify their own
        # stage1 directory (which is generally where we'll source everything
        # from). Dry runs should hope for the best from our user, as well.
        if env.dry_run or os.path.exists(path):
            cmake.add_new_flag(key, path)

    if skip_tablegens:
        add_tablegen('LLVM_TABLEGEN', 'llvm-tblgen')
        add_tablegen('CLANG_TABLEGEN', 'clang-tblgen')

    return cmake
233 def _build_things_in(env
, target_dir
, what
):
234 cmd
= env
.get_make_command() + what
235 env
.run_command(cmd
, cwd
=target_dir
, check
=True)
238 def _run_fresh_cmake(env
, cmake
, target_dir
):
241 shutil
.rmtree(target_dir
)
242 except FileNotFoundError
:
245 os
.makedirs(target_dir
, mode
=0o755)
247 cmake_args
= cmake
.to_args()
249 cmake_args
, cwd
=target_dir
, check
=True, silent_unless_error
=True)
def _build_stage1_clang(env):
    """Build the plain (uninstrumented) stage1 toolchain.

    Builds clang, llvm-profdata and the profile runtime with the default
    cmake flags.

    Returns:
        The stage1 build directory, which later stages bootstrap from.
    """
    target_dir = env.output_subdir('stage1')
    cmake = _get_default_cmake_invocation(env)
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=['clang', 'llvm-profdata', 'profile'])
    return target_dir
260 def _generate_instrumented_clang_profile(env
, stage1_dir
, profile_dir
,
262 llvm_profdata
= os
.path
.join(stage1_dir
, 'bin', 'llvm-profdata')
264 profiles
= [os
.path
.join(profile_dir
, '*.profraw')]
267 os
.path
.join(profile_dir
, f
) for f
in os
.listdir(profile_dir
)
268 if f
.endswith('.profraw')
270 cmd
= [llvm_profdata
, 'merge', '-output=' + output_file
] + profiles
271 env
.run_command(cmd
, check
=True)
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang using the toolchain in `stage1_dir`.

    Args:
        env: build environment.
        stage1_dir: absolute path of the stage1 build directory.

    Returns:
        A `(build_dir, profiles_dir)` pair: the instrumented build directory
        and its `profiles` subdirectory.
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, 'instrumented')
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)

    extra_flags = (
        ('LLVM_BUILD_INSTRUMENTED', 'IR'),
        # libcxx's configure step messes with our link order: we'll link
        # libclang_rt.profile after libgcc, and the former requires atexit
        # from the latter. So, configure checks fail.
        #
        # Since we don't need libcxx or compiler-rt anyway, just disable them.
        ('LLVM_BUILD_RUNTIME', 'No'),
    )
    for flag_name, flag_value in extra_flags:
        invocation.add_new_flag(flag_name, flag_value)

    _run_fresh_cmake(env, invocation, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=['clang', 'lld'])

    return instrumented_dir, os.path.join(instrumented_dir, 'profiles')
295 def _build_optimized_clang(env
, stage1_dir
, profdata_file
):
296 if not env
.dry_run
and not os
.path
.exists(profdata_file
):
297 raise ValueError('Looks like the profdata file at %s doesn\'t exist' %
300 target_dir
= os
.path
.join(env
.output_dir
, 'optimized')
301 cmake
= _get_cmake_invocation_for_bootstrap_from(env
, stage1_dir
)
302 cmake
.add_new_flag('LLVM_PROFDATA_FILE', os
.path
.abspath(profdata_file
))
304 # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
306 cmake
.add_cflags(['-Wno-backend-plugin'])
307 _run_fresh_cmake(env
, cmake
, target_dir
)
308 _build_things_in(env
, target_dir
, what
=['clang'])
# Parsed command-line options that drive the build stages (as opposed to the
# environment settings, which are carried separately).
Args = collections.namedtuple('Args', [
    'do_optimized_build',
    'include_debug_info',
    'profile_location',
    'stage1_dir',
])
def _parse_args():
    """Parse the command line.

    Returns:
        An `(env, args)` pair: the build environment, and the `Args` tuple of
        options that select which stages run and where the profile goes.

    Raises:
        ValueError: a --cmake-extra-arg value starts with `-` but is not a
            `-D` argument.
    """
    parser = argparse.ArgumentParser(
        description='Builds LLVM and Clang with instrumentation, collects '
        'instrumentation profiles for them, and (optionally) builds things '
        'with these PGO profiles. By default, it\'s assumed that you\'re '
        'running this from your LLVM root, and all build artifacts will be '
        'saved to $PWD/out.')
    parser.add_argument(
        '--cmake-extra-arg',
        action='append',
        default=[],
        help='an extra arg to pass to all cmake invocations. Note that this '
        'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will '
        'be passed as -DFOO=BAR. This may be specified multiple times.')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='print commands instead of running them')
    parser.add_argument(
        '--llvm-dir',
        default='.',
        help='directory containing an LLVM checkout (default: $PWD)')
    parser.add_argument(
        '--no-optimized-build',
        action='store_true',
        help='disable the final, PGO-optimized build')
    parser.add_argument(
        '--out-dir',
        help='directory to write artifacts to (default: $llvm_dir/out)')
    parser.add_argument(
        '--profile-output',
        help='where to output the profile (default is $out/pgo_profile.prof)')
    parser.add_argument(
        '--stage1-dir',
        help='instead of having an initial build of everything, use the given '
        'directory. It is expected that this directory will have clang, '
        'llvm-profdata, and the appropriate libclang_rt.profile already built')
    parser.add_argument(
        '--use-debug-info-in-benchmark',
        action='store_true',
        # The flag switches the benchmark build *to* RelWithDebInfo.
        help='use a RelWithDebInfo build instead of a regular one in the '
        'benchmark. '
        'This increases benchmark execution time and disk space requirements, '
        'but gives more coverage over debuginfo bits in LLVM and clang.')
    parser.add_argument(
        '--use-make',
        action='store_true',
        # Fall back to Makefiles automatically when ninja isn't installed.
        default=shutil.which('ninja') is None,
        help='use Makefiles instead of ninja')

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, 'out')
    else:
        output_dir = os.path.abspath(args.out_dir)

    extra_args = {'CMAKE_BUILD_TYPE': 'Release',
                  'LLVM_ENABLE_PROJECTS': 'clang;compiler-rt;lld'}
    for arg in args.cmake_extra_arg:
        if arg.startswith('-D'):
            arg = arg[2:]
        elif arg.startswith('-'):
            raise ValueError('Unknown not- -D arg encountered; you may need '
                             'to tweak the source...')
        # `KEY` alone (no `=`) yields an empty value, i.e. a bare -DKEY.
        key, _, val = arg.partition('=')
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, 'pgo_profile.prof')

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=args.stage1_dir,
    )

    return env, result_args
415 def _looks_like_llvm_dir(directory
):
416 """Arbitrary set of heuristics to determine if `directory` is an llvm dir.
418 Errs on the side of false-positives."""
420 contents
= set(os
.listdir(directory
))
421 expected_contents
= [
429 if not all(c
in contents
for c
in expected_contents
):
433 include_listing
= os
.listdir(os
.path
.join(directory
, 'include'))
434 except NotADirectoryError
:
437 return 'llvm' in include_listing
440 def _die(*args
, **kwargs
):
441 kwargs
['file'] = sys
.stderr
442 print(*args
, **kwargs
)
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, profile
    merge and (unless disabled) the final PGO-optimized build."""
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die('Looks like %s isn\'t an LLVM directory; please see --help' %
             env.llvm_dir)
    # Subprojects are looked up as siblings of the llvm directory, so say so.
    if not env.has_llvm_subproject('clang'):
        _die('Need a clang checkout at ../clang (relative to the LLVM '
             'directory)')
    if not env.has_llvm_subproject('compiler-rt'):
        _die('Need a compiler-rt checkout at ../compiler-rt (relative to the '
             'LLVM directory)')

    def status(*parts):
        print(*parts, file=sys.stderr)

    if args.stage1_dir is None:
        status('*** Building stage1 clang...')
        stage1_out = _build_stage1_clang(env)
    else:
        # Later stages require an absolute stage1 path; a user-supplied one
        # may well be relative.
        stage1_out = os.path.abspath(args.stage1_dir)

    status('*** Building instrumented clang...')
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status('*** Running profdata benchmarks...')
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status('*** Generating profile...')
    _generate_instrumented_clang_profile(env, stage1_out, profile_dir,
                                         args.profile_location)

    print('Final profile:', args.profile_location)
    if args.do_optimized_build:
        status('*** Building PGO-optimized binaries...')
        optimized_out = _build_optimized_clang(env, stage1_out,
                                               args.profile_location)
        print('Final build directory:', optimized_out)
482 if __name__
== '__main__':