"""
This script:
- Builds clang with user-defined flags
- Uses that clang to build an instrumented clang, which can be used to collect
  PGO profile data
- Builds a user-defined set of sources (default: clang) to act as a
  "benchmark" to generate a PGO profile
- Builds clang once more with the PGO profile generated above

This is a total of four clean builds of clang (by default). This may take a
while.
"""
17 import multiprocessing
24 ### User configuration
27 # If you want to use a different 'benchmark' than building clang, make this
28 # function do what you want. out_dir is the build directory for clang, so all
29 # of the clang binaries will live under "${out_dir}/bin/". Using clang in
30 # ${out_dir} will magically have the profiles go to the right place.
32 # You may assume that out_dir is a freshly-built directory that you can reach
33 # in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Exercise the clang in `out_dir` so that it emits profile data.

    Two workloads are run, in this order:
      1. the `check-llvm` and `check-clang` targets, built inside `out_dir`;
      2. a fresh bootstrap build of `all`, configured to compile with the
         clang found in `out_dir`, placed in the 'instrumentation_run'
         subdirectory of the output directory.

    If `include_debug_info` is true, the bootstrap build is configured with
    CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    benchmark_dir = env.output_subdir('instrumentation_run')

    # The test suites are a cheap way to widen coverage: check-llvm touches
    # the non-x86 backends a bit (if configured), and check-clang feeds more C
    # through the compiler, including the diagnostic paths. The
    # `if (stuff_is_broken) { diag() ... }` branches should still be weighted
    # heavily in the not-taken direction, since all of LLVM/etc was built.
    test_suites = ['check-llvm', 'check-clang']
    _build_things_in(env, out_dir, what=test_suites)

    # Building the tablegens gives us coverage as well, so don't skip them.
    # (out_dir may not have them anyway, but that's less of an issue.)
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False)
    if include_debug_info:
        bootstrap.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo')
    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # Build everything: the more data we collect, the better.
    everything = ['all']
    _build_things_in(env, benchmark_dir, what=everything)
class CmakeInvocation:
    """Accumulates the pieces of one cmake command line.

    Every instance starts with the compiler-flag and linker-flag variables
    preloaded as empty lists, so flags can be appended to them at any time.
    All other variables are plain strings added via `add_flag` or
    `add_new_flag`. `to_args` renders the final argv.
    """

    # Variables that receive compiler flags (see `add_cflags`).
    _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS']
    # Variables that receive linker flags (see `add_ldflags`).
    _ldflags = [
        'CMAKE_EXE_LINKER_FLAGS',
        'CMAKE_MODULE_LINKER_FLAGS',
        'CMAKE_SHARED_LINKER_FLAGS',
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G maker cmake_dir`.

        Args:
            cmake: the cmake executable to run.
            maker: the generator name passed after `-G`.
            cmake_dir: the source directory handed to cmake.
        """
        self._prefix = [cmake, '-G', maker, cmake_dir]

        # Map of str -> (list|str).
        self._flags = {}
        for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags:
            self._flags[flag] = []

    def add_new_flag(self, key, value):
        """Like `add_flag`, but refuse to replace an existing string value.

        Raises:
            ValueError: if `key` already holds a (non-list) value.
        """
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: if `key` already holds a string value and
                `allow_overwrites` is false.
        """
        if key not in self._flags:
            self._flags[key] = value
            return

        existing_value = self._flags[key]
        if isinstance(existing_value, list):
            # List-y variables (cflags, ldflags) accumulate; nothing is lost,
            # so this is never treated as an overwrite.
            existing_value.append(value)
            return

        if not allow_overwrites:
            raise ValueError('Invalid overwrite of %s requested' % key)

        self._flags[key] = value

    def add_cflags(self, flags):
        """Append every item of `flags` to each compiler-flag variable."""
        # No, I didn't intend to append ['-', 'O', '2'] to my flags, thanks :)
        assert not isinstance(flags, str)
        for f in CmakeInvocation._cflags:
            self._flags[f].extend(flags)

    def add_ldflags(self, flags):
        """Append every item of `flags` to each linker-flag variable."""
        # Same guard as add_cflags: a bare string would be split per char.
        assert not isinstance(flags, str)
        for f in CmakeInvocation._ldflags:
            self._flags[f].extend(flags)

    def to_args(self):
        """Return the argv list for this invocation.

        The prefix given to the constructor comes first, followed by one
        `-DKEY=VALUE` argument per variable, in sorted key order. List-valued
        variables are joined with single spaces; empty ones are omitted.

        NOTE(review): the lines that format each variable were not visible
        when this block was restored. The `-DKEY=VALUE` form follows the
        script's --cmake-extra-arg help text; emitting a bare `-DKEY` for an
        empty string value is an assumption -- confirm.
        """
        args = self._prefix.copy()
        for key, value in sorted(self._flags.items()):
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = ' '.join(value)

            arg = '-D' + key
            if value != '':
                arg += '=' + value
            args.append(arg)
        return args
def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args,
             dry_run):
    """Record the settings shared by every build step.

    Args:
        llvm_dir: directory of the LLVM checkout.
        use_make: true to drive builds with Makefiles instead of Ninja.
        output_dir: directory that receives all build artifacts.
        default_cmake_args: mapping of cmake variable -> value; a shallow
            copy is stored, so later changes by the caller have no effect.
        dry_run: stored as-is for the build steps to consult.
    """
    self.llvm_dir = llvm_dir
    self.use_make = use_make
    self.output_dir = output_dir
    self.default_cmake_args = default_cmake_args.copy()
    self.dry_run = dry_run
def get_default_cmake_args_kv(self):
    """Return the default cmake arguments as a view of (key, value) pairs."""
    defaults = self.default_cmake_args
    return defaults.items()
def get_cmake_maker(self):
    """Return the cmake generator name that matches `use_make`."""
    if self.use_make:
        return 'Unix Makefiles'
    return 'Ninja'
def get_make_command(self):
    """Return the argv prefix of the build tool to run in a build directory.

    This has to agree with `get_cmake_maker`: a directory configured with the
    'Unix Makefiles' generator is built with `make` (one job per CPU), while
    one configured with 'Ninja' is built with `ninja`. Callers append the
    target names to the returned list.
    """
    if self.use_make:
        return ['make', '-j{}'.format(multiprocessing.cpu_count())]
    # NOTE(review): ninja picks its own parallelism, so no -j is passed --
    # confirm this matches the intended invocation.
    return ['ninja']
def output_subdir(self, name):
    """Return the path of the entry called `name` under the output directory."""
    root = self.output_dir
    return os.path.join(root, name)
def has_llvm_subproject(self, name):
    """Report whether the checkout contains the subproject called `name`.

    Only 'compiler-rt' (looked up at projects/compiler-rt) and 'clang'
    (looked up at tools/clang) are known.

    Returns:
        True if the subproject's directory exists under `llvm_dir`.

    Raises:
        ValueError: if `name` is not one of the known subprojects.
    """
    if name == 'compiler-rt':
        subdir = 'projects/compiler-rt'
    elif name == 'clang':
        subdir = 'tools/clang'
    else:
        raise ValueError('Unknown subproject: %s' % name)

    return os.path.isdir(os.path.join(self.llvm_dir, subdir))
157 # Note that we don't allow capturing stdout/stderr. This works quite nicely
159 def run_command(self
,
163 silent_unless_error
=False):
164 cmd_str
= ' '.join(shlex
.quote(s
) for s
in cmd
)
166 'Running `%s` in %s' % (cmd_str
, shlex
.quote(cwd
or os
.getcwd())))
171 if silent_unless_error
:
172 stdout
, stderr
= subprocess
.PIPE
, subprocess
.STDOUT
174 stdout
, stderr
= None, None
176 # Don't use subprocess.run because it's >= py3.5 only, and it's not too
177 # much extra effort to get what it gives us anyway.
178 popen
= subprocess
.Popen(
180 stdin
=subprocess
.DEVNULL
,
184 stdout
, _
= popen
.communicate()
185 return_code
= popen
.wait(timeout
=0)
190 if silent_unless_error
:
191 print(stdout
.decode('utf-8', 'ignore'))
194 raise subprocess
.CalledProcessError(
195 returncode
=return_code
, cmd
=cmd
, output
=stdout
, stderr
=None)
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation preloaded with the user's default arguments.

    The invocation runs `cmake` with the generator chosen by `env` against
    `env.llvm_dir`. Each default argument is added with `add_new_flag`, so a
    later `add_new_flag` for the same key raises instead of silently
    replacing what the user asked for.
    """
    inv = CmakeInvocation(
        cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir)
    for key, value in env.get_default_cmake_args_kv():
        inv.add_new_flag(key, value)
    return inv
def _get_cmake_invocation_for_bootstrap_from(env, out_dir,
                                             skip_tablegens=True):
    """Return a cmake invocation that builds with the clang in `out_dir`.

    Args:
        env: the build environment (supplies defaults and `dry_run`).
        out_dir: an existing build directory; its bin/clang and bin/clang++
            become the C and C++ compilers.
        skip_tablegens: if true, reuse the llvm-tblgen / clang-tblgen
            binaries from `out_dir` (when available) instead of building new
            ones.

    Returns:
        The configured CmakeInvocation.
    """
    clang = os.path.join(out_dir, 'bin', 'clang')
    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag('CMAKE_C_COMPILER', clang)
    cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++')

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    def add_tablegen(key, binary):
        path = os.path.join(out_dir, 'bin', binary)

        # Check that this exists, since the user's allowed to specify their own
        # stage1 directory (which is generally where we'll source everything
        # from). Dry runs should hope for the best from our user, as well.
        if env.dry_run or os.path.exists(path):
            cmake.add_new_flag(key, path)

    if skip_tablegens:
        add_tablegen('LLVM_TABLEGEN', 'llvm-tblgen')
        add_tablegen('CLANG_TABLEGEN', 'clang-tblgen')

    return cmake
def _build_things_in(env, target_dir, what):
    """Build the targets listed in `what` inside `target_dir`.

    The build tool comes from `env.get_make_command()`; the command is run
    with `check=True`.
    """
    tool = env.get_make_command()
    env.run_command(tool + what, cwd=target_dir, check=True)
def _run_fresh_cmake(env, cmake, target_dir):
    """Configure `target_dir` from scratch with the given cmake invocation.

    Unless this is a dry run, any existing `target_dir` is deleted and an
    empty one is created, so no stale cache or objects survive. cmake is then
    run inside it; its output is only shown if it fails.

    Args:
        env: the build environment (supplies `dry_run` and `run_command`).
        cmake: object whose `to_args()` yields the cmake argv.
        target_dir: the build directory to (re)create and configure.
    """
    # A dry run only prints commands, so it must not touch the filesystem.
    if not env.dry_run:
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            # Nothing to wipe; the directory is created just below.
            pass

        os.makedirs(target_dir, mode=0o755)

    cmake_args = cmake.to_args()
    env.run_command(
        cmake_args, cwd=target_dir, check=True, silent_unless_error=True)
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    The build lives in the 'stage1' subdirectory of the output directory and
    produces the `clang`, `llvm-profdata` and `profile` targets, which the
    later stages rely on.
    """
    target_dir = env.output_subdir('stage1')
    cmake = _get_default_cmake_invocation(env)
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=['clang', 'llvm-profdata', 'profile'])
    # The caller hands this directory to every later stage.
    return target_dir
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir,
                                         output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Runs `llvm-profdata merge` from `stage1_dir`/bin over every `*.profraw`
    file found directly inside `profile_dir`.

    Args:
        env: the build environment (supplies `dry_run` and `run_command`).
        stage1_dir: build directory that contains bin/llvm-profdata.
        profile_dir: directory holding the `.profraw` files.
        output_file: path of the merged profile to write.
    """
    llvm_profdata = os.path.join(stage1_dir, 'bin', 'llvm-profdata')
    if env.dry_run:
        # Nothing was actually built or run, so there is no directory to
        # list; show a glob so the printed command is still meaningful.
        # NOTE(review): the condition selecting between the two `profiles`
        # values was not visible; `env.dry_run` is assumed -- confirm.
        profiles = [os.path.join(profile_dir, '*.profraw')]
    else:
        # Sorted so the command line is reproducible across runs.
        profiles = [
            os.path.join(profile_dir, f)
            for f in sorted(os.listdir(profile_dir))
            if f.endswith('.profraw')
        ]
    cmd = [llvm_profdata, 'merge', '-output=' + output_file] + profiles
    env.run_command(cmd, check=True)
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 toolchain.

    `stage1_dir` must be an absolute path. The build is placed in the
    'instrumented' subdirectory of the output directory.

    Returns:
        A pair (build directory, path of its 'profiles' subdirectory).
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, 'instrumented')
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    invocation.add_new_flag('LLVM_BUILD_INSTRUMENTED', 'IR')

    # libcxx's configure step messes with our link order: libclang_rt.profile
    # ends up linked after libgcc, yet the former needs atexit from the
    # latter, so the configure checks fail.
    #
    # We need neither libcxx nor compiler-rt here, so simply turn them off.
    invocation.add_new_flag('LLVM_BUILD_RUNTIME', 'No')

    _run_fresh_cmake(env, invocation, instrumented_dir)
    wanted_targets = ['clang', 'lld']
    _build_things_in(env, instrumented_dir, what=wanted_targets)

    return instrumented_dir, os.path.join(instrumented_dir, 'profiles')
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile applied; return its build directory.

    The build uses the toolchain in `stage1_dir` and is placed in the
    'optimized' subdirectory of the output directory.

    Args:
        env: the build environment.
        stage1_dir: build directory holding the compiler to build with.
        profdata_file: the merged profile; passed to cmake as an absolute
            path via LLVM_PROFDATA_FILE.

    Raises:
        ValueError: if `profdata_file` does not exist (not checked on dry
            runs, where nothing has been generated).
    """
    if not env.dry_run and not os.path.exists(profdata_file):
        raise ValueError('Looks like the profdata file at %s doesn\'t exist' %
                         profdata_file)

    target_dir = os.path.join(env.output_dir, 'optimized')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag('LLVM_PROFDATA_FILE', os.path.abspath(profdata_file))

    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # them.
    cmake.add_cflags(['-Wno-backend-plugin'])
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=['clang'])
    # The caller reports this as the final build directory.
    return target_dir
310 Args
= collections
.namedtuple('Args', [
311 'do_optimized_build',
312 'include_debug_info',
319 parser
= argparse
.ArgumentParser(
320 description
='Builds LLVM and Clang with instrumentation, collects '
321 'instrumentation profiles for them, and (optionally) builds things'
322 'with these PGO profiles. By default, it\'s assumed that you\'re '
323 'running this from your LLVM root, and all build artifacts will be '
324 'saved to $PWD/out.')
329 help='an extra arg to pass to all cmake invocations. Note that this '
330 'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will '
331 'be passed as -DFOO=BAR. This may be specified multiple times.')
335 help='print commands instead of running them')
339 help='directory containing an LLVM checkout (default: $PWD)')
341 '--no-optimized-build',
343 help='disable the final, PGO-optimized build')
346 help='directory to write artifacts to (default: $llvm_dir/out)')
349 help='where to output the profile (default is $out/pgo_profile.prof)')
352 help='instead of having an initial build of everything, use the given '
353 'directory. It is expected that this directory will have clang, '
354 'llvm-profdata, and the appropriate libclang_rt.profile already built')
356 '--use-debug-info-in-benchmark',
358 help='use a regular build instead of RelWithDebInfo in the benchmark. '
359 'This increases benchmark execution time and disk space requirements, '
360 'but gives more coverage over debuginfo bits in LLVM and clang.')
364 default
=shutil
.which('ninja') is None,
365 help='use Makefiles instead of ninja')
367 args
= parser
.parse_args()
369 llvm_dir
= os
.path
.abspath(args
.llvm_dir
)
370 if args
.out_dir
is None:
371 output_dir
= os
.path
.join(llvm_dir
, 'out')
373 output_dir
= os
.path
.abspath(args
.out_dir
)
375 extra_args
= {'CMAKE_BUILD_TYPE': 'Release'}
376 for arg
in args
.cmake_extra_arg
:
377 if arg
.startswith('-D'):
379 elif arg
.startswith('-'):
380 raise ValueError('Unknown not- -D arg encountered; you may need '
381 'to tweak the source...')
382 split
= arg
.split('=', 1)
384 key
, val
= split
[0], ''
387 extra_args
[key
] = val
390 default_cmake_args
=extra_args
,
391 dry_run
=args
.dry_run
,
393 output_dir
=output_dir
,
394 use_make
=args
.use_make
,
397 if args
.profile_output
is not None:
398 profile_location
= args
.profile_output
400 profile_location
= os
.path
.join(env
.output_dir
, 'pgo_profile.prof')
403 do_optimized_build
=not args
.no_optimized_build
,
404 include_debug_info
=args
.use_debug_info_in_benchmark
,
405 profile_location
=profile_location
,
406 stage1_dir
=args
.stage1_dir
,
409 return env
, result_args
412 def _looks_like_llvm_dir(directory
):
413 """Arbitrary set of heuristics to determine if `directory` is an llvm dir.
415 Errs on the side of false-positives."""
417 contents
= set(os
.listdir(directory
))
418 expected_contents
= [
426 if not all(c
in contents
for c
in expected_contents
):
430 include_listing
= os
.listdir(os
.path
.join(directory
, 'include'))
431 except NotADirectoryError
:
434 return 'llvm' in include_listing
def _die(*args, **kwargs):
    """Print a message to stderr and terminate the script.

    Arguments are forwarded to `print`; any `file` keyword is overridden so
    the message always goes to stderr.

    Raises:
        SystemExit: always, with exit status 1.
    """
    kwargs['file'] = sys.stderr
    print(*args, **kwargs)
    # Callers treat this as fatal and do not guard the code that follows.
    sys.exit(1)
444 env
, args
= _parse_args()
446 if not _looks_like_llvm_dir(env
.llvm_dir
):
447 _die('Looks like %s isn\'t an LLVM directory; please see --help' %
449 if not env
.has_llvm_subproject('clang'):
450 _die('Need a clang checkout at tools/clang')
451 if not env
.has_llvm_subproject('compiler-rt'):
452 _die('Need a compiler-rt checkout at projects/compiler-rt')
455 print(*args
, file=sys
.stderr
)
457 if args
.stage1_dir
is None:
458 status('*** Building stage1 clang...')
459 stage1_out
= _build_stage1_clang(env
)
461 stage1_out
= args
.stage1_dir
463 status('*** Building instrumented clang...')
464 instrumented_out
, profile_dir
= _build_instrumented_clang(env
, stage1_out
)
465 status('*** Running profdata benchmarks...')
466 _run_benchmark(env
, instrumented_out
, args
.include_debug_info
)
467 status('*** Generating profile...')
468 _generate_instrumented_clang_profile(env
, stage1_out
, profile_dir
,
469 args
.profile_location
)
471 print('Final profile:', args
.profile_location
)
472 if args
.do_optimized_build
:
473 status('*** Building PGO-optimized binaries...')
474 optimized_out
= _build_optimized_clang(env
, stage1_out
,
475 args
.profile_location
)
476 print('Final build directory:', optimized_out
)
479 if __name__
== '__main__':