"""
This script:
- Builds clang with user-defined flags
- Uses that clang to build an instrumented clang, which can be used to collect
  PGO profiles
- Builds a user-defined set of sources (default: clang) to act as a
  "benchmark" to generate a PGO profile
- Builds clang once more with the PGO profile generated above

This is a total of four clean builds of clang (by default). This may take a
while.

This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
Eventually, it will be updated to instead call the cmake cache mentioned there.
"""
import argparse
import collections
import multiprocessing
import os
import shlex
import shutil
import subprocess
import sys
### User configuration


# If you want to use a different 'benchmark' than building clang, make this
# function do what you want. out_dir is the build directory for clang, so all
# of the clang binaries will live under "${out_dir}/bin/". Using clang in
# ${out_dir} will automatically have the profiles go to the right place.
#
# You may assume that out_dir is a freshly-built directory that you can reach
# into to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Run the workload whose execution produces the profile data.

    Args:
        env: build environment; supplies the output subdirectory and is handed
            to the build helpers.
        out_dir: build directory of the clang to exercise. The test-suite
            targets are built in here, and the benchmark build is bootstrapped
            from it.
        include_debug_info: when true, the benchmark build is configured with
            CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    bench_dir = env.output_subdir("instrumentation_run")

    # The two test suites are an inexpensive coverage boost. `check-llvm`
    # reaches the non-x86 backends a little (if they are configured), and
    # `check-clang` feeds in more C plus a fair number of diagnostic paths.
    # The `if (stuff_is_broken) { diag() ... }` branches should still end up
    # weighted heavily towards not-taken, because all of LLVM/etc. gets built.
    suite_targets = ["check-llvm", "check-clang"]
    _build_things_in(env, out_dir, what=suite_targets)

    # Keep the tablegen builds: they add coverage. (out_dir might not contain
    # the tablegen binaries anyway, though that matters less.)
    invocation = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False
    )

    if include_debug_info:
        invocation.add_flag("CMAKE_BUILD_TYPE", "RelWithDebInfo")

    _run_fresh_cmake(env, invocation, bench_dir)

    # Build every target; more data makes for a better profile.
    _build_things_in(env, bench_dir, what=["all"])
class CmakeInvocation:
    """Collects the pieces of one cmake command line.

    The command starts with a fixed prefix (cmake binary, generator, source
    directory) and is followed by one -D argument per stored flag. The
    compiler- and linker-flag variables are preloaded as lists so that several
    callers can contribute to them; every other variable holds a single string.
    """

    # cmake variables that add_cflags() extends.
    _cflags = ["CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS"]
    # cmake variables that add_ldflags() extends.
    _ldflags = [
        "CMAKE_EXE_LINKER_FLAGS",
        "CMAKE_MODULE_LINKER_FLAGS",
        "CMAKE_SHARED_LINKER_FLAGS",
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G maker cmake_dir` with no flags set."""
        self._prefix = [cmake, "-G", maker, cmake_dir]

        # Map of str -> (list|str). The list-valued entries are created up
        # front so add_flag() can tell "append" apart from "overwrite".
        self._flags = {
            flag: [] for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Set `key`, raising ValueError if it already holds a string value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: `key` already holds a string and `allow_overwrites` is
                false.
        """
        if key not in self._flags:
            self._flags[key] = value
            return

        existing_value = self._flags[key]
        if isinstance(existing_value, list):
            # List-valued variables accumulate; this never counts as an
            # overwrite.
            existing_value.append(value)
            return

        if not allow_overwrites:
            raise ValueError("Invalid overwrite of %s requested" % key)

        self._flags[key] = value

    def _extend_each(self, keys, flags):
        """Append every item of `flags` to each list-valued variable in `keys`."""
        # A bare string would be split into characters ('-', 'O', '2'), which
        # is never what the caller meant. Raised explicitly so the check also
        # holds under `python -O`.
        if isinstance(flags, str):
            raise TypeError("flags must be an iterable of strings, not a str")
        # Materialize once: a generator would otherwise be used up by the
        # first variable and leave the remaining ones untouched.
        flags = list(flags)
        for key in keys:
            self._flags[key].extend(flags)

    def add_cflags(self, flags):
        """Append `flags` (an iterable of strings) to every compiler-flag variable."""
        self._extend_each(CmakeInvocation._cflags, flags)

    def add_ldflags(self, flags):
        """Append `flags` (an iterable of strings) to every linker-flag variable."""
        self._extend_each(CmakeInvocation._ldflags, flags)

    def to_args(self):
        """Return the full argument list: the prefix, then flags sorted by key."""
        args = self._prefix.copy()
        for key, value in sorted(self._flags.items()):
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = " ".join(value)

            # NOTE(review): the statements that turn each pair into an
            # argument were missing from the text this block was restored
            # from. The -DKEY=VALUE shape follows the --cmake-extra-arg help
            # text; emitting a bare -DKEY for an empty value is an assumption
            # to confirm.
            arg = "-D" + key
            if value != "":
                arg += "=" + value
            args.append(arg)
        return args
# NOTE(review): the class statement itself was missing from the text this
# block was restored from; the name `Env` is assumed from the `env` variables
# used throughout the file. Confirm against the constructor call in
# _parse_args.
class Env:
    """Settings shared by every build step, plus the command runner."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, dry_run):
        """Store the settings.

        Args:
            llvm_dir: path of the LLVM source directory handed to cmake.
            use_make: generate and run Makefiles instead of ninja.
            output_dir: directory under which every build directory is created.
            default_cmake_args: mapping of cmake variable -> value applied to
                every invocation. Copied, so later changes by the caller are
                not seen.
            dry_run: when true, run_command() only prints commands.
        """
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs of the default cmake arguments."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching `use_make`."""
        return "Unix Makefiles" if self.use_make else "Ninja"

    def get_make_command(self):
        """Return the argv prefix of the build tool matching `use_make`."""
        if self.use_make:
            return ["make", "-j{}".format(multiprocessing.cpu_count())]
        # NOTE(review): the non-make branch was missing from the damaged text;
        # a bare `ninja` is assumed from get_cmake_maker() and the --use-make
        # help text. Confirm.
        return ["ninja"]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether the checkout of subproject `name` exists.

        Raises:
            ValueError: `name` is neither "compiler-rt" nor "clang".
        """
        if name == "compiler-rt":
            subdir = "../compiler-rt"
        elif name == "clang":
            # NOTE(review): this assignment was missing from the damaged text;
            # "../clang" is assumed by analogy with compiler-rt. Confirm.
            subdir = "../clang"
        else:
            raise ValueError("Unknown subproject: %s" % name)

        return os.path.isdir(os.path.join(self.llvm_dir, subdir))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self, cmd, cwd=None, check=False, silent_unless_error=False):
        """Print `cmd`, then run it unless this is a dry run.

        Args:
            cmd: argv list to execute.
            cwd: working directory; None means the current directory.
            check: raise if the command exits with a non-zero status.
            silent_unless_error: capture stdout and stderr together and print
                them only when the command fails.

        Raises:
            subprocess.CalledProcessError: the command failed and `check` is
                true.
        """
        print("Running `%s` in %s" % (cmd, shlex.quote(cwd or os.getcwd())))

        # NOTE(review): this guard was missing from the damaged text; it is
        # restored from the --dry-run help ("print commands instead of running
        # them") and the comment above this method.
        if self.dry_run:
            return

        if silent_unless_error:
            stdout, stderr = subprocess.PIPE, subprocess.STDOUT
        else:
            stdout, stderr = None, None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        popen = subprocess.Popen(
            cmd, stdin=subprocess.DEVNULL, stdout=stdout, stderr=stderr, cwd=cwd
        )
        # communicate() waits for the process to exit, so the return code is
        # already set once it returns.
        stdout, _ = popen.communicate()
        return_code = popen.returncode

        if not return_code:
            return

        if silent_unless_error:
            print(stdout.decode("utf-8", "ignore"))

        if check:
            raise subprocess.CalledProcessError(
                returncode=return_code, cmd=cmd, output=stdout, stderr=None
            )
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation for `env` carrying its default cmake arguments.

    The invocation runs `cmake` with the generator chosen by `env` against
    `env.llvm_dir`. Each default argument is added with add_new_flag(), so a
    later add_new_flag() for the same non-list variable raises ValueError.
    """
    inv = CmakeInvocation(
        cmake="cmake", maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir
    )
    for key, value in env.get_default_cmake_args_kv():
        inv.add_new_flag(key, value)
    # Callers go on to add flags to the result, so it has to be handed back.
    return inv
def _get_cmake_invocation_for_bootstrap_from(env, out_dir, skip_tablegens=True):
    """Return the default invocation, set up to compile with `out_dir`'s clang.

    Args:
        env: build environment.
        out_dir: an existing build directory; its bin/clang and bin/clang++
            become the C and C++ compilers.
        skip_tablegens: when true, point the new build at the tablegen
            binaries in `out_dir` (where present) instead of rebuilding them.

    Returns:
        The configured CmakeInvocation.
    """
    clang = os.path.join(out_dir, "bin", "clang")
    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag("CMAKE_C_COMPILER", clang)
    cmake.add_new_flag("CMAKE_CXX_COMPILER", clang + "++")

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    def add_tablegen(key, binary):
        path = os.path.join(out_dir, "bin", binary)

        # Check that this exists, since the user's allowed to specify their own
        # stage1 directory (which is generally where we'll source everything
        # from). Dry runs should hope for the best from our user, as well.
        if env.dry_run or os.path.exists(path):
            cmake.add_new_flag(key, path)

    # NOTE(review): the guard was missing from the damaged text; it is restored
    # from the parameter name and the "don't skip it" comment at the call that
    # passes skip_tablegens=False.
    if skip_tablegens:
        add_tablegen("LLVM_TABLEGEN", "llvm-tblgen")
        add_tablegen("CLANG_TABLEGEN", "clang-tblgen")

    return cmake
def _build_things_in(env, target_dir, what):
    """Build the targets listed in `what` inside `target_dir`.

    The command is the build-tool prefix from `env` followed by the target
    names. It is run with check=True.
    """
    build_cmd = env.get_make_command()
    build_cmd = build_cmd + what
    env.run_command(build_cmd, cwd=target_dir, check=True)
def _run_fresh_cmake(env, cmake, target_dir):
    """Recreate `target_dir` as an empty directory and configure it with `cmake`.

    Args:
        env: build environment used to run the command.
        cmake: CmakeInvocation whose to_args() gives the command line.
        target_dir: build directory; any existing contents are deleted.
    """
    # NOTE(review): the dry-run guard was missing from the damaged text; it is
    # assumed so that a dry run leaves the file system untouched. Confirm.
    if not env.dry_run:
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            # Nothing to clean up on the first run.
            pass

        os.makedirs(target_dir, mode=0o755)

    cmake_args = cmake.to_args()
    # cmake output is only interesting when configuration fails.
    env.run_command(cmake_args, cwd=target_dir, check=True, silent_unless_error=True)
def _build_stage1_clang(env):
    """Configure and build the first-stage toolchain; return its build directory.

    The build lives in the "stage1" output subdirectory and covers the
    `clang`, `llvm-profdata` and `profile` targets.
    """
    target_dir = env.output_subdir("stage1")
    cmake = _get_default_cmake_invocation(env)
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=["clang", "llvm-profdata", "profile"])
    # The caller passes this directory on to the later stages.
    return target_dir
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Args:
        env: build environment used to run the command.
        stage1_dir: build directory whose bin/llvm-profdata does the merge.
        profile_dir: directory holding the *.profraw files.
        output_file: path of the merged profile to write.
    """
    llvm_profdata = os.path.join(stage1_dir, "bin", "llvm-profdata")
    # NOTE(review): the condition choosing between the two forms was missing
    # from the damaged text. The glob placeholder is assumed to be the dry-run
    # form, since no files have been produced to list in that mode. Confirm.
    if env.dry_run:
        profiles = [os.path.join(profile_dir, "*.profraw")]
    else:
        profiles = [
            os.path.join(profile_dir, f)
            for f in os.listdir(profile_dir)
            if f.endswith(".profraw")
        ]
    cmd = [llvm_profdata, "merge", "-output=" + output_file] + profiles
    env.run_command(cmd, check=True)
def _build_instrumented_clang(env, stage1_dir):
    """Build an instrumented clang and lld using the toolchain in `stage1_dir`.

    Args:
        env: build environment.
        stage1_dir: absolute path of the build directory to bootstrap from.

    Returns:
        A pair (build directory, "profiles" directory inside it).
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, "instrumented")
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    invocation.add_new_flag("LLVM_BUILD_INSTRUMENTED", "IR")

    # The configure step of libcxx disturbs the link order: libclang_rt.profile
    # ends up after libgcc, yet needs atexit from it, so the configure checks
    # fail.
    #
    # Neither libcxx nor compiler-rt is needed here, so simply turn them off.
    invocation.add_new_flag("LLVM_BUILD_RUNTIME", "No")

    _run_fresh_cmake(env, invocation, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=["clang", "lld"])

    return instrumented_dir, os.path.join(instrumented_dir, "profiles")
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile in `profdata_file`; return the build dir.

    Args:
        env: build environment.
        stage1_dir: build directory to bootstrap from.
        profdata_file: merged profile, passed to cmake as LLVM_PROFDATA_FILE.

    Raises:
        ValueError: the profile does not exist (not checked in a dry run).
    """
    if not env.dry_run and not os.path.exists(profdata_file):
        # NOTE(review): the raise statement was missing from the damaged text;
        # ValueError is assumed, matching the other raises in this file.
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file
        )

    target_dir = os.path.join(env.output_dir, "optimized")
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    # cmake runs from the build directory, so hand it an absolute path.
    cmake.add_new_flag("LLVM_PROFDATA_FILE", os.path.abspath(profdata_file))

    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # them.
    cmake.add_cflags(["-Wno-backend-plugin"])
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=["clang"])
    # The caller reports this as the final build directory.
    return target_dir
# Script-level options that _parse_args returns alongside the environment.
# NOTE(review): only the first two field names survived in the damaged text;
# the other two, and the type name, are taken from the keyword arguments used
# where the tuple is constructed. Confirm.
Args = collections.namedtuple(
    "Args",
    [
        "do_optimized_build",
        "include_debug_info",
        "profile_location",
        "stage1_dir",
    ],
)
def _parse_args(argv=None):
    """Parse the command line into a build environment and script options.

    Args:
        argv: argument list to parse; None (the default) means sys.argv[1:].

    Returns:
        A pair (env, result_args): the Env describing directories and build
        settings, and an Args tuple with the remaining options.

    Raises:
        ValueError: a --cmake-extra-arg value starts with "-" but is not a -D
            argument.
    """
    # NOTE(review): the function header, several option names and some option
    # keywords were missing from the text this block was restored from. Option
    # names are derived from the `args.<name>` attributes read below, and
    # defaults from the help texts. Confirm.
    parser = argparse.ArgumentParser(
        description="Builds LLVM and Clang with instrumentation, collects "
        "instrumentation profiles for them, and (optionally) builds things "
        "with these PGO profiles. By default, it's assumed that you're "
        "running this from your LLVM root, and all build artifacts will be "
        "saved to $PWD/out."
    )
    parser.add_argument(
        "--cmake-extra-arg",
        action="append",
        default=[],
        help="an extra arg to pass to all cmake invocations. Note that this "
        "is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will "
        "be passed as -DFOO=BAR. This may be specified multiple times.",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="print commands instead of running them"
    )
    parser.add_argument(
        "--llvm-dir",
        default=".",
        help="directory containing an LLVM checkout (default: $PWD)",
    )
    parser.add_argument(
        "--no-optimized-build",
        action="store_true",
        help="disable the final, PGO-optimized build",
    )
    parser.add_argument(
        "--out-dir", help="directory to write artifacts to (default: $llvm_dir/out)"
    )
    parser.add_argument(
        "--profile-output",
        help="where to output the profile (default is $out/pgo_profile.prof)",
    )
    parser.add_argument(
        "--stage1-dir",
        help="instead of having an initial build of everything, use the given "
        "directory. It is expected that this directory will have clang, "
        "llvm-profdata, and the appropriate libclang_rt.profile already built",
    )
    parser.add_argument(
        "--use-debug-info-in-benchmark",
        action="store_true",
        # The flag switches the benchmark build *to* RelWithDebInfo; the
        # earlier wording described it the other way round.
        help="use a RelWithDebInfo build instead of a regular build in the "
        "benchmark. "
        "This increases benchmark execution time and disk space requirements, "
        "but gives more coverage over debuginfo bits in LLVM and clang.",
    )
    parser.add_argument(
        "--use-make",
        action="store_true",
        # Fall back to make automatically when ninja is not installed.
        default=shutil.which("ninja") is None,
        help="use Makefiles instead of ninja",
    )

    args = parser.parse_args(argv)

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, "out")
    else:
        output_dir = os.path.abspath(args.out_dir)

    extra_args = {
        "CMAKE_BUILD_TYPE": "Release",
        "LLVM_ENABLE_PROJECTS": "clang;compiler-rt;lld",
    }
    for arg in args.cmake_extra_arg:
        if arg.startswith("-D"):
            # Accept both FOO=BAR and -DFOO=BAR.
            arg = arg[2:]
        elif arg.startswith("-"):
            raise ValueError(
                "Unknown not- -D arg encountered; you may need "
                "to tweak the source..."
            )
        split = arg.split("=", 1)
        if len(split) == 1:
            key, val = split[0], ""
        else:
            key, val = split
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, "pgo_profile.prof")

    # The instrumented build asserts that its stage1 directory is absolute, so
    # normalize a user-supplied path the same way as the other directories.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
421 def _looks_like_llvm_dir(directory
):
422 """Arbitrary set of heuristics to determine if `directory` is an llvm dir.
424 Errs on the side of false-positives."""
426 contents
= set(os
.listdir(directory
))
427 expected_contents
= [
435 if not all(c
in contents
for c
in expected_contents
):
439 include_listing
= os
.listdir(os
.path
.join(directory
, "include"))
440 except NotADirectoryError
:
443 return "llvm" in include_listing
446 def _die(*args
, **kwargs
):
447 kwargs
["file"] = sys
.stderr
448 print(*args
, **kwargs
)
# NOTE(review): the `def` line and the call under the __main__ guard were
# missing from the text this block was restored from; the name `_main` is
# assumed, following the file's underscore-prefixed helpers. Confirm.
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, profile,
    and (unless disabled) the PGO-optimized build."""
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die("Looks like %s isn't an LLVM directory; please see --help" % env.llvm_dir)
    # The subprojects are looked up as siblings of the LLVM directory, so the
    # messages name those locations.
    if not env.has_llvm_subproject("clang"):
        _die(
            "Need a clang checkout at %s"
            % os.path.normpath(os.path.join(env.llvm_dir, "../clang"))
        )
    if not env.has_llvm_subproject("compiler-rt"):
        _die(
            "Need a compiler-rt checkout at %s"
            % os.path.normpath(os.path.join(env.llvm_dir, "../compiler-rt"))
        )

    def status(*parts):
        # Progress goes to stderr so it stays separate from the results
        # printed on stdout.
        print(*parts, file=sys.stderr)

    if args.stage1_dir is None:
        status("*** Building stage1 clang...")
        stage1_out = _build_stage1_clang(env)
    else:
        stage1_out = args.stage1_dir

    status("*** Building instrumented clang...")
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status("*** Running profdata benchmarks...")
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status("*** Generating profile...")
    _generate_instrumented_clang_profile(
        env, stage1_out, profile_dir, args.profile_location
    )

    print("Final profile:", args.profile_location)
    if args.do_optimized_build:
        status("*** Building PGO-optimized binaries...")
        optimized_out = _build_optimized_clang(env, stage1_out, args.profile_location)
        print("Final build directory:", optimized_out)


if __name__ == "__main__":
    _main()