1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // llvm-profdata merges .profdata files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Debuginfod/HTTPClient.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/ProfileData/InstrProfCorrelator.h"
20 #include "llvm/ProfileData/InstrProfReader.h"
21 #include "llvm/ProfileData/InstrProfWriter.h"
22 #include "llvm/ProfileData/MemProf.h"
23 #include "llvm/ProfileData/MemProfReader.h"
24 #include "llvm/ProfileData/ProfileCommon.h"
25 #include "llvm/ProfileData/SampleProfReader.h"
26 #include "llvm/ProfileData/SampleProfWriter.h"
27 #include "llvm/Support/BalancedPartitioning.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Discriminator.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/FormattedStream.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/ThreadPool.h"
40 #include "llvm/Support/Threading.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/Support/WithColor.h"
43 #include "llvm/Support/raw_ostream.h"
// File-local shorthand for InstrProfCorrelator::ProfCorrelatorKind.
50 using ProfCorrelatorKind
= InstrProfCorrelator::ProfCorrelatorKind
;
52 // https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentation
53 // on each subcommand.
// One cl::SubCommand object per subcommand. The options declared further down
// attach themselves to these objects through cl::sub(...).
54 cl::SubCommand
ShowSubcommand(
56 "Takes a profile data file and displays the profiles. See detailed "
58 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
59 cl::SubCommand
OrderSubcommand(
61 "Reads temporal profiling traces from a profile and outputs a function "
62 "order that reduces the number of page faults for those traces. See "
63 "detailed documentation in "
64 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
65 cl::SubCommand
OverlapSubcommand(
67 "Computes and displays the overlap between two profiles. See detailed "
69 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
70 cl::SubCommand
MergeSubcommand(
72 "Takes several profiles and merge them together. See detailed "
74 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
// Profile kinds that the ProfileKind / ShowProfileKind options select between.
77 enum ProfileKinds
{ instr
, sample
, memory
};
// Values accepted by the --failure-mode option of the merge subcommand.
78 enum FailureMode
{ warnOnly
, failIfAnyAreInvalid
, failIfAllAreInvalid
};
// NOTE(review): the enumerator below belongs to an enum whose head is not
// visible in this excerpt.
83 PF_Compact_Binary
, // Deprecated
// Output formats selectable through --show-format.
89 enum class ShowFormat
{ Text
, Json
, Yaml
};
// --output: destination file, shared by all four subcommands. The initial
// value is "-".
93 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
94 cl::init("-"), cl::desc("Output file"),
95 cl::sub(ShowSubcommand
),
96 cl::sub(OrderSubcommand
),
97 cl::sub(OverlapSubcommand
),
98 cl::sub(MergeSubcommand
));
99 // NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
100 // will be used. llvm::cl::alias::done() method asserts this condition.
101 cl::alias
OutputFilenameA("o", cl::desc("Alias for --output"),
102 cl::aliasopt(OutputFilename
));
104 // Options common to at least two commands.
// Profile kind for merge and overlap; initialised to instr.
105 cl::opt
<ProfileKinds
> ProfileKind(
106 cl::desc("Profile kind:"), cl::sub(MergeSubcommand
),
107 cl::sub(OverlapSubcommand
), cl::init(instr
),
108 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
109 clEnumVal(sample
, "Sample profile")));
// Positional profile path for the show and order subcommands.
110 cl::opt
<std::string
> Filename(cl::Positional
, cl::desc("<profdata-file>"),
111 cl::sub(ShowSubcommand
),
112 cl::sub(OrderSubcommand
));
113 cl::opt
<unsigned> MaxDbgCorrelationWarnings(
114 "max-debug-info-correlation-warnings",
115 cl::desc("The maximum number of warnings to emit when correlating "
116 "profile from debug info (0 = no limit)"),
117 cl::sub(MergeSubcommand
), cl::sub(ShowSubcommand
), cl::init(5));
118 cl::opt
<std::string
> ProfiledBinary(
119 "profiled-binary", cl::init(""),
120 cl::desc("Path to binary from which the profile was collected."),
121 cl::sub(ShowSubcommand
), cl::sub(MergeSubcommand
));
122 cl::opt
<std::string
> DebugInfoFilename(
123 "debug-info", cl::init(""),
125 "For show, read and extract profile metadata from debug info and show "
126 "the functions it found. For merge, use the provided debug info to "
127 "correlate the raw profile."),
128 cl::sub(ShowSubcommand
), cl::sub(MergeSubcommand
));
// NOTE(review): the declaration head for BinaryFilename is not visible in this
// excerpt. Also, "bianry" in its description string is a typo for "binary".
130 BinaryFilename("binary-file", cl::init(""),
131 cl::desc("For merge, use the provided unstripped bianry to "
132 "correlate the raw profile."),
133 cl::sub(MergeSubcommand
));
134 cl::list
<std::string
> DebugFileDirectory(
135 "debug-file-directory",
136 cl::desc("Directories to search for object files by build ID"));
137 cl::opt
<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden
,
138 cl::sub(MergeSubcommand
),
139 cl::desc("Enable debuginfod"));
// Correlation kind; the accepted spellings are "", "debug-info" and "binary".
140 cl::opt
<ProfCorrelatorKind
> BIDFetcherProfileCorrelate(
142 cl::desc("Use debug-info or binary correlation to correlate profiles with "
144 cl::init(InstrProfCorrelator::NONE
),
145 cl::values(clEnumValN(InstrProfCorrelator::NONE
, "",
146 "No profile correlation"),
147 clEnumValN(InstrProfCorrelator::DEBUG_INFO
, "debug-info",
148 "Use debug info to correlate"),
149 clEnumValN(InstrProfCorrelator::BINARY
, "binary",
150 "Use binary to correlate")));
// Positive function-name filter, shared by show, overlap and merge.
151 cl::opt
<std::string
> FuncNameFilter(
153 cl::desc("Only functions matching the filter are shown in the output. For "
154 "overlapping CSSPGO, this takes a function name with calling "
156 cl::sub(ShowSubcommand
), cl::sub(OverlapSubcommand
),
157 cl::sub(MergeSubcommand
));
159 // TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
160 // factor out the common cl::sub in cl::opt constructor for subcommand-specific
// options.
// NOTE(review): a few declarations in this group (InputFilenamesFile, UseMD5)
// begin at the variable name; their declaration heads are not visible in this
// excerpt.
163 // Options specific to merge subcommand.
164 cl::list
<std::string
> InputFilenames(cl::Positional
, cl::sub(MergeSubcommand
),
165 cl::desc("<filename...>"));
166 cl::list
<std::string
> WeightedInputFilenames("weighted-input",
167 cl::sub(MergeSubcommand
),
168 cl::desc("<weight>,<filename>"));
// Output encoding of the merged profile; initialised to PF_Ext_Binary.
169 cl::opt
<ProfileFormat
> OutputFormat(
170 cl::desc("Format of output profile"), cl::sub(MergeSubcommand
),
171 cl::init(PF_Ext_Binary
),
172 cl::values(clEnumValN(PF_Binary
, "binary", "Binary encoding"),
173 clEnumValN(PF_Ext_Binary
, "extbinary",
174 "Extensible binary encoding "
176 clEnumValN(PF_Text
, "text", "Text encoding"),
177 clEnumValN(PF_GCC
, "gcc",
178 "GCC encoding (only meaningful for -sample)")));
180 InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand
),
181 cl::desc("Path to file containing newline-separated "
182 "[<weight>,]<filename> entries"));
183 cl::alias
InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
184 cl::aliasopt(InputFilenamesFile
));
185 cl::opt
<bool> DumpInputFileList(
186 "dump-input-file-list", cl::init(false), cl::Hidden
,
187 cl::sub(MergeSubcommand
),
188 cl::desc("Dump the list of input files and their weights, then exit"));
189 cl::opt
<std::string
> RemappingFile("remapping-file", cl::value_desc("file"),
190 cl::sub(MergeSubcommand
),
191 cl::desc("Symbol remapping file"));
192 cl::alias
RemappingFileA("r", cl::desc("Alias for --remapping-file"),
193 cl::aliasopt(RemappingFile
));
195 UseMD5("use-md5", cl::init(false), cl::Hidden
,
196 cl::desc("Choose to use MD5 to represent string in name table (only "
197 "meaningful for -extbinary)"),
198 cl::sub(MergeSubcommand
));
199 cl::opt
<bool> CompressAllSections(
200 "compress-all-sections", cl::init(false), cl::Hidden
,
201 cl::sub(MergeSubcommand
),
202 cl::desc("Compress all sections when writing the profile (only "
203 "meaningful for -extbinary)"));
204 cl::opt
<bool> SampleMergeColdContext(
205 "sample-merge-cold-context", cl::init(false), cl::Hidden
,
206 cl::sub(MergeSubcommand
),
208 "Merge context sample profiles whose count is below cold threshold"));
209 cl::opt
<bool> SampleTrimColdContext(
210 "sample-trim-cold-context", cl::init(false), cl::Hidden
,
211 cl::sub(MergeSubcommand
),
213 "Trim context sample profiles whose count is below cold threshold"));
214 cl::opt
<uint32_t> SampleColdContextFrameDepth(
215 "sample-frame-depth-for-cold-context", cl::init(1),
216 cl::sub(MergeSubcommand
),
217 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
218 "context-less base profile"));
219 cl::opt
<size_t> OutputSizeLimit(
220 "output-size-limit", cl::init(0), cl::Hidden
, cl::sub(MergeSubcommand
),
221 cl::desc("Trim cold functions until profile size is below specified "
222 "limit in bytes. This uses a heursitic and functions may be "
223 "excessively trimmed"));
// Hidden merge options: the extbinary-only switches, then the knobs for
// supplementing an instr profile with a sample profile.
224 cl::opt
<bool> GenPartialProfile(
225 "gen-partial-profile", cl::init(false), cl::Hidden
,
226 cl::sub(MergeSubcommand
),
227 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
228 cl::opt
<bool> SplitLayout(
229 "split-layout", cl::init(false), cl::Hidden
,
230 cl::sub(MergeSubcommand
),
231 cl::desc("Split the profile to two sections with one containing sample "
232 "profiles with inlined functions and the other without (only "
233 "meaningful for -extbinary)"));
234 cl::opt
<std::string
> SupplInstrWithSample(
235 "supplement-instr-with-sample", cl::init(""), cl::Hidden
,
236 cl::sub(MergeSubcommand
),
237 cl::desc("Supplement an instr profile with sample profile, to correct "
238 "the profile unrepresentativeness issue. The sample "
239 "profile is the input of the flag. Output will be in instr "
240 "format (The flag only works with -instr)"));
241 cl::opt
<float> ZeroCounterThreshold(
242 "zero-counter-threshold", cl::init(0.7), cl::Hidden
,
243 cl::sub(MergeSubcommand
),
244 cl::desc("For the function which is cold in instr profile but hot in "
245 "sample profile, if the ratio of the number of zero counters "
246 "divided by the total number of counters is above the "
247 "threshold, the profile of the function will be regarded as "
248 "being harmful for performance and will be dropped."));
249 cl::opt
<unsigned> SupplMinSizeThreshold(
250 "suppl-min-size-threshold", cl::init(10), cl::Hidden
,
251 cl::sub(MergeSubcommand
),
252 cl::desc("If the size of a function is smaller than the threshold, "
253 "assume it can be inlined by PGO early inliner and it won't "
254 "be adjusted based on sample profile."));
255 cl::opt
<unsigned> InstrProfColdThreshold(
256 "instr-prof-cold-threshold", cl::init(0), cl::Hidden
,
257 cl::sub(MergeSubcommand
),
258 cl::desc("User specified cold threshold for instr profile which will "
259 "override the cold threshold got from profile summary. "));
260 // WARNING: This reservoir size value is propagated to any input indexed
261 // profiles for simplicity. Changing this value between invocations could
262 // result in sample bias.
263 cl::opt
<uint64_t> TemporalProfTraceReservoirSize(
264 "temporal-profile-trace-reservoir-size", cl::init(100),
265 cl::sub(MergeSubcommand
),
266 cl::desc("The maximum number of stored temporal profile traces (default: "
268 cl::opt
<uint64_t> TemporalProfMaxTraceLength(
269 "temporal-profile-max-trace-length", cl::init(10000),
270 cl::sub(MergeSubcommand
),
271 cl::desc("The maximum length of a single temporal profile trace "
272 "(default: 10000)"));
// Negative counterpart of FuncNameFilter; merge only.
273 cl::opt
<std::string
> FuncNameNegativeFilter(
274 "no-function", cl::init(""),
275 cl::sub(MergeSubcommand
),
276 cl::desc("Exclude functions matching the filter from the output."));
// NOTE(review): the declaration head for FailMode is not visible in this
// excerpt. Its initial value is failIfAnyAreInvalid.
279 FailMode("failure-mode", cl::init(failIfAnyAreInvalid
),
280 cl::desc("Failure mode:"), cl::sub(MergeSubcommand
),
281 cl::values(clEnumValN(warnOnly
, "warn",
282 "Do not fail and just print warnings."),
283 clEnumValN(failIfAnyAreInvalid
, "any",
284 "Fail if any profile is invalid."),
285 clEnumValN(failIfAllAreInvalid
, "all",
286 "Fail only if all profiles are invalid.")));
288 cl::opt
<bool> OutputSparse(
289 "sparse", cl::init(false), cl::sub(MergeSubcommand
),
290 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
291 cl::opt
<unsigned> NumThreads(
292 "num-threads", cl::init(0), cl::sub(MergeSubcommand
),
293 cl::desc("Number of merge threads to use (default: autodetect)"));
294 cl::alias
NumThreadsA("j", cl::desc("Alias for --num-threads"),
295 cl::aliasopt(NumThreads
));
297 cl::opt
<std::string
> ProfileSymbolListFile(
298 "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand
),
299 cl::desc("Path to file containing the list of function symbols "
300 "used to populate profile symbol list"));
302 cl::opt
<SampleProfileLayout
> ProfileLayout(
303 "convert-sample-profile-layout",
304 cl::desc("Convert the generated profile to a profile with a new layout"),
305 cl::sub(MergeSubcommand
), cl::init(SPL_None
),
307 clEnumValN(SPL_Nest
, "nest",
308 "Nested profile, the input should be CS flat profile"),
309 clEnumValN(SPL_Flat
, "flat",
310 "Profile with nested inlinee flatten out")));
312 cl::opt
<bool> DropProfileSymbolList(
313 "drop-profile-symbol-list", cl::init(false), cl::Hidden
,
314 cl::sub(MergeSubcommand
),
315 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
316 "(only meaningful for -sample)"));
318 cl::opt
<bool> KeepVTableSymbols(
319 "keep-vtable-symbols", cl::init(false), cl::Hidden
,
320 cl::sub(MergeSubcommand
),
321 cl::desc("If true, keep the vtable symbols in indexed profiles"));
323 // Temporary support for writing the previous version of the format, to enable
324 // some forward compatibility.
325 // TODO: Consider enabling this with future version changes as well, to ease
326 // deployment of newer versions of llvm-profdata.
// Note that, unlike its neighbours, this option carries no cl::sub(...).
327 cl::opt
<bool> DoWritePrevVersion(
328 "write-prev-version", cl::init(false), cl::Hidden
,
329 cl::desc("Write the previous version of indexed format, to enable "
330 "some forward compatibility."));
// MemProf serialization knobs (hidden, merge only). These, together with
// DoWritePrevVersion, are forwarded to the InstrProfWriter constructor by
// WriterContext.
332 cl::opt
<memprof::IndexedVersion
> MemProfVersionRequested(
333 "memprof-version", cl::Hidden
, cl::sub(MergeSubcommand
),
334 cl::desc("Specify the version of the memprof format to use"),
335 cl::init(memprof::Version3
),
336 cl::values(clEnumValN(memprof::Version2
, "2", "version 2"),
337 clEnumValN(memprof::Version3
, "3", "version 3")));
339 cl::opt
<bool> MemProfFullSchema(
340 "memprof-full-schema", cl::Hidden
, cl::sub(MergeSubcommand
),
341 cl::desc("Use the full schema for serialization"), cl::init(false));
// NOTE(review): the declaration head for MemprofGenerateRandomHotness is not
// visible in this excerpt.
344 MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false),
345 cl::Hidden
, cl::sub(MergeSubcommand
),
346 cl::desc("Generate random hotness values"));
347 static cl::opt
<unsigned> MemprofGenerateRandomHotnessSeed(
348 "memprof-random-hotness-seed", cl::init(0), cl::Hidden
,
349 cl::sub(MergeSubcommand
),
350 cl::desc("Random hotness seed to use (0 to generate new seed)"));
352 // Options specific to overlap subcommand.
// Two required positional arguments: the base profile, then the test profile.
353 cl::opt
<std::string
> BaseFilename(cl::Positional
, cl::Required
,
354 cl::desc("<base profile file>"),
355 cl::sub(OverlapSubcommand
));
356 cl::opt
<std::string
> TestFilename(cl::Positional
, cl::Required
,
357 cl::desc("<test profile file>"),
358 cl::sub(OverlapSubcommand
));
360 cl::opt
<unsigned long long> SimilarityCutoff(
361 "similarity-cutoff", cl::init(0),
362 cl::desc("For sample profiles, list function names (with calling context "
363 "for csspgo) for overlapped functions "
364 "with similarities below the cutoff (percentage times 10000)."),
365 cl::sub(OverlapSubcommand
));
// NOTE(review): the declaration head of the "cs" option is not visible in this
// excerpt.
368 "cs", cl::init(false),
369 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
370 cl::sub(OverlapSubcommand
));
// Initialised with -1, which converts to the largest unsigned long long value.
372 cl::opt
<unsigned long long> OverlapValueCutoff(
373 "value-cutoff", cl::init(-1),
375 "Function level overlap information for every function (with calling "
376 "context for csspgo) in test "
377 "profile with max count value greater than the parameter value"),
378 cl::sub(OverlapSubcommand
));
380 // Options specific to show subcommand.
// NOTE(review): several declarations in this group (SFormat, TextFormat,
// ShowHotFuncList, ShowCovered and the deprecated JSON switch) are missing
// their heads in this excerpt.
381 cl::opt
<bool> ShowCounts("counts", cl::init(false),
382 cl::desc("Show counter values for shown functions"),
383 cl::sub(ShowSubcommand
));
385 SFormat("show-format", cl::init(ShowFormat::Text
),
386 cl::desc("Emit output in the selected format if supported"),
387 cl::sub(ShowSubcommand
),
388 cl::values(clEnumValN(ShowFormat::Text
, "text",
389 "emit normal text output (default)"),
390 clEnumValN(ShowFormat::Json
, "json", "emit JSON"),
391 clEnumValN(ShowFormat::Yaml
, "yaml", "emit YAML")));
392 // TODO: Consider replacing this with `--show-format=text-encoding`.
394 TextFormat("text", cl::init(false),
395 cl::desc("Show instr profile data in text dump format"),
396 cl::sub(ShowSubcommand
));
399 cl::desc("Show sample profile data in the JSON format "
400 "(deprecated, please use --show-format=json)"),
401 cl::sub(ShowSubcommand
));
402 cl::opt
<bool> ShowIndirectCallTargets(
403 "ic-targets", cl::init(false),
404 cl::desc("Show indirect call site target values for shown functions"),
405 cl::sub(ShowSubcommand
));
406 cl::opt
<bool> ShowVTables("show-vtables", cl::init(false),
407 cl::desc("Show vtable names for shown functions"),
408 cl::sub(ShowSubcommand
));
409 cl::opt
<bool> ShowMemOPSizes(
410 "memop-sizes", cl::init(false),
411 cl::desc("Show the profiled sizes of the memory intrinsic calls "
412 "for shown functions"),
413 cl::sub(ShowSubcommand
));
414 cl::opt
<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
415 cl::desc("Show detailed profile summary"),
416 cl::sub(ShowSubcommand
));
// Comma-separated list of cutoffs (percentages times 10000).
417 cl::list
<uint32_t> DetailedSummaryCutoffs(
418 cl::CommaSeparated
, "detailed-summary-cutoffs",
420 "Cutoff percentages (times 10000) for generating detailed summary"),
421 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand
));
423 ShowHotFuncList("hot-func-list", cl::init(false),
424 cl::desc("Show profile summary of a list of hot functions"),
425 cl::sub(ShowSubcommand
));
426 cl::opt
<bool> ShowAllFunctions("all-functions", cl::init(false),
427 cl::desc("Details for each and every function"),
428 cl::sub(ShowSubcommand
));
429 cl::opt
<bool> ShowCS("showcs", cl::init(false),
430 cl::desc("Show context sensitive counts"),
431 cl::sub(ShowSubcommand
));
// Show has its own profile-kind option, which additionally accepts `memory`.
432 cl::opt
<ProfileKinds
> ShowProfileKind(
433 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand
),
435 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
436 clEnumVal(sample
, "Sample profile"),
437 clEnumVal(memory
, "MemProf memory access profile")));
438 cl::opt
<uint32_t> TopNFunctions(
440 cl::desc("Show the list of functions with the largest internal counts"),
441 cl::sub(ShowSubcommand
));
// Registered under the same flag name, "value-cutoff", as OverlapValueCutoff,
// but attached to the show subcommand.
442 cl::opt
<uint32_t> ShowValueCutoff(
443 "value-cutoff", cl::init(0),
444 cl::desc("Set the count value cutoff. Functions with the maximum count "
445 "less than this value will not be printed out. (Default is 0)"),
446 cl::sub(ShowSubcommand
));
447 cl::opt
<bool> OnlyListBelow(
448 "list-below-cutoff", cl::init(false),
449 cl::desc("Only output names of functions whose max count values are "
450 "below the cutoff value"),
451 cl::sub(ShowSubcommand
));
452 cl::opt
<bool> ShowProfileSymbolList(
453 "show-prof-sym-list", cl::init(false),
454 cl::desc("Show profile symbol list if it exists in the profile. "),
455 cl::sub(ShowSubcommand
));
456 cl::opt
<bool> ShowSectionInfoOnly(
457 "show-sec-info-only", cl::init(false),
458 cl::desc("Show the information of each section in the sample profile. "
459 "The flag is only usable when the sample profile is in "
461 cl::sub(ShowSubcommand
));
462 cl::opt
<bool> ShowBinaryIds("binary-ids", cl::init(false),
463 cl::desc("Show binary ids in the profile. "),
464 cl::sub(ShowSubcommand
));
465 cl::opt
<bool> ShowTemporalProfTraces(
466 "temporal-profile-traces",
467 cl::desc("Show temporal profile traces in the profile."),
468 cl::sub(ShowSubcommand
));
471 ShowCovered("covered", cl::init(false),
472 cl::desc("Show only the functions that have been executed."),
473 cl::sub(ShowSubcommand
));
475 cl::opt
<bool> ShowProfileVersion("profile-version", cl::init(false),
476 cl::desc("Show profile version. "),
477 cl::sub(ShowSubcommand
));
479 // Options specific to order subcommand.
// NOTE(review): the declaration head for NumTestTraces is not visible in this
// excerpt. The initial value is 0.
481 NumTestTraces("num-test-traces", cl::init(0),
482 cl::desc("Keep aside the last <num-test-traces> traces in "
483 "the profile when computing the function order and "
484 "instead use them to evaluate that order"),
485 cl::sub(OrderSubcommand
));
487 // We use this string to indicate that
488 // multiple static functions map to the same name.
489 const std::string DuplicateNameStr
= "----";
/// Print a warning diagnostic. WithColor::warning() is invoked first, then
/// \p Whence (followed by ": ") and \p Message are written to errs(), and
/// \p Hint is written through WithColor::note().
// NOTE(review): original lines 493 and 496 are not part of this excerpt;
// confirm there whether the Whence and Hint output is conditional.
491 static void warn(Twine Message
, StringRef Whence
= "", StringRef Hint
= "") {
492 WithColor::warning();
494 errs() << Whence
<< ": ";
495 errs() << Message
<< "\n";
497 WithColor::note() << Hint
<< "\n";
/// Error overload of warn(). When \p E holds an InstrProfError, it is consumed
/// by handleAllErrors and its message() is forwarded, together with
/// \p Whence, to the Twine overload.
// NOTE(review): the handling of other error kinds is not visible in this
// excerpt.
500 static void warn(Error E
, StringRef Whence
= "") {
501 if (E
.isA
<InstrProfError
>()) {
502 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
503 warn(IPE
.message(), Whence
);
/// Report an error: \p Whence (followed by ": ") and \p Message are written to
/// errs(), and \p Hint is written through WithColor::note().
// NOTE(review): the error prefix, any guards on Whence/Hint and the process
// exit implied by the name are on lines not visible in this excerpt.
508 static void exitWithError(Twine Message
, StringRef Whence
= "",
509 StringRef Hint
= "") {
512 errs() << Whence
<< ": ";
513 errs() << Message
<< "\n";
515 WithColor::note() << Hint
<< "\n";
/// Error overload of exitWithError().
///
/// For an InstrProfError, the error code is read with IPE.get(). An
/// unrecognized_format code gets a hint about the --sample / --memory options
/// before the message, \p Whence and the hint are handed to the Twine
/// overload. Any other error is rendered with toString() and reported without
/// a hint.
519 static void exitWithError(Error E
, StringRef Whence
= "") {
520 if (E
.isA
<InstrProfError
>()) {
521 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
522 instrprof_error instrError
= IPE
.get();
524 if (instrError
== instrprof_error::unrecognized_format
) {
525 // Hint in case user missed specifying the profile type.
526 Hint
= "Perhaps you forgot to use the --sample or --memory option?";
528 exitWithError(IPE
.message(), Whence
, Hint
);
533 exitWithError(toString(std::move(E
)), Whence
);
/// Convenience wrapper: report the message of \p EC through exitWithError(),
/// passing \p Whence along unchanged.
536 static void exitWithErrorCode(std::error_code EC
, StringRef Whence
= "") {
537 exitWithError(EC
.message(), Whence
);
/// Dispatch on \p FailMode. With failIfAnyAreInvalid the error code is passed
/// to exitWithErrorCode(); the warn() call below reports EC.message() as a
/// warning.
// NOTE(review): the line between the two calls (original line 544) is not
// visible in this excerpt.
540 static void warnOrExitGivenError(FailureMode FailMode
, std::error_code EC
,
541 StringRef Whence
= "") {
542 if (FailMode
== failIfAnyAreInvalid
)
543 exitWithErrorCode(EC
, Whence
);
545 warn(EC
.message(), Whence
);
/// Print an error raised while merging into a writer.
///
/// Non-empty \p WhenceFile and \p WhenceFunction are printed to errs() as
/// "<name>: " prefixes. InstrProfError payloads are passed through a
/// handleErrors callback that captures IPE by reference, and the error text
/// is printed with toString(). When IPE is not instrprof_error::success, the
/// hash / count / value-site-count mismatch codes select a hint about merging
/// data from the same binary, and the hint is printed to errs().
// NOTE(review): the use of \p ShowHint and the assignment to IPE are on lines
// not visible in this excerpt.
548 static void handleMergeWriterError(Error E
, StringRef WhenceFile
= "",
549 StringRef WhenceFunction
= "",
550 bool ShowHint
= true) {
551 if (!WhenceFile
.empty())
552 errs() << WhenceFile
<< ": ";
553 if (!WhenceFunction
.empty())
554 errs() << WhenceFunction
<< ": ";
556 auto IPE
= instrprof_error::success
;
557 E
= handleErrors(std::move(E
),
558 [&IPE
](std::unique_ptr
<InstrProfError
> E
) -> Error
{
560 return Error(std::move(E
));
562 errs() << toString(std::move(E
)) << "\n";
566 if (IPE
!= instrprof_error::success
) {
568 case instrprof_error::hash_mismatch
:
569 case instrprof_error::count_mismatch
:
570 case instrprof_error::value_site_count_mismatch
:
571 Hint
= "Make sure that all profile data to be merged is generated "
572 "from the same binary.";
580 errs() << Hint
<< "\n";
585 /// A remapper from original symbol names to new symbol names based on a file
586 /// containing a list of mappings from old name to new name.
587 class SymbolRemapper
{
// The remapping file's contents. The table below stores StringRefs produced by
// splitting lines read from this buffer.
588 std::unique_ptr
<MemoryBuffer
> File
;
// Old symbol name -> new symbol name.
589 DenseMap
<StringRef
, StringRef
> RemappingTable
;
592 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
///
/// The input is read with MemoryBuffer::getFileOrSTDIN; a read error is
/// reported through exitWithErrorCode(). Blank lines are skipped and '#' is
/// passed to line_iterator as the comment marker. Every remaining line must
/// consist of exactly two non-empty fields separated by a single space.
593 static std::unique_ptr
<SymbolRemapper
> create(StringRef InputFile
) {
594 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
596 exitWithErrorCode(BufOrError
.getError(), InputFile
);
598 auto Remapper
= std::make_unique
<SymbolRemapper
>();
599 Remapper
->File
= std::move(BufOrError
.get());
601 for (line_iterator
LineIt(*Remapper
->File
, /*SkipBlanks=*/true, '#');
602 !LineIt
.is_at_eof(); ++LineIt
) {
// Split at the first space: .first is the old name, .second is the rest.
603 std::pair
<StringRef
, StringRef
> Parts
= LineIt
->split(' ');
// Reject a missing field, or a second field that still contains a space.
604 if (Parts
.first
.empty() || Parts
.second
.empty() ||
605 Parts
.second
.count(' ')) {
606 exitWithError("unexpected line in remapping file",
607 (InputFile
+ ":" + Twine(LineIt
.line_number())).str(),
608 "expected 'old_symbol new_symbol'");
610 Remapper
->RemappingTable
.insert(Parts
);
615 /// Attempt to map the given old symbol into a new symbol.
617 /// \return The new symbol, or \p Name if no such symbol was found.
618 StringRef
operator()(StringRef Name
) {
// An empty lookup result means the table has no usable entry for Name.
619 StringRef New
= RemappingTable
.lookup(Name
);
620 return New
.empty() ? Name
: New
;
/// FunctionId overload: only names for which isStringRef() holds are looked
/// up in the table.
623 FunctionId
operator()(FunctionId Name
) {
624 // MD5 name cannot be remapped.
625 if (!Name
.isStringRef())
627 StringRef New
= RemappingTable
.lookup(Name
.stringRef());
628 return New
.empty() ? Name
: FunctionId(New
);
// An input profile path. loadInput() also reads a `Weight` member from this
// struct.
// NOTE(review): the declaration of `Weight` is not visible in this excerpt.
633 struct WeightedFile
{
634 std::string Filename
;
// List of weighted inputs; the SmallVector has inline storage for 5 entries.
637 typedef SmallVector
<WeightedFile
, 5> WeightedFileVector
;
639 /// Keep track of merged data and reported errors.
// NOTE(review): the `Lock` and `ErrLock` members that other code in this file
// uses (WC->Lock, WC->ErrLock) are declared on lines not visible in this
// excerpt.
640 struct WriterContext
{
// Accumulates the merged profile data.
642 InstrProfWriter Writer
;
// Pairs of (error, string). Within this excerpt the string passed is the name
// of the input file.
643 std::vector
<std::pair
<Error
, std::string
>> Errors
;
// Held by reference: the set object is supplied by whoever constructs the
// context.
645 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
;
// Besides its own arguments, the constructor forwards the file-level options
// DoWritePrevVersion, MemProfVersionRequested, MemProfFullSchema,
// MemprofGenerateRandomHotness and MemprofGenerateRandomHotnessSeed to the
// InstrProfWriter.
647 WriterContext(bool IsSparse
, std::mutex
&ErrLock
,
648 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
,
649 uint64_t ReservoirSize
= 0, uint64_t MaxTraceLength
= 0)
650 : Writer(IsSparse
, ReservoirSize
, MaxTraceLength
, DoWritePrevVersion
,
651 MemProfVersionRequested
, MemProfFullSchema
,
652 MemprofGenerateRandomHotness
, MemprofGenerateRandomHotnessSeed
),
653 ErrLock(ErrLock
), WriterErrorCodes(WriterErrorCodes
) {}
656 /// Compute the overlap between profiles BaseFilename and TestFilename,
657 /// and store the program-level result in Overlap.
///
/// The test profile is opened with InstrProfReader::create. Each record read
/// from it is handed to WC->Writer.overlapRecord() together with a fresh
/// function-level OverlapStats, which is then dumped to \p OS.
// NOTE(review): the uses of \p BaseFilename and \p IsCS are on lines not
// visible in this excerpt.
658 static void overlapInput(const std::string
&BaseFilename
,
659 const std::string
&TestFilename
, WriterContext
*WC
,
660 OverlapStats
&Overlap
,
661 const OverlapFuncFilters
&FuncFilter
,
662 raw_fd_ostream
&OS
, bool IsCS
) {
663 auto FS
= vfs::getRealFileSystem();
664 auto ReaderOrErr
= InstrProfReader::create(TestFilename
, *FS
);
665 if (Error E
= ReaderOrErr
.takeError()) {
666 // Skip the empty profiles by returning silently.
667 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
// Anything other than an empty raw profile is recorded in WC->Errors.
668 if (ErrorCode
!= instrprof_error::empty_raw_profile
)
669 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
674 auto Reader
= std::move(ReaderOrErr
.get());
675 for (auto &I
: *Reader
) {
676 OverlapStats
FuncOverlap(OverlapStats::FunctionLevel
);
677 FuncOverlap
.setFuncInfo(I
.Name
, I
.Hash
);
679 WC
->Writer
.overlapRecord(std::move(I
), Overlap
, FuncOverlap
, FuncFilter
);
680 FuncOverlap
.dump(OS
);
684 /// Load an input into a writer context.
///
/// WC->Lock is acquired on entry. Inputs accepted by
/// RawMemProfReader::hasFormat are read as raw MemProf profiles; all other
/// inputs are opened with InstrProfReader::create. As far as this excerpt
/// shows, records, vtable names, temporal profile traces and binary ids are
/// added to WC->Writer, and reader problems are appended to WC->Errors.
// NOTE(review): the return type, early returns and several guards are on lines
// not visible in this excerpt.
686 loadInput(const WeightedFile
&Input
, SymbolRemapper
*Remapper
,
687 const InstrProfCorrelator
*Correlator
, const StringRef ProfiledBinary
,
688 WriterContext
*WC
, const object::BuildIDFetcher
*BIDFetcher
= nullptr,
689 const ProfCorrelatorKind
*BIDFetcherCorrelatorKind
= nullptr) {
690 std::unique_lock
<std::mutex
> CtxGuard
{WC
->Lock
};
692 // Copy the filename, because llvm::ThreadPool copied the input "const
693 // WeightedFile &" by value, making a reference to the filename within it
694 // invalid outside of this packaged task.
695 std::string Filename
= Input
.Filename
;
697 using ::llvm::memprof::RawMemProfReader
;
// Raw MemProf inputs get their own reader.
698 if (RawMemProfReader::hasFormat(Input
.Filename
)) {
699 auto ReaderOrErr
= RawMemProfReader::create(Input
.Filename
, ProfiledBinary
);
701 exitWithError(ReaderOrErr
.takeError(), Input
.Filename
);
703 std::unique_ptr
<RawMemProfReader
> Reader
= std::move(ReaderOrErr
.get());
704 // Check if the profile types can be merged, e.g. clang frontend profiles
705 // should not be merged with memprof profiles.
706 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
707 consumeError(std::move(E
));
708 WC
->Errors
.emplace_back(
709 make_error
<StringError
>(
710 "Cannot merge MemProf profile with Clang generated profile.",
// Callback given to addMemProfData: each reported error is re-created as an
// InstrProfError and appended to WC->Errors.
716 auto MemProfError
= [&](Error E
) {
717 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
718 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
722 WC
->Writer
.addMemProfData(Reader
->takeMemProfData(), MemProfError
);
726 auto FS
= vfs::getRealFileSystem();
727 // TODO: This only saves the first non-fatal error from InstrProfReader, and
728 // then added to WriterContext::Errors. However, this is not extensible, if
729 // we have more non-fatal errors from InstrProfReader in the future. How
730 // should this interact with different -failure-mode?
731 std::optional
<std::pair
<Error
, std::string
>> ReaderWarning
;
732 auto Warn
= [&](Error E
) {
734 consumeError(std::move(E
));
737 // Only show the first time an error occurs in this file.
738 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
739 ReaderWarning
= {make_error
<InstrProfError
>(ErrCode
, Msg
), Filename
};
// A null BIDFetcherCorrelatorKind means "no correlation".
742 const ProfCorrelatorKind CorrelatorKind
= BIDFetcherCorrelatorKind
743 ? *BIDFetcherCorrelatorKind
744 : ProfCorrelatorKind::NONE
;
745 auto ReaderOrErr
= InstrProfReader::create(Input
.Filename
, *FS
, Correlator
,
746 BIDFetcher
, CorrelatorKind
, Warn
);
747 if (Error E
= ReaderOrErr
.takeError()) {
748 // Skip the empty profiles by returning silently.
749 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
750 if (ErrCode
!= instrprof_error::empty_raw_profile
)
751 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrCode
, Msg
),
756 auto Reader
= std::move(ReaderOrErr
.get());
757 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
758 consumeError(std::move(E
));
759 WC
->Errors
.emplace_back(
760 make_error
<StringError
>(
761 "Merge IR generated profile with Clang generated profile.",
767 for (auto &I
: *Reader
) {
// Rewrite the record's name through the remapper before adding it.
769 I
.Name
= (*Remapper
)(I
.Name
);
770 const StringRef FuncName
= I
.Name
;
771 bool Reported
= false;
772 WC
->Writer
.addRecord(std::move(I
), Input
.Weight
, [&](Error E
) {
774 consumeError(std::move(E
));
778 // Only show hint the first time an error occurs.
779 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
// WriterErrorCodes is updated while holding ErrLock. insert().second is true
// only when this error code has not been recorded before.
780 std::unique_lock
<std::mutex
> ErrGuard
{WC
->ErrLock
};
781 bool firstTime
= WC
->WriterErrorCodes
.insert(ErrCode
).second
;
782 handleMergeWriterError(make_error
<InstrProfError
>(ErrCode
, Msg
),
783 Input
.Filename
, FuncName
, firstTime
);
787 if (KeepVTableSymbols
) {
788 const InstrProfSymtab
&symtab
= Reader
->getSymtab();
789 const auto &VTableNames
= symtab
.getVTableNames();
791 for (const auto &kv
: VTableNames
)
792 WC
->Writer
.addVTableName(kv
.getKey());
795 if (Reader
->hasTemporalProfile()) {
796 auto &Traces
= Reader
->getTemporalProfTraces(Input
.Weight
);
798 WC
->Writer
.addTemporalProfileTraces(
799 Traces
, Reader
->getTemporalProfTraceStreamSize());
801 if (Reader
->hasError()) {
802 if (Error E
= Reader
->getError()) {
803 WC
->Errors
.emplace_back(std::move(E
), Filename
);
808 std::vector
<llvm::object::BuildID
> BinaryIds
;
809 if (Error E
= Reader
->readBinaryIds(BinaryIds
)) {
810 WC
->Errors
.emplace_back(std::move(E
), Filename
);
813 WC
->Writer
.addBinaryIds(BinaryIds
);
// The warning saved by the Warn callback, if any, is appended to WC->Errors.
816 WC
->Errors
.emplace_back(std::move(ReaderWarning
->first
),
817 ReaderWarning
->second
);
821 /// Merge the \p Src writer context into \p Dst.
///
/// Src's recorded errors are moved into Dst->Errors. The profile kinds are
/// then merged, and a failure there is reported through exitWithError().
/// Finally Src's writer is moved into
/// Dst->Writer.mergeRecordsFromWriter(). Its error callback records the
/// error code in Dst->WriterErrorCodes while holding Dst->ErrLock and emits a
/// warning.
// NOTE(review): original line 834, between the insert and the warn() call, is
// not visible in this excerpt.
822 static void mergeWriterContexts(WriterContext
*Dst
, WriterContext
*Src
) {
823 for (auto &ErrorPair
: Src
->Errors
)
824 Dst
->Errors
.push_back(std::move(ErrorPair
));
827 if (Error E
= Dst
->Writer
.mergeProfileKind(Src
->Writer
.getProfileKind()))
828 exitWithError(std::move(E
));
830 Dst
->Writer
.mergeRecordsFromWriter(std::move(Src
->Writer
), [&](Error E
) {
831 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
832 std::unique_lock
<std::mutex
> ErrGuard
{Dst
->ErrLock
};
833 bool firstTime
= Dst
->WriterErrorCodes
.insert(ErrorCode
).second
;
835 warn(toString(make_error
<InstrProfError
>(ErrorCode
, Msg
)));
// getFuncName overload for entries of a
// StringMap<InstrProfWriter::ProfilingData>.
// NOTE(review): the return type and body are not visible in this excerpt.
840 getFuncName(const StringMap
<InstrProfWriter::ProfilingData
>::value_type
&Val
) {
// getFuncName overload for SampleProfileMap entries: returns the string form
// of the entry's context.
// NOTE(review): the return type is on a line not visible in this excerpt.
845 getFuncName(const SampleProfileMap::value_type
&Val
) {
846 return Val
.second
.getContext().toString();
849 template <typename T
>
850 static void filterFunctions(T
&ProfileMap
) {
851 bool hasFilter
= !FuncNameFilter
.empty();
852 bool hasNegativeFilter
= !FuncNameNegativeFilter
.empty();
853 if (!hasFilter
&& !hasNegativeFilter
)
856 // If filter starts with '?' it is MSVC mangled name, not a regex.
857 llvm::Regex
ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
858 if (hasFilter
&& FuncNameFilter
[0] == '?' &&
859 ProbablyMSVCMangledName
.match(FuncNameFilter
))
860 FuncNameFilter
= llvm::Regex::escape(FuncNameFilter
);
861 if (hasNegativeFilter
&& FuncNameNegativeFilter
[0] == '?' &&
862 ProbablyMSVCMangledName
.match(FuncNameNegativeFilter
))
863 FuncNameNegativeFilter
= llvm::Regex::escape(FuncNameNegativeFilter
);
865 size_t Count
= ProfileMap
.size();
866 llvm::Regex
Pattern(FuncNameFilter
);
867 llvm::Regex
NegativePattern(FuncNameNegativeFilter
);
869 if (hasFilter
&& !Pattern
.isValid(Error
))
870 exitWithError(Error
);
871 if (hasNegativeFilter
&& !NegativePattern
.isValid(Error
))
872 exitWithError(Error
);
874 // Handle MD5 profile, so it is still able to match using the original name.
875 std::string MD5Name
= std::to_string(llvm::MD5Hash(FuncNameFilter
));
876 std::string NegativeMD5Name
=
877 std::to_string(llvm::MD5Hash(FuncNameNegativeFilter
));
879 for (auto I
= ProfileMap
.begin(); I
!= ProfileMap
.end();) {
881 const auto &FuncName
= getFuncName(*Tmp
);
882 // Negative filter has higher precedence than positive filter.
883 if ((hasNegativeFilter
&&
884 (NegativePattern
.match(FuncName
) ||
885 (FunctionSamples::UseMD5
&& NegativeMD5Name
== FuncName
))) ||
886 (hasFilter
&& !(Pattern
.match(FuncName
) ||
887 (FunctionSamples::UseMD5
&& MD5Name
== FuncName
))))
888 ProfileMap
.erase(Tmp
);
891 llvm::dbgs() << Count
- ProfileMap
.size() << " of " << Count
<< " functions "
892 << "in the original profile are filtered.\n";
895 static void writeInstrProfile(StringRef OutputFilename
,
896 ProfileFormat OutputFormat
,
897 InstrProfWriter
&Writer
) {
899 raw_fd_ostream
Output(OutputFilename
.data(), EC
,
900 OutputFormat
== PF_Text
? sys::fs::OF_TextWithCRLF
903 exitWithErrorCode(EC
, OutputFilename
);
905 if (OutputFormat
== PF_Text
) {
906 if (Error E
= Writer
.writeText(Output
))
909 if (Output
.is_displayed())
910 exitWithError("cannot write a non-text format profile to the terminal");
911 if (Error E
= Writer
.write(Output
))
916 static void mergeInstrProfile(const WeightedFileVector
&Inputs
,
917 SymbolRemapper
*Remapper
,
918 int MaxDbgCorrelationWarnings
,
919 const StringRef ProfiledBinary
) {
920 const uint64_t TraceReservoirSize
= TemporalProfTraceReservoirSize
.getValue();
921 const uint64_t MaxTraceLength
= TemporalProfMaxTraceLength
.getValue();
922 if (OutputFormat
== PF_Compact_Binary
)
923 exitWithError("Compact Binary is deprecated");
924 if (OutputFormat
!= PF_Binary
&& OutputFormat
!= PF_Ext_Binary
&&
925 OutputFormat
!= PF_Text
)
926 exitWithError("unknown format is specified");
928 // TODO: Maybe we should support correlation with mixture of different
929 // correlation modes(w/wo debug-info/object correlation).
930 if (DebugInfoFilename
.empty()) {
931 if (!BinaryFilename
.empty() && (DebugInfod
|| !DebugFileDirectory
.empty()))
932 exitWithError("Expected only one of -binary-file, -debuginfod or "
933 "-debug-file-directory");
934 } else if (!BinaryFilename
.empty() || DebugInfod
||
935 !DebugFileDirectory
.empty()) {
936 exitWithError("Expected only one of -debug-info, -binary-file, -debuginfod "
937 "or -debug-file-directory");
939 std::string CorrelateFilename
;
940 ProfCorrelatorKind CorrelateKind
= ProfCorrelatorKind::NONE
;
941 if (!DebugInfoFilename
.empty()) {
942 CorrelateFilename
= DebugInfoFilename
;
943 CorrelateKind
= ProfCorrelatorKind::DEBUG_INFO
;
944 } else if (!BinaryFilename
.empty()) {
945 CorrelateFilename
= BinaryFilename
;
946 CorrelateKind
= ProfCorrelatorKind::BINARY
;
949 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
950 if (CorrelateKind
!= InstrProfCorrelator::NONE
) {
951 if (auto Err
= InstrProfCorrelator::get(CorrelateFilename
, CorrelateKind
)
952 .moveInto(Correlator
))
953 exitWithError(std::move(Err
), CorrelateFilename
);
954 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
955 exitWithError(std::move(Err
), CorrelateFilename
);
958 ProfCorrelatorKind BIDFetcherCorrelateKind
= ProfCorrelatorKind::NONE
;
959 std::unique_ptr
<object::BuildIDFetcher
> BIDFetcher
;
961 llvm::HTTPClient::initialize();
962 BIDFetcher
= std::make_unique
<DebuginfodFetcher
>(DebugFileDirectory
);
963 if (!BIDFetcherProfileCorrelate
)
964 exitWithError("Expected --correlate when --debuginfod is provided");
965 BIDFetcherCorrelateKind
= BIDFetcherProfileCorrelate
;
966 } else if (!DebugFileDirectory
.empty()) {
967 BIDFetcher
= std::make_unique
<object::BuildIDFetcher
>(DebugFileDirectory
);
968 if (!BIDFetcherProfileCorrelate
)
969 exitWithError("Expected --correlate when --debug-file-directory "
971 BIDFetcherCorrelateKind
= BIDFetcherProfileCorrelate
;
972 } else if (BIDFetcherProfileCorrelate
) {
973 exitWithError("Expected --debuginfod or --debug-file-directory when "
974 "--correlate is provided");
977 std::mutex ErrorLock
;
978 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
980 // If NumThreads is not specified, auto-detect a good default.
982 NumThreads
= std::min(hardware_concurrency().compute_thread_count(),
983 unsigned((Inputs
.size() + 1) / 2));
985 // Initialize the writer contexts.
986 SmallVector
<std::unique_ptr
<WriterContext
>, 4> Contexts
;
987 for (unsigned I
= 0; I
< NumThreads
; ++I
)
988 Contexts
.emplace_back(std::make_unique
<WriterContext
>(
989 OutputSparse
, ErrorLock
, WriterErrorCodes
, TraceReservoirSize
,
992 if (NumThreads
== 1) {
993 for (const auto &Input
: Inputs
)
994 loadInput(Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
995 Contexts
[0].get(), BIDFetcher
.get(), &BIDFetcherCorrelateKind
);
997 DefaultThreadPool
Pool(hardware_concurrency(NumThreads
));
999 // Load the inputs in parallel (N/NumThreads serial steps).
1001 for (const auto &Input
: Inputs
) {
1002 Pool
.async(loadInput
, Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
1003 Contexts
[Ctx
].get(), BIDFetcher
.get(),
1004 &BIDFetcherCorrelateKind
);
1005 Ctx
= (Ctx
+ 1) % NumThreads
;
1009 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1010 unsigned Mid
= Contexts
.size() / 2;
1011 unsigned End
= Contexts
.size();
1012 assert(Mid
> 0 && "Expected more than one context");
1014 for (unsigned I
= 0; I
< Mid
; ++I
)
1015 Pool
.async(mergeWriterContexts
, Contexts
[I
].get(),
1016 Contexts
[I
+ Mid
].get());
1019 Pool
.async(mergeWriterContexts
, Contexts
[0].get(),
1020 Contexts
[End
- 1].get());
1028 // Handle deferred errors encountered during merging. If the number of errors
1029 // is equal to the number of inputs the merge failed.
1030 unsigned NumErrors
= 0;
1031 for (std::unique_ptr
<WriterContext
> &WC
: Contexts
) {
1032 for (auto &ErrorPair
: WC
->Errors
) {
1034 warn(toString(std::move(ErrorPair
.first
)), ErrorPair
.second
);
1037 if ((NumErrors
== Inputs
.size() && FailMode
== failIfAllAreInvalid
) ||
1038 (NumErrors
> 0 && FailMode
== failIfAnyAreInvalid
))
1039 exitWithError("no profile can be merged");
1041 filterFunctions(Contexts
[0]->Writer
.getProfileData());
1043 writeInstrProfile(OutputFilename
, OutputFormat
, Contexts
[0]->Writer
);
1046 /// The profile entry for a function in instrumentation profile.
1047 struct InstrProfileEntry
{
1048 uint64_t MaxCount
= 0;
1049 uint64_t NumEdgeCounters
= 0;
1050 float ZeroCounterRatio
= 0.0;
1051 InstrProfRecord
*ProfRecord
;
1052 InstrProfileEntry(InstrProfRecord
*Record
);
1053 InstrProfileEntry() = default;
1056 InstrProfileEntry::InstrProfileEntry(InstrProfRecord
*Record
) {
1057 ProfRecord
= Record
;
1058 uint64_t CntNum
= Record
->Counts
.size();
1059 uint64_t ZeroCntNum
= 0;
1060 for (size_t I
= 0; I
< CntNum
; ++I
) {
1061 MaxCount
= std::max(MaxCount
, Record
->Counts
[I
]);
1062 ZeroCntNum
+= !Record
->Counts
[I
];
1064 ZeroCounterRatio
= (float)ZeroCntNum
/ CntNum
;
1065 NumEdgeCounters
= CntNum
;
1068 /// Either set all the counters in the instr profile entry \p IFE to
1069 /// -1 / -2 /in order to drop the profile or scale up the
1070 /// counters in \p IFP to be above hot / cold threshold. We use
1071 /// the ratio of zero counters in the profile of a function to
1072 /// decide the profile is helpful or harmful for performance,
1073 /// and to choose whether to scale up or drop it.
1074 static void updateInstrProfileEntry(InstrProfileEntry
&IFE
, bool SetToHot
,
1075 uint64_t HotInstrThreshold
,
1076 uint64_t ColdInstrThreshold
,
1077 float ZeroCounterThreshold
) {
1078 InstrProfRecord
*ProfRecord
= IFE
.ProfRecord
;
1079 if (!IFE
.MaxCount
|| IFE
.ZeroCounterRatio
> ZeroCounterThreshold
) {
1080 // If all or most of the counters of the function are zero, the
1081 // profile is unaccountable and should be dropped. Reset all the
1082 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1083 // All counters being -1 also implies that the function is hot so
1084 // PGO profile-use will also set the entry count metadata to be
1085 // above hot threshold.
1086 // All counters being -2 implies that the function is warm so
1087 // PGO profile-use will also set the entry count metadata to be
1088 // above cold threshold.
1090 (SetToHot
? InstrProfRecord::PseudoHot
: InstrProfRecord::PseudoWarm
);
1091 ProfRecord
->setPseudoCount(Kind
);
1095 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1096 const unsigned MultiplyFactor
= 3;
1097 uint64_t Threshold
= (SetToHot
? HotInstrThreshold
: ColdInstrThreshold
);
1098 uint64_t Numerator
= Threshold
* MultiplyFactor
;
1100 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1101 if (!SetToHot
&& Threshold
>= HotInstrThreshold
) {
1102 Threshold
= (HotInstrThreshold
+ ColdInstrThreshold
) / 2;
1105 uint64_t Denominator
= IFE
.MaxCount
;
1106 if (Numerator
<= Denominator
)
1108 ProfRecord
->scale(Numerator
, Denominator
, [&](instrprof_error E
) {
1109 warn(toString(make_error
<InstrProfError
>(E
)));
// Positions within ProfileSummaryBuilder::DefaultCutoffs of the cutoffs used
// to derive the cold and hot count thresholds.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1116 using sampleprof::FSDiscriminatorPass
;
1118 // Internal options to set FSDiscriminatorPass. Used in merge and show
1120 static cl::opt
<FSDiscriminatorPass
> FSDiscriminatorPassOption(
1121 "fs-discriminator-pass", cl::init(PassLast
), cl::Hidden
,
1122 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1123 "pass beyond this value. The enum values are defined in "
1124 "Support/Discriminator.h"),
1125 cl::values(clEnumVal(Base
, "Use base discriminators only"),
1126 clEnumVal(Pass1
, "Use base and pass 1 discriminators"),
1127 clEnumVal(Pass2
, "Use base and pass 1-2 discriminators"),
1128 clEnumVal(Pass3
, "Use base and pass 1-3 discriminators"),
1129 clEnumVal(PassLast
, "Use all discriminator bits (default)")));
1131 static unsigned getDiscriminatorMask() {
1132 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption
.getValue()));
1135 /// Adjust the instr profile in \p WC based on the sample profile in
1138 adjustInstrProfile(std::unique_ptr
<WriterContext
> &WC
,
1139 std::unique_ptr
<sampleprof::SampleProfileReader
> &Reader
,
1140 unsigned SupplMinSizeThreshold
, float ZeroCounterThreshold
,
1141 unsigned InstrProfColdThreshold
) {
1142 // Function to its entry in instr profile.
1143 StringMap
<InstrProfileEntry
> InstrProfileMap
;
1144 StringMap
<StringRef
> StaticFuncMap
;
1145 InstrProfSummaryBuilder
IPBuilder(ProfileSummaryBuilder::DefaultCutoffs
);
1147 auto checkSampleProfileHasFUnique
= [&Reader
]() {
1148 for (const auto &PD
: Reader
->getProfiles()) {
1149 auto &FContext
= PD
.second
.getContext();
1150 if (FContext
.toString().find(FunctionSamples::UniqSuffix
) !=
1151 std::string::npos
) {
1158 bool SampleProfileHasFUnique
= checkSampleProfileHasFUnique();
1160 auto buildStaticFuncMap
= [&StaticFuncMap
,
1161 SampleProfileHasFUnique
](const StringRef Name
) {
1162 std::string FilePrefixes
[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1163 size_t PrefixPos
= StringRef::npos
;
1164 for (auto &FilePrefix
: FilePrefixes
) {
1165 std::string NamePrefix
= FilePrefix
+ GlobalIdentifierDelimiter
;
1166 PrefixPos
= Name
.find_insensitive(NamePrefix
);
1167 if (PrefixPos
== StringRef::npos
)
1169 PrefixPos
+= NamePrefix
.size();
1173 if (PrefixPos
== StringRef::npos
) {
1177 StringRef NewName
= Name
.drop_front(PrefixPos
);
1178 StringRef FName
= Name
.substr(0, PrefixPos
- 1);
1179 if (NewName
.size() == 0) {
1183 // This name should have a static linkage.
1184 size_t PostfixPos
= NewName
.find(FunctionSamples::UniqSuffix
);
1185 bool ProfileHasFUnique
= (PostfixPos
!= StringRef::npos
);
1187 // If sample profile and instrumented profile do not agree on symbol
1189 if (SampleProfileHasFUnique
!= ProfileHasFUnique
) {
1190 // If instrumented profile uses -funique-internal-linkage-symbols,
1191 // we need to trim the name.
1192 if (ProfileHasFUnique
) {
1193 NewName
= NewName
.substr(0, PostfixPos
);
1195 // If sample profile uses -funique-internal-linkage-symbols,
1196 // we build the map.
1198 NewName
.str() + getUniqueInternalLinkagePostfix(FName
);
1199 NewName
= StringRef(NStr
);
1200 StaticFuncMap
[NewName
] = Name
;
1205 auto [It
, Inserted
] = StaticFuncMap
.try_emplace(NewName
, Name
);
1207 It
->second
= DuplicateNameStr
;
1210 // We need to flatten the SampleFDO profile as the InstrFDO
1211 // profile does not have inlined callsite profiles.
1212 // One caveat is the pre-inlined function -- their samples
1213 // should be collapsed into the caller function.
1214 // Here we do a DFS traversal to get the flatten profile
1215 // info: the sum of entrycount and the max of maxcount.
1216 // Here is the algorithm:
1217 // recursive (FS, root_name) {
1218 // name = FS->getName();
1219 // get samples for FS;
1220 // if (InstrProf.find(name) {
1221 // root_name = name;
1223 // if (name is in static_func map) {
1224 // root_name = static_name;
1227 // update the Map entry for root_name;
1228 // for (subfs: FS) {
1229 // recursive(subfs, root_name);
1233 // Here is an example.
1249 // InstrProfile has two entries:
1253 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1254 // {"foo", {1000, 5000}}
1255 // {"bar.cc;bar", {11000, 30000}}
1257 // foo's has an entry count of 1000, and max body count of 5000.
1258 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1259 // 10000), and max count of 30000 (from the callsite in line 8).
1261 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1262 // entry in InstrProfile.
1263 llvm::StringMap
<std::pair
<uint64_t, uint64_t>> FlattenSampleMap
;
1264 auto BuildMaxSampleMap
= [&FlattenSampleMap
, &StaticFuncMap
,
1265 &InstrProfileMap
](const FunctionSamples
&FS
,
1266 const StringRef
&RootName
) {
1267 auto BuildMaxSampleMapImpl
= [&](const FunctionSamples
&FS
,
1268 const StringRef
&RootName
,
1269 auto &BuildImpl
) -> void {
1270 std::string NameStr
= FS
.getFunction().str();
1271 const StringRef Name
= NameStr
;
1272 const StringRef
*NewRootName
= &RootName
;
1273 uint64_t EntrySample
= FS
.getHeadSamplesEstimate();
1274 uint64_t MaxBodySample
= FS
.getMaxCountInside(/* SkipCallSite*/ true);
1276 auto It
= InstrProfileMap
.find(Name
);
1277 if (It
!= InstrProfileMap
.end()) {
1278 NewRootName
= &Name
;
1280 auto NewName
= StaticFuncMap
.find(Name
);
1281 if (NewName
!= StaticFuncMap
.end()) {
1282 It
= InstrProfileMap
.find(NewName
->second
);
1283 if (NewName
->second
!= DuplicateNameStr
) {
1284 NewRootName
= &NewName
->second
;
1287 // Here the EntrySample is of an inlined function, so we should not
1288 // update the EntrySample in the map.
1292 EntrySample
+= FlattenSampleMap
[*NewRootName
].first
;
1294 std::max(FlattenSampleMap
[*NewRootName
].second
, MaxBodySample
);
1295 FlattenSampleMap
[*NewRootName
] =
1296 std::make_pair(EntrySample
, MaxBodySample
);
1298 for (const auto &C
: FS
.getCallsiteSamples())
1299 for (const auto &F
: C
.second
)
1300 BuildImpl(F
.second
, *NewRootName
, BuildImpl
);
1302 BuildMaxSampleMapImpl(FS
, RootName
, BuildMaxSampleMapImpl
);
1305 for (auto &PD
: WC
->Writer
.getProfileData()) {
1306 // Populate IPBuilder.
1307 for (const auto &PDV
: PD
.getValue()) {
1308 InstrProfRecord Record
= PDV
.second
;
1309 IPBuilder
.addRecord(Record
);
1312 // If a function has multiple entries in instr profile, skip it.
1313 if (PD
.getValue().size() != 1)
1316 // Initialize InstrProfileMap.
1317 InstrProfRecord
*R
= &PD
.getValue().begin()->second
;
1318 StringRef FullName
= PD
.getKey();
1319 InstrProfileMap
[FullName
] = InstrProfileEntry(R
);
1320 buildStaticFuncMap(FullName
);
1323 for (auto &PD
: Reader
->getProfiles()) {
1324 sampleprof::FunctionSamples
&FS
= PD
.second
;
1325 std::string Name
= FS
.getFunction().str();
1326 BuildMaxSampleMap(FS
, Name
);
1329 ProfileSummary InstrPS
= *IPBuilder
.getSummary();
1330 ProfileSummary SamplePS
= Reader
->getSummary();
1332 // Compute cold thresholds for instr profile and sample profile.
1333 uint64_t HotSampleThreshold
=
1334 ProfileSummaryBuilder::getEntryForPercentile(
1335 SamplePS
.getDetailedSummary(),
1336 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
1338 uint64_t ColdSampleThreshold
=
1339 ProfileSummaryBuilder::getEntryForPercentile(
1340 SamplePS
.getDetailedSummary(),
1341 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
1343 uint64_t HotInstrThreshold
=
1344 ProfileSummaryBuilder::getEntryForPercentile(
1345 InstrPS
.getDetailedSummary(),
1346 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
1348 uint64_t ColdInstrThreshold
=
1349 InstrProfColdThreshold
1350 ? InstrProfColdThreshold
1351 : ProfileSummaryBuilder::getEntryForPercentile(
1352 InstrPS
.getDetailedSummary(),
1353 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
1356 // Find hot/warm functions in sample profile which is cold in instr profile
1357 // and adjust the profiles of those functions in the instr profile.
1358 for (const auto &E
: FlattenSampleMap
) {
1359 uint64_t SampleMaxCount
= std::max(E
.second
.first
, E
.second
.second
);
1360 if (SampleMaxCount
< ColdSampleThreshold
)
1362 StringRef Name
= E
.first();
1363 auto It
= InstrProfileMap
.find(Name
);
1364 if (It
== InstrProfileMap
.end()) {
1365 auto NewName
= StaticFuncMap
.find(Name
);
1366 if (NewName
!= StaticFuncMap
.end()) {
1367 It
= InstrProfileMap
.find(NewName
->second
);
1368 if (NewName
->second
== DuplicateNameStr
) {
1369 WithColor::warning()
1370 << "Static function " << Name
1371 << " has multiple promoted names, cannot adjust profile.\n";
1375 if (It
== InstrProfileMap
.end() ||
1376 It
->second
.MaxCount
> ColdInstrThreshold
||
1377 It
->second
.NumEdgeCounters
< SupplMinSizeThreshold
)
1379 bool SetToHot
= SampleMaxCount
>= HotSampleThreshold
;
1380 updateInstrProfileEntry(It
->second
, SetToHot
, HotInstrThreshold
,
1381 ColdInstrThreshold
, ZeroCounterThreshold
);
1385 /// The main function to supplement instr profile with sample profile.
1386 /// \Inputs contains the instr profile. \p SampleFilename specifies the
1387 /// sample profile. \p OutputFilename specifies the output profile name.
1388 /// \p OutputFormat specifies the output profile format. \p OutputSparse
1389 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1390 /// specifies the minimal size for the functions whose profile will be
1391 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1392 /// a function contains too many zero counters and whether its profile
1393 /// should be dropped. \p InstrProfColdThreshold is the user specified
1394 /// cold threshold which will override the cold threshold got from the
1395 /// instr profile summary.
1396 static void supplementInstrProfile(const WeightedFileVector
&Inputs
,
1397 StringRef SampleFilename
, bool OutputSparse
,
1398 unsigned SupplMinSizeThreshold
,
1399 float ZeroCounterThreshold
,
1400 unsigned InstrProfColdThreshold
) {
1401 if (OutputFilename
== "-")
1402 exitWithError("cannot write indexed profdata format to stdout");
1403 if (Inputs
.size() != 1)
1404 exitWithError("expect one input to be an instr profile");
1405 if (Inputs
[0].Weight
!= 1)
1406 exitWithError("expect instr profile doesn't have weight");
1408 StringRef InstrFilename
= Inputs
[0].Filename
;
1410 // Read sample profile.
1411 LLVMContext Context
;
1412 auto FS
= vfs::getRealFileSystem();
1413 auto ReaderOrErr
= sampleprof::SampleProfileReader::create(
1414 SampleFilename
.str(), Context
, *FS
, FSDiscriminatorPassOption
);
1415 if (std::error_code EC
= ReaderOrErr
.getError())
1416 exitWithErrorCode(EC
, SampleFilename
);
1417 auto Reader
= std::move(ReaderOrErr
.get());
1418 if (std::error_code EC
= Reader
->read())
1419 exitWithErrorCode(EC
, SampleFilename
);
1421 // Read instr profile.
1422 std::mutex ErrorLock
;
1423 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
1424 auto WC
= std::make_unique
<WriterContext
>(OutputSparse
, ErrorLock
,
1426 loadInput(Inputs
[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC
.get());
1427 if (WC
->Errors
.size() > 0)
1428 exitWithError(std::move(WC
->Errors
[0].first
), InstrFilename
);
1430 adjustInstrProfile(WC
, Reader
, SupplMinSizeThreshold
, ZeroCounterThreshold
,
1431 InstrProfColdThreshold
);
1432 writeInstrProfile(OutputFilename
, OutputFormat
, WC
->Writer
);
1435 /// Make a copy of the given function samples with all symbol names remapped
1436 /// by the provided symbol remapper.
1437 static sampleprof::FunctionSamples
1438 remapSamples(const sampleprof::FunctionSamples
&Samples
,
1439 SymbolRemapper
&Remapper
, sampleprof_error
&Error
) {
1440 sampleprof::FunctionSamples Result
;
1441 Result
.setFunction(Remapper(Samples
.getFunction()));
1442 Result
.addTotalSamples(Samples
.getTotalSamples());
1443 Result
.addHeadSamples(Samples
.getHeadSamples());
1444 for (const auto &BodySample
: Samples
.getBodySamples()) {
1445 uint32_t MaskedDiscriminator
=
1446 BodySample
.first
.Discriminator
& getDiscriminatorMask();
1447 Result
.addBodySamples(BodySample
.first
.LineOffset
, MaskedDiscriminator
,
1448 BodySample
.second
.getSamples());
1449 for (const auto &Target
: BodySample
.second
.getCallTargets()) {
1450 Result
.addCalledTargetSamples(BodySample
.first
.LineOffset
,
1451 MaskedDiscriminator
,
1452 Remapper(Target
.first
), Target
.second
);
1455 for (const auto &CallsiteSamples
: Samples
.getCallsiteSamples()) {
1456 sampleprof::FunctionSamplesMap
&Target
=
1457 Result
.functionSamplesAt(CallsiteSamples
.first
);
1458 for (const auto &Callsite
: CallsiteSamples
.second
) {
1459 sampleprof::FunctionSamples Remapped
=
1460 remapSamples(Callsite
.second
, Remapper
, Error
);
1461 mergeSampleProfErrors(Error
,
1462 Target
[Remapped
.getFunction()].merge(Remapped
));
1468 static sampleprof::SampleProfileFormat FormatMap
[] = {
1469 sampleprof::SPF_None
,
1470 sampleprof::SPF_Text
,
1471 sampleprof::SPF_None
,
1472 sampleprof::SPF_Ext_Binary
,
1473 sampleprof::SPF_GCC
,
1474 sampleprof::SPF_Binary
};
1476 static std::unique_ptr
<MemoryBuffer
>
1477 getInputFileBuf(const StringRef
&InputFile
) {
1478 if (InputFile
== "")
1481 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
1483 exitWithErrorCode(BufOrError
.getError(), InputFile
);
1485 return std::move(*BufOrError
);
1488 static void populateProfileSymbolList(MemoryBuffer
*Buffer
,
1489 sampleprof::ProfileSymbolList
&PSL
) {
1493 SmallVector
<StringRef
, 32> SymbolVec
;
1494 StringRef Data
= Buffer
->getBuffer();
1495 Data
.split(SymbolVec
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1497 for (StringRef SymbolStr
: SymbolVec
)
1498 PSL
.add(SymbolStr
.trim());
1501 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter
&Writer
,
1502 ProfileFormat OutputFormat
,
1503 MemoryBuffer
*Buffer
,
1504 sampleprof::ProfileSymbolList
&WriterList
,
1505 bool CompressAllSections
, bool UseMD5
,
1506 bool GenPartialProfile
) {
1508 if (OutputFormat
== PF_Binary
)
1509 warn("-split-layout is ignored. Specify -extbinary to enable it");
1511 Writer
.setUseCtxSplitLayout();
1514 populateProfileSymbolList(Buffer
, WriterList
);
1515 if (WriterList
.size() > 0 && OutputFormat
!= PF_Ext_Binary
)
1516 warn("Profile Symbol list is not empty but the output format is not "
1517 "ExtBinary format. The list will be lost in the output. ");
1519 Writer
.setProfileSymbolList(&WriterList
);
1521 if (CompressAllSections
) {
1522 if (OutputFormat
!= PF_Ext_Binary
)
1523 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1525 Writer
.setToCompressAllSections();
1528 if (OutputFormat
!= PF_Ext_Binary
)
1529 warn("-use-md5 is ignored. Specify -extbinary to enable it");
1533 if (GenPartialProfile
) {
1534 if (OutputFormat
!= PF_Ext_Binary
)
1535 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1537 Writer
.setPartialProfile();
1541 static void mergeSampleProfile(const WeightedFileVector
&Inputs
,
1542 SymbolRemapper
*Remapper
,
1543 StringRef ProfileSymbolListFile
,
1544 size_t OutputSizeLimit
) {
1545 using namespace sampleprof
;
1546 SampleProfileMap ProfileMap
;
1547 SmallVector
<std::unique_ptr
<sampleprof::SampleProfileReader
>, 5> Readers
;
1548 LLVMContext Context
;
1549 sampleprof::ProfileSymbolList WriterList
;
1550 std::optional
<bool> ProfileIsProbeBased
;
1551 std::optional
<bool> ProfileIsCS
;
1552 for (const auto &Input
: Inputs
) {
1553 auto FS
= vfs::getRealFileSystem();
1554 auto ReaderOrErr
= SampleProfileReader::create(Input
.Filename
, Context
, *FS
,
1555 FSDiscriminatorPassOption
);
1556 if (std::error_code EC
= ReaderOrErr
.getError()) {
1557 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1561 // We need to keep the readers around until after all the files are
1562 // read so that we do not lose the function names stored in each
1563 // reader's memory. The function names are needed to write out the
1564 // merged profile map.
1565 Readers
.push_back(std::move(ReaderOrErr
.get()));
1566 const auto Reader
= Readers
.back().get();
1567 if (std::error_code EC
= Reader
->read()) {
1568 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1573 SampleProfileMap
&Profiles
= Reader
->getProfiles();
1574 if (ProfileIsProbeBased
&&
1575 ProfileIsProbeBased
!= FunctionSamples::ProfileIsProbeBased
)
1577 "cannot merge probe-based profile with non-probe-based profile");
1578 ProfileIsProbeBased
= FunctionSamples::ProfileIsProbeBased
;
1579 if (ProfileIsCS
&& ProfileIsCS
!= FunctionSamples::ProfileIsCS
)
1580 exitWithError("cannot merge CS profile with non-CS profile");
1581 ProfileIsCS
= FunctionSamples::ProfileIsCS
;
1582 for (SampleProfileMap::iterator I
= Profiles
.begin(), E
= Profiles
.end();
1584 sampleprof_error Result
= sampleprof_error::success
;
1585 FunctionSamples Remapped
=
1586 Remapper
? remapSamples(I
->second
, *Remapper
, Result
)
1587 : FunctionSamples();
1588 FunctionSamples
&Samples
= Remapper
? Remapped
: I
->second
;
1589 SampleContext FContext
= Samples
.getContext();
1590 mergeSampleProfErrors(Result
,
1591 ProfileMap
[FContext
].merge(Samples
, Input
.Weight
));
1592 if (Result
!= sampleprof_error::success
) {
1593 std::error_code EC
= make_error_code(Result
);
1594 handleMergeWriterError(errorCodeToError(EC
), Input
.Filename
,
1595 FContext
.toString());
1599 if (!DropProfileSymbolList
) {
1600 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
1601 Reader
->getProfileSymbolList();
1603 WriterList
.merge(*ReaderList
);
1607 if (ProfileIsCS
&& (SampleMergeColdContext
|| SampleTrimColdContext
)) {
1608 // Use threshold calculated from profile summary unless specified.
1609 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1610 auto Summary
= Builder
.computeSummaryForProfiles(ProfileMap
);
1611 uint64_t SampleProfColdThreshold
=
1612 ProfileSummaryBuilder::getColdCountThreshold(
1613 (Summary
->getDetailedSummary()));
1615 // Trim and merge cold context profile using cold threshold above;
1616 SampleContextTrimmer(ProfileMap
)
1617 .trimAndMergeColdContextProfiles(
1618 SampleProfColdThreshold
, SampleTrimColdContext
,
1619 SampleMergeColdContext
, SampleColdContextFrameDepth
, false);
1622 if (ProfileLayout
== llvm::sampleprof::SPL_Flat
) {
1623 ProfileConverter::flattenProfile(ProfileMap
, FunctionSamples::ProfileIsCS
);
1624 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1625 } else if (ProfileIsCS
&& ProfileLayout
== llvm::sampleprof::SPL_Nest
) {
1626 ProfileConverter
CSConverter(ProfileMap
);
1627 CSConverter
.convertCSProfiles();
1628 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1631 filterFunctions(ProfileMap
);
1634 SampleProfileWriter::create(OutputFilename
, FormatMap
[OutputFormat
]);
1635 if (std::error_code EC
= WriterOrErr
.getError())
1636 exitWithErrorCode(EC
, OutputFilename
);
1638 auto Writer
= std::move(WriterOrErr
.get());
1639 // WriterList will have StringRef refering to string in Buffer.
1640 // Make sure Buffer lives as long as WriterList.
1641 auto Buffer
= getInputFileBuf(ProfileSymbolListFile
);
1642 handleExtBinaryWriter(*Writer
, OutputFormat
, Buffer
.get(), WriterList
,
1643 CompressAllSections
, UseMD5
, GenPartialProfile
);
1645 // If OutputSizeLimit is 0 (default), it is the same as write().
1646 if (std::error_code EC
=
1647 Writer
->writeWithSizeLimit(ProfileMap
, OutputSizeLimit
))
1648 exitWithErrorCode(EC
);
1651 static WeightedFile
parseWeightedFile(const StringRef
&WeightedFilename
) {
1652 StringRef WeightStr
, FileName
;
1653 std::tie(WeightStr
, FileName
) = WeightedFilename
.split(',');
1656 if (WeightStr
.getAsInteger(10, Weight
) || Weight
< 1)
1657 exitWithError("input weight must be a positive integer");
1659 return {std::string(FileName
), Weight
};
1662 static void addWeightedInput(WeightedFileVector
&WNI
, const WeightedFile
&WF
) {
1663 StringRef Filename
= WF
.Filename
;
1664 uint64_t Weight
= WF
.Weight
;
1666 // If it's STDIN just pass it on.
1667 if (Filename
== "-") {
1668 WNI
.push_back({std::string(Filename
), Weight
});
1672 llvm::sys::fs::file_status Status
;
1673 llvm::sys::fs::status(Filename
, Status
);
1674 if (!llvm::sys::fs::exists(Status
))
1675 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory
),
1677 // If it's a source file, collect it.
1678 if (llvm::sys::fs::is_regular_file(Status
)) {
1679 WNI
.push_back({std::string(Filename
), Weight
});
1683 if (llvm::sys::fs::is_directory(Status
)) {
1685 for (llvm::sys::fs::recursive_directory_iterator
F(Filename
, EC
), E
;
1686 F
!= E
&& !EC
; F
.increment(EC
)) {
1687 if (llvm::sys::fs::is_regular_file(F
->path())) {
1688 addWeightedInput(WNI
, {F
->path(), Weight
});
1692 exitWithErrorCode(EC
, Filename
);
1696 static void parseInputFilenamesFile(MemoryBuffer
*Buffer
,
1697 WeightedFileVector
&WFV
) {
1701 SmallVector
<StringRef
, 8> Entries
;
1702 StringRef Data
= Buffer
->getBuffer();
1703 Data
.split(Entries
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1704 for (const StringRef
&FileWeightEntry
: Entries
) {
1705 StringRef SanitizedEntry
= FileWeightEntry
.trim(" \t\v\f\r");
1707 if (SanitizedEntry
.starts_with("#"))
1709 // If there's no comma, it's an unweighted profile.
1710 else if (!SanitizedEntry
.contains(','))
1711 addWeightedInput(WFV
, {std::string(SanitizedEntry
), 1});
1713 addWeightedInput(WFV
, parseWeightedFile(SanitizedEntry
));
1717 static int merge_main(StringRef ProgName
) {
1718 WeightedFileVector WeightedInputs
;
1719 for (StringRef Filename
: InputFilenames
)
1720 addWeightedInput(WeightedInputs
, {std::string(Filename
), 1});
1721 for (StringRef WeightedFilename
: WeightedInputFilenames
)
1722 addWeightedInput(WeightedInputs
, parseWeightedFile(WeightedFilename
));
1724 // Make sure that the file buffer stays alive for the duration of the
1725 // weighted input vector's lifetime.
1726 auto Buffer
= getInputFileBuf(InputFilenamesFile
);
1727 parseInputFilenamesFile(Buffer
.get(), WeightedInputs
);
1729 if (WeightedInputs
.empty())
1730 exitWithError("no input files specified. See " + ProgName
+ " merge -help");
1732 if (DumpInputFileList
) {
1733 for (auto &WF
: WeightedInputs
)
1734 outs() << WF
.Weight
<< "," << WF
.Filename
<< "\n";
1738 std::unique_ptr
<SymbolRemapper
> Remapper
;
1739 if (!RemappingFile
.empty())
1740 Remapper
= SymbolRemapper::create(RemappingFile
);
1742 if (!SupplInstrWithSample
.empty()) {
1743 if (ProfileKind
!= instr
)
1745 "-supplement-instr-with-sample can only work with -instr. ");
1747 supplementInstrProfile(WeightedInputs
, SupplInstrWithSample
, OutputSparse
,
1748 SupplMinSizeThreshold
, ZeroCounterThreshold
,
1749 InstrProfColdThreshold
);
1753 if (ProfileKind
== instr
)
1754 mergeInstrProfile(WeightedInputs
, Remapper
.get(), MaxDbgCorrelationWarnings
,
1757 mergeSampleProfile(WeightedInputs
, Remapper
.get(), ProfileSymbolListFile
,
1762 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1763 static void overlapInstrProfile(const std::string
&BaseFilename
,
1764 const std::string
&TestFilename
,
1765 const OverlapFuncFilters
&FuncFilter
,
1766 raw_fd_ostream
&OS
, bool IsCS
) {
1767 std::mutex ErrorLock
;
1768 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
1769 WriterContext
Context(false, ErrorLock
, WriterErrorCodes
);
1770 WeightedFile WeightedInput
{BaseFilename
, 1};
1771 OverlapStats Overlap
;
1772 Error E
= Overlap
.accumulateCounts(BaseFilename
, TestFilename
, IsCS
);
1774 exitWithError(std::move(E
), "error in getting profile count sums");
1775 if (Overlap
.Base
.CountSum
< 1.0f
) {
1776 OS
<< "Sum of edge counts for profile " << BaseFilename
<< " is 0.\n";
1779 if (Overlap
.Test
.CountSum
< 1.0f
) {
1780 OS
<< "Sum of edge counts for profile " << TestFilename
<< " is 0.\n";
1783 loadInput(WeightedInput
, nullptr, nullptr, /*ProfiledBinary=*/"", &Context
);
1784 overlapInput(BaseFilename
, TestFilename
, &Context
, Overlap
, FuncFilter
, OS
,
1790 struct SampleOverlapStats
{
1791 SampleContext BaseName
;
1792 SampleContext TestName
;
1793 // Number of overlap units
1794 uint64_t OverlapCount
= 0;
1795 // Total samples of overlap units
1796 uint64_t OverlapSample
= 0;
1797 // Number of and total samples of units that only present in base or test
1799 uint64_t BaseUniqueCount
= 0;
1800 uint64_t BaseUniqueSample
= 0;
1801 uint64_t TestUniqueCount
= 0;
1802 uint64_t TestUniqueSample
= 0;
1803 // Number of units and total samples in base or test profile
1804 uint64_t BaseCount
= 0;
1805 uint64_t BaseSample
= 0;
1806 uint64_t TestCount
= 0;
1807 uint64_t TestSample
= 0;
1808 // Number of and total samples of units that present in at least one profile
1809 uint64_t UnionCount
= 0;
1810 uint64_t UnionSample
= 0;
1811 // Weighted similarity
1812 double Similarity
= 0.0;
1813 // For SampleOverlapStats instances representing functions, weights of the
1814 // function in base and test profiles
1815 double BaseWeight
= 0.0;
1816 double TestWeight
= 0.0;
1818 SampleOverlapStats() = default;
1820 } // end anonymous namespace
namespace {
/// Per-function sample summary: the sum of samples, the largest single sample,
/// and the number of blocks counted as hot.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  /// All counters start at zero.
  FuncSampleStats() = default;

  /// Initialize every counter explicitly.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum{SampleSum}, MaxSample{MaxSample},
        HotBlockCount{HotBlockCount} {}
};
} // end anonymous namespace
namespace {
/// Result of comparing the current keys of two sorted sequences. MS_None means
/// no comparison has been made yet.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Drives a step-by-step merge over two sorted maps. Instantiate with a map
/// iterator type; keys are compared through `->first` using `operator<`.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd), Status(MS_None) {}

  /// True once both sequences have been consumed.
  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// True once the first sequence has been consumed.
  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  /// True once the second sequence has been consumed.
  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// Advance one step based on the previous match status unless the previous
  /// status is MS_None, then update Status from a comparison of the two
  /// iterators at the new position. With a previous status of MS_None the
  /// iterators are still at the beginning and no comparison has been made, so
  /// only Status is updated. Status is left untouched when both sequences are
  /// finished after advancing.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Current iterator and end iterator of the first container.
  T FirstIter;
  T FirstEnd;
  // Current iterator and end iterator of the second container.
  T SecondIter;
  T SecondEnd;
  // Match status of the current step.
  MatchStatus Status;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume whichever element(s) the previous status reported on; MS_None
  // consumes nothing.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  // Update Status according to iterators at the current step.
  if (areBothFinished())
    return;

  // With one side exhausted, every remaining element of the other is unique.
  if (isSecondFinished()) {
    Status = MS_FirstUnique;
    return;
  }
  if (isFirstFinished()) {
    Status = MS_SecondUnique;
    return;
  }

  // Both sides have an element: the smaller key is unique to its side.
  if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1910 // Return the sum of line/block samples, the max line/block sample, and the
1911 // number of line/block samples above the given threshold in a function
1912 // including its inlinees.
1913 static void getFuncSampleStats(const sampleprof::FunctionSamples
&Func
,
1914 FuncSampleStats
&FuncStats
,
1915 uint64_t HotThreshold
) {
1916 for (const auto &L
: Func
.getBodySamples()) {
1917 uint64_t Sample
= L
.second
.getSamples();
1918 FuncStats
.SampleSum
+= Sample
;
1919 FuncStats
.MaxSample
= std::max(FuncStats
.MaxSample
, Sample
);
1920 if (Sample
>= HotThreshold
)
1921 ++FuncStats
.HotBlockCount
;
1924 for (const auto &C
: Func
.getCallsiteSamples()) {
1925 for (const auto &F
: C
.second
)
1926 getFuncSampleStats(F
.second
, FuncStats
, HotThreshold
);
1930 /// Predicate that determines if a function is hot with a given threshold. We
1931 /// keep it separate from its callsites for possible extension in the future.
1932 static bool isFunctionHot(const FuncSampleStats
&FuncStats
,
1933 uint64_t HotThreshold
) {
1934 // We intentionally compare the maximum sample count in a function with the
1935 // HotThreshold to get an approximate determination on hot functions.
1936 return (FuncStats
.MaxSample
>= HotThreshold
);
1940 class SampleOverlapAggregator
{
1942 SampleOverlapAggregator(const std::string
&BaseFilename
,
1943 const std::string
&TestFilename
,
1944 double LowSimilarityThreshold
, double Epsilon
,
1945 const OverlapFuncFilters
&FuncFilter
)
1946 : BaseFilename(BaseFilename
), TestFilename(TestFilename
),
1947 LowSimilarityThreshold(LowSimilarityThreshold
), Epsilon(Epsilon
),
1948 FuncFilter(FuncFilter
) {}
1950 /// Detect 0-sample input profile and report to output stream. This interface
1951 /// should be called after loadProfiles().
1952 bool detectZeroSampleProfile(raw_fd_ostream
&OS
) const;
1954 /// Write out function-level similarity statistics for functions specified by
1955 /// options --function, --value-cutoff, and --similarity-cutoff.
1956 void dumpFuncSimilarity(raw_fd_ostream
&OS
) const;
1958 /// Write out program-level similarity and overlap statistics.
1959 void dumpProgramSummary(raw_fd_ostream
&OS
) const;
1961 /// Write out hot-function and hot-block statistics for base_profile,
1962 /// test_profile, and their overlap. For both cases, the overlap HO is
1963 /// calculated as follows:
1964 /// Given the number of functions (or blocks) that are hot in both profiles
1965 /// HCommon and the number of functions (or blocks) that are hot in at
1966 /// least one profile HUnion, HO = HCommon / HUnion.
1967 void dumpHotFuncAndBlockOverlap(raw_fd_ostream
&OS
) const;
1969 /// This function tries matching functions in base and test profiles. For each
1970 /// pair of matched functions, it aggregates the function-level
1971 /// similarity into a profile-level similarity. It also dump function-level
1972 /// similarity information of functions specified by --function,
1973 /// --value-cutoff, and --similarity-cutoff options. The program-level
1974 /// similarity PS is computed as follows:
1975 /// Given function-level similarity FS(A) for all function A, the
1976 /// weight of function A in base profile WB(A), and the weight of function
1977 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1978 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1979 /// meaning no-overlap.
1980 void computeSampleProfileOverlap(raw_fd_ostream
&OS
);
1982 /// Initialize ProfOverlap with the sum of samples in base and test
1983 /// profiles. This function also computes and keeps the sum of samples and
1984 /// max sample counts of each function in BaseStats and TestStats for later
1985 /// use to avoid re-computations.
1986 void initializeSampleProfileOverlap();
1988 /// Load profiles specified by BaseFilename and TestFilename.
1989 std::error_code
loadProfiles();
1991 using FuncSampleStatsMap
=
1992 std::unordered_map
<SampleContext
, FuncSampleStats
, SampleContext::Hash
>;
1995 SampleOverlapStats ProfOverlap
;
1996 SampleOverlapStats HotFuncOverlap
;
1997 SampleOverlapStats HotBlockOverlap
;
1998 std::string BaseFilename
;
1999 std::string TestFilename
;
2000 std::unique_ptr
<sampleprof::SampleProfileReader
> BaseReader
;
2001 std::unique_ptr
<sampleprof::SampleProfileReader
> TestReader
;
2002 // BaseStats and TestStats hold FuncSampleStats for each function, with
2003 // function name as the key.
2004 FuncSampleStatsMap BaseStats
;
2005 FuncSampleStatsMap TestStats
;
2006 // Low similarity threshold in floating point number
2007 double LowSimilarityThreshold
;
2008 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2009 // for tracking hot blocks.
2010 uint64_t BaseHotThreshold
;
2011 uint64_t TestHotThreshold
;
2012 // A small threshold used to round the results of floating point accumulations
2013 // to resolve imprecision.
2014 const double Epsilon
;
2015 std::multimap
<double, SampleOverlapStats
, std::greater
<double>>
2017 // FuncFilter carries specifications in options --value-cutoff and
2019 OverlapFuncFilters FuncFilter
;
2020 // Column offsets for printing the function-level details table.
2021 static const unsigned int TestWeightCol
= 15;
2022 static const unsigned int SimilarityCol
= 30;
2023 static const unsigned int OverlapCol
= 43;
2024 static const unsigned int BaseUniqueCol
= 53;
2025 static const unsigned int TestUniqueCol
= 67;
2026 static const unsigned int BaseSampleCol
= 81;
2027 static const unsigned int TestSampleCol
= 96;
2028 static const unsigned int FuncNameCol
= 111;
2030 /// Return a similarity of two line/block sample counters in the same
2031 /// function in base and test profiles. The line/block-similarity BS(i) is
2032 /// computed as follows:
2033 /// For an offsets i, given the sample count at i in base profile BB(i),
2034 /// the sample count at i in test profile BT(i), the sum of sample counts
2035 /// in this function in base profile SB, and the sum of sample counts in
2036 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2037 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2038 double computeBlockSimilarity(uint64_t BaseSample
, uint64_t TestSample
,
2039 const SampleOverlapStats
&FuncOverlap
) const;
2041 void updateHotBlockOverlap(uint64_t BaseSample
, uint64_t TestSample
,
2042 uint64_t HotBlockCount
);
2044 void getHotFunctions(const FuncSampleStatsMap
&ProfStats
,
2045 FuncSampleStatsMap
&HotFunc
,
2046 uint64_t HotThreshold
) const;
2048 void computeHotFuncOverlap();
2050 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2051 /// Difference for two sample units in a matched function according to the
2052 /// given match status.
2053 void updateOverlapStatsForFunction(uint64_t BaseSample
, uint64_t TestSample
,
2054 uint64_t HotBlockCount
,
2055 SampleOverlapStats
&FuncOverlap
,
2056 double &Difference
, MatchStatus Status
);
2058 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2059 /// Difference for unmatched callees that only present in one profile in a
2060 /// matched caller function.
2061 void updateForUnmatchedCallee(const sampleprof::FunctionSamples
&Func
,
2062 SampleOverlapStats
&FuncOverlap
,
2063 double &Difference
, MatchStatus Status
);
2065 /// This function updates sample overlap statistics of an overlap function in
2066 /// base and test profile. It also calculates a function-internal similarity
2068 /// For offsets i that have samples in at least one profile in this
2069 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2070 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2071 /// 0.0 meaning no overlap.
2072 double computeSampleFunctionInternalOverlap(
2073 const sampleprof::FunctionSamples
&BaseFunc
,
2074 const sampleprof::FunctionSamples
&TestFunc
,
2075 SampleOverlapStats
&FuncOverlap
);
2077 /// Function-level similarity (FS) is a weighted value over function internal
2078 /// similarity (FIS). This function computes a function's FS from its FIS by
2079 /// applying the weight.
2080 double weightForFuncSimilarity(double FuncSimilarity
, uint64_t BaseFuncSample
,
2081 uint64_t TestFuncSample
) const;
2083 /// The function-level similarity FS(A) for a function A is computed as
2085 /// Compute a function-internal similarity FIS(A) by
2086 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2087 /// function A in base profile WB(A), and the weight of function A in test
2088 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2089 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2091 computeSampleFunctionOverlap(const sampleprof::FunctionSamples
*BaseFunc
,
2092 const sampleprof::FunctionSamples
*TestFunc
,
2093 SampleOverlapStats
*FuncOverlap
,
2094 uint64_t BaseFuncSample
,
2095 uint64_t TestFuncSample
);
2097 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2098 /// similarities (FS). This method weights the FS value by the function
2099 /// weights in the base and test profiles for the aggregation.
2100 double weightByImportance(double FuncSimilarity
, uint64_t BaseFuncSample
,
2101 uint64_t TestFuncSample
) const;
2103 } // end anonymous namespace
2105 bool SampleOverlapAggregator::detectZeroSampleProfile(
2106 raw_fd_ostream
&OS
) const {
2107 bool HaveZeroSample
= false;
2108 if (ProfOverlap
.BaseSample
== 0) {
2109 OS
<< "Sum of sample counts for profile " << BaseFilename
<< " is 0.\n";
2110 HaveZeroSample
= true;
2112 if (ProfOverlap
.TestSample
== 0) {
2113 OS
<< "Sum of sample counts for profile " << TestFilename
<< " is 0.\n";
2114 HaveZeroSample
= true;
2116 return HaveZeroSample
;
2119 double SampleOverlapAggregator::computeBlockSimilarity(
2120 uint64_t BaseSample
, uint64_t TestSample
,
2121 const SampleOverlapStats
&FuncOverlap
) const {
2122 double BaseFrac
= 0.0;
2123 double TestFrac
= 0.0;
2124 if (FuncOverlap
.BaseSample
> 0)
2125 BaseFrac
= static_cast<double>(BaseSample
) / FuncOverlap
.BaseSample
;
2126 if (FuncOverlap
.TestSample
> 0)
2127 TestFrac
= static_cast<double>(TestSample
) / FuncOverlap
.TestSample
;
2128 return 1.0 - std::fabs(BaseFrac
- TestFrac
);
2131 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample
,
2132 uint64_t TestSample
,
2133 uint64_t HotBlockCount
) {
2134 bool IsBaseHot
= (BaseSample
>= BaseHotThreshold
);
2135 bool IsTestHot
= (TestSample
>= TestHotThreshold
);
2136 if (!IsBaseHot
&& !IsTestHot
)
2139 HotBlockOverlap
.UnionCount
+= HotBlockCount
;
2141 HotBlockOverlap
.BaseCount
+= HotBlockCount
;
2143 HotBlockOverlap
.TestCount
+= HotBlockCount
;
2144 if (IsBaseHot
&& IsTestHot
)
2145 HotBlockOverlap
.OverlapCount
+= HotBlockCount
;
2148 void SampleOverlapAggregator::getHotFunctions(
2149 const FuncSampleStatsMap
&ProfStats
, FuncSampleStatsMap
&HotFunc
,
2150 uint64_t HotThreshold
) const {
2151 for (const auto &F
: ProfStats
) {
2152 if (isFunctionHot(F
.second
, HotThreshold
))
2153 HotFunc
.emplace(F
.first
, F
.second
);
2157 void SampleOverlapAggregator::computeHotFuncOverlap() {
2158 FuncSampleStatsMap BaseHotFunc
;
2159 getHotFunctions(BaseStats
, BaseHotFunc
, BaseHotThreshold
);
2160 HotFuncOverlap
.BaseCount
= BaseHotFunc
.size();
2162 FuncSampleStatsMap TestHotFunc
;
2163 getHotFunctions(TestStats
, TestHotFunc
, TestHotThreshold
);
2164 HotFuncOverlap
.TestCount
= TestHotFunc
.size();
2165 HotFuncOverlap
.UnionCount
= HotFuncOverlap
.TestCount
;
2167 for (const auto &F
: BaseHotFunc
) {
2168 if (TestHotFunc
.count(F
.first
))
2169 ++HotFuncOverlap
.OverlapCount
;
2171 ++HotFuncOverlap
.UnionCount
;
2175 void SampleOverlapAggregator::updateOverlapStatsForFunction(
2176 uint64_t BaseSample
, uint64_t TestSample
, uint64_t HotBlockCount
,
2177 SampleOverlapStats
&FuncOverlap
, double &Difference
, MatchStatus Status
) {
2178 assert(Status
!= MS_None
&&
2179 "Match status should be updated before updating overlap statistics");
2180 if (Status
== MS_FirstUnique
) {
2182 FuncOverlap
.BaseUniqueSample
+= BaseSample
;
2183 } else if (Status
== MS_SecondUnique
) {
2185 FuncOverlap
.TestUniqueSample
+= TestSample
;
2187 ++FuncOverlap
.OverlapCount
;
2190 FuncOverlap
.UnionSample
+= std::max(BaseSample
, TestSample
);
2191 FuncOverlap
.OverlapSample
+= std::min(BaseSample
, TestSample
);
2193 1.0 - computeBlockSimilarity(BaseSample
, TestSample
, FuncOverlap
);
2194 updateHotBlockOverlap(BaseSample
, TestSample
, HotBlockCount
);
2197 void SampleOverlapAggregator::updateForUnmatchedCallee(
2198 const sampleprof::FunctionSamples
&Func
, SampleOverlapStats
&FuncOverlap
,
2199 double &Difference
, MatchStatus Status
) {
2200 assert((Status
== MS_FirstUnique
|| Status
== MS_SecondUnique
) &&
2201 "Status must be either of the two unmatched cases");
2202 FuncSampleStats FuncStats
;
2203 if (Status
== MS_FirstUnique
) {
2204 getFuncSampleStats(Func
, FuncStats
, BaseHotThreshold
);
2205 updateOverlapStatsForFunction(FuncStats
.SampleSum
, 0,
2206 FuncStats
.HotBlockCount
, FuncOverlap
,
2207 Difference
, Status
);
2209 getFuncSampleStats(Func
, FuncStats
, TestHotThreshold
);
2210 updateOverlapStatsForFunction(0, FuncStats
.SampleSum
,
2211 FuncStats
.HotBlockCount
, FuncOverlap
,
2212 Difference
, Status
);
2216 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2217 const sampleprof::FunctionSamples
&BaseFunc
,
2218 const sampleprof::FunctionSamples
&TestFunc
,
2219 SampleOverlapStats
&FuncOverlap
) {
2221 using namespace sampleprof
;
2223 double Difference
= 0;
2225 // Accumulate Difference for regular line/block samples in the function.
2226 // We match them through sort-merge join algorithm because
2227 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2228 // by their offsets.
2229 MatchStep
<BodySampleMap::const_iterator
> BlockIterStep(
2230 BaseFunc
.getBodySamples().cbegin(), BaseFunc
.getBodySamples().cend(),
2231 TestFunc
.getBodySamples().cbegin(), TestFunc
.getBodySamples().cend());
2232 BlockIterStep
.updateOneStep();
2233 while (!BlockIterStep
.areBothFinished()) {
2234 uint64_t BaseSample
=
2235 BlockIterStep
.isFirstFinished()
2237 : BlockIterStep
.getFirstIter()->second
.getSamples();
2238 uint64_t TestSample
=
2239 BlockIterStep
.isSecondFinished()
2241 : BlockIterStep
.getSecondIter()->second
.getSamples();
2242 updateOverlapStatsForFunction(BaseSample
, TestSample
, 1, FuncOverlap
,
2243 Difference
, BlockIterStep
.getMatchStatus());
2245 BlockIterStep
.updateOneStep();
2248 // Accumulate Difference for callsite lines in the function. We match
2249 // them through sort-merge algorithm because
2250 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2251 // ordered by their offsets.
2252 MatchStep
<CallsiteSampleMap::const_iterator
> CallsiteIterStep(
2253 BaseFunc
.getCallsiteSamples().cbegin(),
2254 BaseFunc
.getCallsiteSamples().cend(),
2255 TestFunc
.getCallsiteSamples().cbegin(),
2256 TestFunc
.getCallsiteSamples().cend());
2257 CallsiteIterStep
.updateOneStep();
2258 while (!CallsiteIterStep
.areBothFinished()) {
2259 MatchStatus CallsiteStepStatus
= CallsiteIterStep
.getMatchStatus();
2260 assert(CallsiteStepStatus
!= MS_None
&&
2261 "Match status should be updated before entering loop body");
2263 if (CallsiteStepStatus
!= MS_Match
) {
2264 auto Callsite
= (CallsiteStepStatus
== MS_FirstUnique
)
2265 ? CallsiteIterStep
.getFirstIter()
2266 : CallsiteIterStep
.getSecondIter();
2267 for (const auto &F
: Callsite
->second
)
2268 updateForUnmatchedCallee(F
.second
, FuncOverlap
, Difference
,
2269 CallsiteStepStatus
);
2271 // There may be multiple inlinees at the same offset, so we need to try
2272 // matching all of them. This match is implemented through sort-merge
2273 // algorithm because callsite records at the same offset are ordered by
2275 MatchStep
<FunctionSamplesMap::const_iterator
> CalleeIterStep(
2276 CallsiteIterStep
.getFirstIter()->second
.cbegin(),
2277 CallsiteIterStep
.getFirstIter()->second
.cend(),
2278 CallsiteIterStep
.getSecondIter()->second
.cbegin(),
2279 CallsiteIterStep
.getSecondIter()->second
.cend());
2280 CalleeIterStep
.updateOneStep();
2281 while (!CalleeIterStep
.areBothFinished()) {
2282 MatchStatus CalleeStepStatus
= CalleeIterStep
.getMatchStatus();
2283 if (CalleeStepStatus
!= MS_Match
) {
2284 auto Callee
= (CalleeStepStatus
== MS_FirstUnique
)
2285 ? CalleeIterStep
.getFirstIter()
2286 : CalleeIterStep
.getSecondIter();
2287 updateForUnmatchedCallee(Callee
->second
, FuncOverlap
, Difference
,
2290 // An inlined function can contain other inlinees inside, so compute
2291 // the Difference recursively.
2292 Difference
+= 2.0 - 2 * computeSampleFunctionInternalOverlap(
2293 CalleeIterStep
.getFirstIter()->second
,
2294 CalleeIterStep
.getSecondIter()->second
,
2297 CalleeIterStep
.updateOneStep();
2300 CallsiteIterStep
.updateOneStep();
2303 // Difference reflects the total differences of line/block samples in this
2304 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2305 // reflect the similarity between function profiles in [0.0f to 1.0f].
2306 return (2.0 - Difference
) / 2;
2309 double SampleOverlapAggregator::weightForFuncSimilarity(
2310 double FuncInternalSimilarity
, uint64_t BaseFuncSample
,
2311 uint64_t TestFuncSample
) const {
2312 // Compute the weight as the distance between the function weights in two
2314 double BaseFrac
= 0.0;
2315 double TestFrac
= 0.0;
2316 assert(ProfOverlap
.BaseSample
> 0 &&
2317 "Total samples in base profile should be greater than 0");
2318 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
;
2319 assert(ProfOverlap
.TestSample
> 0 &&
2320 "Total samples in test profile should be greater than 0");
2321 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
;
2322 double WeightDistance
= std::fabs(BaseFrac
- TestFrac
);
2324 // Take WeightDistance into the similarity.
2325 return FuncInternalSimilarity
* (1 - WeightDistance
);
2329 SampleOverlapAggregator::weightByImportance(double FuncSimilarity
,
2330 uint64_t BaseFuncSample
,
2331 uint64_t TestFuncSample
) const {
2333 double BaseFrac
= 0.0;
2334 double TestFrac
= 0.0;
2335 assert(ProfOverlap
.BaseSample
> 0 &&
2336 "Total samples in base profile should be greater than 0");
2337 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
/ 2.0;
2338 assert(ProfOverlap
.TestSample
> 0 &&
2339 "Total samples in test profile should be greater than 0");
2340 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
/ 2.0;
2341 return FuncSimilarity
* (BaseFrac
+ TestFrac
);
2344 double SampleOverlapAggregator::computeSampleFunctionOverlap(
2345 const sampleprof::FunctionSamples
*BaseFunc
,
2346 const sampleprof::FunctionSamples
*TestFunc
,
2347 SampleOverlapStats
*FuncOverlap
, uint64_t BaseFuncSample
,
2348 uint64_t TestFuncSample
) {
2349 // Default function internal similarity before weighted, meaning two functions
2351 const double DefaultFuncInternalSimilarity
= 0;
2352 double FuncSimilarity
;
2353 double FuncInternalSimilarity
;
2355 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2356 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2358 if (!BaseFunc
|| !TestFunc
) {
2359 FuncInternalSimilarity
= DefaultFuncInternalSimilarity
;
2361 assert(FuncOverlap
!= nullptr &&
2362 "FuncOverlap should be provided in this case");
2363 FuncInternalSimilarity
= computeSampleFunctionInternalOverlap(
2364 *BaseFunc
, *TestFunc
, *FuncOverlap
);
2365 // Now, FuncInternalSimilarity may be a little less than 0 due to
2366 // imprecision of floating point accumulations. Make it zero if the
2367 // difference is below Epsilon.
2368 FuncInternalSimilarity
= (std::fabs(FuncInternalSimilarity
- 0) < Epsilon
)
2370 : FuncInternalSimilarity
;
2372 FuncSimilarity
= weightForFuncSimilarity(FuncInternalSimilarity
,
2373 BaseFuncSample
, TestFuncSample
);
2374 return FuncSimilarity
;
2377 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream
&OS
) {
2378 using namespace sampleprof
;
2380 std::unordered_map
<SampleContext
, const FunctionSamples
*,
2381 SampleContext::Hash
>
2383 const auto &BaseProfiles
= BaseReader
->getProfiles();
2384 for (const auto &BaseFunc
: BaseProfiles
) {
2385 BaseFuncProf
.emplace(BaseFunc
.second
.getContext(), &(BaseFunc
.second
));
2387 ProfOverlap
.UnionCount
= BaseFuncProf
.size();
2389 const auto &TestProfiles
= TestReader
->getProfiles();
2390 for (const auto &TestFunc
: TestProfiles
) {
2391 SampleOverlapStats FuncOverlap
;
2392 FuncOverlap
.TestName
= TestFunc
.second
.getContext();
2393 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2394 "TestStats should have records for all functions in test profile "
2396 FuncOverlap
.TestSample
= TestStats
[FuncOverlap
.TestName
].SampleSum
;
2398 bool Matched
= false;
2399 const auto Match
= BaseFuncProf
.find(FuncOverlap
.TestName
);
2400 if (Match
== BaseFuncProf
.end()) {
2401 const FuncSampleStats
&FuncStats
= TestStats
[FuncOverlap
.TestName
];
2402 ++ProfOverlap
.TestUniqueCount
;
2403 ProfOverlap
.TestUniqueSample
+= FuncStats
.SampleSum
;
2404 FuncOverlap
.TestUniqueSample
= FuncStats
.SampleSum
;
2406 updateHotBlockOverlap(0, FuncStats
.SampleSum
, FuncStats
.HotBlockCount
);
2408 double FuncSimilarity
= computeSampleFunctionOverlap(
2409 nullptr, nullptr, nullptr, 0, FuncStats
.SampleSum
);
2410 ProfOverlap
.Similarity
+=
2411 weightByImportance(FuncSimilarity
, 0, FuncStats
.SampleSum
);
2413 ++ProfOverlap
.UnionCount
;
2414 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2416 ++ProfOverlap
.OverlapCount
;
2418 // Two functions match with each other. Compute function-level overlap and
2419 // aggregate them into profile-level overlap.
2420 FuncOverlap
.BaseName
= Match
->second
->getContext();
2421 assert(BaseStats
.count(FuncOverlap
.BaseName
) &&
2422 "BaseStats should have records for all functions in base profile "
2424 FuncOverlap
.BaseSample
= BaseStats
[FuncOverlap
.BaseName
].SampleSum
;
2426 FuncOverlap
.Similarity
= computeSampleFunctionOverlap(
2427 Match
->second
, &TestFunc
.second
, &FuncOverlap
, FuncOverlap
.BaseSample
,
2428 FuncOverlap
.TestSample
);
2429 ProfOverlap
.Similarity
+=
2430 weightByImportance(FuncOverlap
.Similarity
, FuncOverlap
.BaseSample
,
2431 FuncOverlap
.TestSample
);
2432 ProfOverlap
.OverlapSample
+= FuncOverlap
.OverlapSample
;
2433 ProfOverlap
.UnionSample
+= FuncOverlap
.UnionSample
;
2435 // Accumulate the percentage of base unique and test unique samples into
2437 ProfOverlap
.BaseUniqueSample
+= FuncOverlap
.BaseUniqueSample
;
2438 ProfOverlap
.TestUniqueSample
+= FuncOverlap
.TestUniqueSample
;
2440 // Remove matched base functions for later reporting functions not found
2442 BaseFuncProf
.erase(Match
);
2446 // Print function-level similarity information if specified by options.
2447 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2448 "TestStats should have records for all functions in test profile "
2450 if (TestStats
[FuncOverlap
.TestName
].MaxSample
>= FuncFilter
.ValueCutoff
||
2451 (Matched
&& FuncOverlap
.Similarity
< LowSimilarityThreshold
) ||
2452 (Matched
&& !FuncFilter
.NameFilter
.empty() &&
2453 FuncOverlap
.BaseName
.toString().find(FuncFilter
.NameFilter
) !=
2454 std::string::npos
)) {
2455 assert(ProfOverlap
.BaseSample
> 0 &&
2456 "Total samples in base profile should be greater than 0");
2457 FuncOverlap
.BaseWeight
=
2458 static_cast<double>(FuncOverlap
.BaseSample
) / ProfOverlap
.BaseSample
;
2459 assert(ProfOverlap
.TestSample
> 0 &&
2460 "Total samples in test profile should be greater than 0");
2461 FuncOverlap
.TestWeight
=
2462 static_cast<double>(FuncOverlap
.TestSample
) / ProfOverlap
.TestSample
;
2463 FuncSimilarityDump
.emplace(FuncOverlap
.BaseWeight
, FuncOverlap
);
2467 // Traverse through functions in base profile but not in test profile.
2468 for (const auto &F
: BaseFuncProf
) {
2469 assert(BaseStats
.count(F
.second
->getContext()) &&
2470 "BaseStats should have records for all functions in base profile "
2472 const FuncSampleStats
&FuncStats
= BaseStats
[F
.second
->getContext()];
2473 ++ProfOverlap
.BaseUniqueCount
;
2474 ProfOverlap
.BaseUniqueSample
+= FuncStats
.SampleSum
;
2476 updateHotBlockOverlap(FuncStats
.SampleSum
, 0, FuncStats
.HotBlockCount
);
2478 double FuncSimilarity
= computeSampleFunctionOverlap(
2479 nullptr, nullptr, nullptr, FuncStats
.SampleSum
, 0);
2480 ProfOverlap
.Similarity
+=
2481 weightByImportance(FuncSimilarity
, FuncStats
.SampleSum
, 0);
2483 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2486 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2487 // of floating point accumulations. Make it 1.0 if the difference is below
2489 ProfOverlap
.Similarity
= (std::fabs(ProfOverlap
.Similarity
- 1) < Epsilon
)
2491 : ProfOverlap
.Similarity
;
2493 computeHotFuncOverlap();
2496 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2497 const auto &BaseProf
= BaseReader
->getProfiles();
2498 for (const auto &I
: BaseProf
) {
2499 ++ProfOverlap
.BaseCount
;
2500 FuncSampleStats FuncStats
;
2501 getFuncSampleStats(I
.second
, FuncStats
, BaseHotThreshold
);
2502 ProfOverlap
.BaseSample
+= FuncStats
.SampleSum
;
2503 BaseStats
.emplace(I
.second
.getContext(), FuncStats
);
2506 const auto &TestProf
= TestReader
->getProfiles();
2507 for (const auto &I
: TestProf
) {
2508 ++ProfOverlap
.TestCount
;
2509 FuncSampleStats FuncStats
;
2510 getFuncSampleStats(I
.second
, FuncStats
, TestHotThreshold
);
2511 ProfOverlap
.TestSample
+= FuncStats
.SampleSum
;
2512 TestStats
.emplace(I
.second
.getContext(), FuncStats
);
2515 ProfOverlap
.BaseName
= StringRef(BaseFilename
);
2516 ProfOverlap
.TestName
= StringRef(TestFilename
);
2519 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream
&OS
) const {
2520 using namespace sampleprof
;
2522 if (FuncSimilarityDump
.empty())
2525 formatted_raw_ostream
FOS(OS
);
2526 FOS
<< "Function-level details:\n";
2527 FOS
<< "Base weight";
2528 FOS
.PadToColumn(TestWeightCol
);
2529 FOS
<< "Test weight";
2530 FOS
.PadToColumn(SimilarityCol
);
2531 FOS
<< "Similarity";
2532 FOS
.PadToColumn(OverlapCol
);
2534 FOS
.PadToColumn(BaseUniqueCol
);
2535 FOS
<< "Base unique";
2536 FOS
.PadToColumn(TestUniqueCol
);
2537 FOS
<< "Test unique";
2538 FOS
.PadToColumn(BaseSampleCol
);
2539 FOS
<< "Base samples";
2540 FOS
.PadToColumn(TestSampleCol
);
2541 FOS
<< "Test samples";
2542 FOS
.PadToColumn(FuncNameCol
);
2543 FOS
<< "Function name\n";
2544 for (const auto &F
: FuncSimilarityDump
) {
2545 double OverlapPercent
=
2546 F
.second
.UnionSample
> 0
2547 ? static_cast<double>(F
.second
.OverlapSample
) / F
.second
.UnionSample
2549 double BaseUniquePercent
=
2550 F
.second
.BaseSample
> 0
2551 ? static_cast<double>(F
.second
.BaseUniqueSample
) /
2554 double TestUniquePercent
=
2555 F
.second
.TestSample
> 0
2556 ? static_cast<double>(F
.second
.TestUniqueSample
) /
2560 FOS
<< format("%.2f%%", F
.second
.BaseWeight
* 100);
2561 FOS
.PadToColumn(TestWeightCol
);
2562 FOS
<< format("%.2f%%", F
.second
.TestWeight
* 100);
2563 FOS
.PadToColumn(SimilarityCol
);
2564 FOS
<< format("%.2f%%", F
.second
.Similarity
* 100);
2565 FOS
.PadToColumn(OverlapCol
);
2566 FOS
<< format("%.2f%%", OverlapPercent
* 100);
2567 FOS
.PadToColumn(BaseUniqueCol
);
2568 FOS
<< format("%.2f%%", BaseUniquePercent
* 100);
2569 FOS
.PadToColumn(TestUniqueCol
);
2570 FOS
<< format("%.2f%%", TestUniquePercent
* 100);
2571 FOS
.PadToColumn(BaseSampleCol
);
2572 FOS
<< F
.second
.BaseSample
;
2573 FOS
.PadToColumn(TestSampleCol
);
2574 FOS
<< F
.second
.TestSample
;
2575 FOS
.PadToColumn(FuncNameCol
);
2576 FOS
<< F
.second
.TestName
.toString() << "\n";
2580 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream
&OS
) const {
2581 OS
<< "Profile overlap infomation for base_profile: "
2582 << ProfOverlap
.BaseName
.toString()
2583 << " and test_profile: " << ProfOverlap
.TestName
.toString()
2584 << "\nProgram level:\n";
2586 OS
<< " Whole program profile similarity: "
2587 << format("%.3f%%", ProfOverlap
.Similarity
* 100) << "\n";
2589 assert(ProfOverlap
.UnionSample
> 0 &&
2590 "Total samples in two profile should be greater than 0");
2591 double OverlapPercent
=
2592 static_cast<double>(ProfOverlap
.OverlapSample
) / ProfOverlap
.UnionSample
;
2593 assert(ProfOverlap
.BaseSample
> 0 &&
2594 "Total samples in base profile should be greater than 0");
2595 double BaseUniquePercent
= static_cast<double>(ProfOverlap
.BaseUniqueSample
) /
2596 ProfOverlap
.BaseSample
;
2597 assert(ProfOverlap
.TestSample
> 0 &&
2598 "Total samples in test profile should be greater than 0");
2599 double TestUniquePercent
= static_cast<double>(ProfOverlap
.TestUniqueSample
) /
2600 ProfOverlap
.TestSample
;
2602 OS
<< " Whole program sample overlap: "
2603 << format("%.3f%%", OverlapPercent
* 100) << "\n";
2604 OS
<< " percentage of samples unique in base profile: "
2605 << format("%.3f%%", BaseUniquePercent
* 100) << "\n";
2606 OS
<< " percentage of samples unique in test profile: "
2607 << format("%.3f%%", TestUniquePercent
* 100) << "\n";
2608 OS
<< " total samples in base profile: " << ProfOverlap
.BaseSample
<< "\n"
2609 << " total samples in test profile: " << ProfOverlap
.TestSample
<< "\n";
2611 assert(ProfOverlap
.UnionCount
> 0 &&
2612 "There should be at least one function in two input profiles");
2613 double FuncOverlapPercent
=
2614 static_cast<double>(ProfOverlap
.OverlapCount
) / ProfOverlap
.UnionCount
;
2615 OS
<< " Function overlap: " << format("%.3f%%", FuncOverlapPercent
* 100)
2617 OS
<< " overlap functions: " << ProfOverlap
.OverlapCount
<< "\n";
2618 OS
<< " functions unique in base profile: " << ProfOverlap
.BaseUniqueCount
2620 OS
<< " functions unique in test profile: " << ProfOverlap
.TestUniqueCount
2624 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2625 raw_fd_ostream
&OS
) const {
2626 assert(HotFuncOverlap
.UnionCount
> 0 &&
2627 "There should be at least one hot function in two input profiles");
2628 OS
<< " Hot-function overlap: "
2629 << format("%.3f%%", static_cast<double>(HotFuncOverlap
.OverlapCount
) /
2630 HotFuncOverlap
.UnionCount
* 100)
2632 OS
<< " overlap hot functions: " << HotFuncOverlap
.OverlapCount
<< "\n";
2633 OS
<< " hot functions unique in base profile: "
2634 << HotFuncOverlap
.BaseCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2635 OS
<< " hot functions unique in test profile: "
2636 << HotFuncOverlap
.TestCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2638 assert(HotBlockOverlap
.UnionCount
> 0 &&
2639 "There should be at least one hot block in two input profiles");
2640 OS
<< " Hot-block overlap: "
2641 << format("%.3f%%", static_cast<double>(HotBlockOverlap
.OverlapCount
) /
2642 HotBlockOverlap
.UnionCount
* 100)
2644 OS
<< " overlap hot blocks: " << HotBlockOverlap
.OverlapCount
<< "\n";
2645 OS
<< " hot blocks unique in base profile: "
2646 << HotBlockOverlap
.BaseCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2647 OS
<< " hot blocks unique in test profile: "
2648 << HotBlockOverlap
.TestCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2651 std::error_code
SampleOverlapAggregator::loadProfiles() {
2652 using namespace sampleprof
;
2654 LLVMContext Context
;
2655 auto FS
= vfs::getRealFileSystem();
2656 auto BaseReaderOrErr
= SampleProfileReader::create(BaseFilename
, Context
, *FS
,
2657 FSDiscriminatorPassOption
);
2658 if (std::error_code EC
= BaseReaderOrErr
.getError())
2659 exitWithErrorCode(EC
, BaseFilename
);
2661 auto TestReaderOrErr
= SampleProfileReader::create(TestFilename
, Context
, *FS
,
2662 FSDiscriminatorPassOption
);
2663 if (std::error_code EC
= TestReaderOrErr
.getError())
2664 exitWithErrorCode(EC
, TestFilename
);
2666 BaseReader
= std::move(BaseReaderOrErr
.get());
2667 TestReader
= std::move(TestReaderOrErr
.get());
2669 if (std::error_code EC
= BaseReader
->read())
2670 exitWithErrorCode(EC
, BaseFilename
);
2671 if (std::error_code EC
= TestReader
->read())
2672 exitWithErrorCode(EC
, TestFilename
);
2673 if (BaseReader
->profileIsProbeBased() != TestReader
->profileIsProbeBased())
2675 "cannot compare probe-based profile with non-probe-based profile");
2676 if (BaseReader
->profileIsCS() != TestReader
->profileIsCS())
2677 exitWithError("cannot compare CS profile with non-CS profile");
2679 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2681 ProfileSummary
&BasePS
= BaseReader
->getSummary();
2682 ProfileSummary
&TestPS
= TestReader
->getSummary();
2684 ProfileSummaryBuilder::getHotCountThreshold(BasePS
.getDetailedSummary());
2686 ProfileSummaryBuilder::getHotCountThreshold(TestPS
.getDetailedSummary());
2688 return std::error_code();
2691 void overlapSampleProfile(const std::string
&BaseFilename
,
2692 const std::string
&TestFilename
,
2693 const OverlapFuncFilters
&FuncFilter
,
2694 uint64_t SimilarityCutoff
, raw_fd_ostream
&OS
) {
2695 using namespace sampleprof
;
2697 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2698 // report 2--3 places after decimal point in percentage numbers.
2699 SampleOverlapAggregator
OverlapAggr(
2700 BaseFilename
, TestFilename
,
2701 static_cast<double>(SimilarityCutoff
) / 1000000, 0.000005, FuncFilter
);
2702 if (std::error_code EC
= OverlapAggr
.loadProfiles())
2703 exitWithErrorCode(EC
);
2705 OverlapAggr
.initializeSampleProfileOverlap();
2706 if (OverlapAggr
.detectZeroSampleProfile(OS
))
2709 OverlapAggr
.computeSampleProfileOverlap(OS
);
2711 OverlapAggr
.dumpProgramSummary(OS
);
2712 OverlapAggr
.dumpHotFuncAndBlockOverlap(OS
);
2713 OverlapAggr
.dumpFuncSimilarity(OS
);
2716 static int overlap_main() {
2718 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
2720 exitWithErrorCode(EC
, OutputFilename
);
2722 if (ProfileKind
== instr
)
2723 overlapInstrProfile(BaseFilename
, TestFilename
,
2724 OverlapFuncFilters
{OverlapValueCutoff
, FuncNameFilter
},
2727 overlapSampleProfile(BaseFilename
, TestFilename
,
2728 OverlapFuncFilters
{OverlapValueCutoff
, FuncNameFilter
},
2729 SimilarityCutoff
, OS
);
/// Running totals collected while walking the value sites of one value kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  /// Number of those sites that carried at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues{0};
  /// Entry I counts the sites that have exactly I + 1 profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
2744 static void traverseAllValueSites(const InstrProfRecord
&Func
, uint32_t VK
,
2745 ValueSitesStats
&Stats
, raw_fd_ostream
&OS
,
2746 InstrProfSymtab
*Symtab
) {
2747 uint32_t NS
= Func
.getNumValueSites(VK
);
2748 Stats
.TotalNumValueSites
+= NS
;
2749 for (size_t I
= 0; I
< NS
; ++I
) {
2750 auto VD
= Func
.getValueArrayForSite(VK
, I
);
2751 uint32_t NV
= VD
.size();
2754 Stats
.TotalNumValues
+= NV
;
2755 Stats
.TotalNumValueSitesWithValueProfile
++;
2756 if (NV
> Stats
.ValueSitesHistogram
.size())
2757 Stats
.ValueSitesHistogram
.resize(NV
, 0);
2758 Stats
.ValueSitesHistogram
[NV
- 1]++;
2760 uint64_t SiteSum
= 0;
2761 for (const auto &V
: VD
)
2766 for (const auto &V
: VD
) {
2767 OS
<< "\t[ " << format("%2u", I
) << ", ";
2768 if (Symtab
== nullptr)
2769 OS
<< format("%4" PRIu64
, V
.Value
);
2771 OS
<< Symtab
->getFuncOrVarName(V
.Value
);
2772 OS
<< ", " << format("%10" PRId64
, V
.Count
) << " ] ("
2773 << format("%.2f%%", (V
.Count
* 100.0 / SiteSum
)) << ")\n";
2778 static void showValueSitesStats(raw_fd_ostream
&OS
, uint32_t VK
,
2779 ValueSitesStats
&Stats
) {
2780 OS
<< " Total number of sites: " << Stats
.TotalNumValueSites
<< "\n";
2781 OS
<< " Total number of sites with values: "
2782 << Stats
.TotalNumValueSitesWithValueProfile
<< "\n";
2783 OS
<< " Total number of profiled values: " << Stats
.TotalNumValues
<< "\n";
2785 OS
<< " Value sites histogram:\n\tNumTargets, SiteCount\n";
2786 for (unsigned I
= 0; I
< Stats
.ValueSitesHistogram
.size(); I
++) {
2787 if (Stats
.ValueSitesHistogram
[I
] > 0)
2788 OS
<< "\t" << I
+ 1 << ", " << Stats
.ValueSitesHistogram
[I
] << "\n";
2792 static int showInstrProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2793 if (SFormat
== ShowFormat::Json
)
2794 exitWithError("JSON output is not supported for instr profiles");
2795 if (SFormat
== ShowFormat::Yaml
)
2796 exitWithError("YAML output is not supported for instr profiles");
2797 auto FS
= vfs::getRealFileSystem();
2798 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
2799 std::vector
<uint32_t> Cutoffs
= std::move(DetailedSummaryCutoffs
);
2800 if (ShowDetailedSummary
&& Cutoffs
.empty()) {
2801 Cutoffs
= ProfileSummaryBuilder::DefaultCutoffs
;
2803 InstrProfSummaryBuilder
Builder(std::move(Cutoffs
));
2804 if (Error E
= ReaderOrErr
.takeError())
2805 exitWithError(std::move(E
), Filename
);
2807 auto Reader
= std::move(ReaderOrErr
.get());
2808 bool IsIRInstr
= Reader
->isIRLevelProfile();
2809 size_t ShownFunctions
= 0;
2810 size_t BelowCutoffFunctions
= 0;
2811 int NumVPKind
= IPVK_Last
- IPVK_First
+ 1;
2812 std::vector
<ValueSitesStats
> VPStats(NumVPKind
);
2814 auto MinCmp
= [](const std::pair
<std::string
, uint64_t> &v1
,
2815 const std::pair
<std::string
, uint64_t> &v2
) {
2816 return v1
.second
> v2
.second
;
2819 std::priority_queue
<std::pair
<std::string
, uint64_t>,
2820 std::vector
<std::pair
<std::string
, uint64_t>>,
2822 HottestFuncs(MinCmp
);
2824 if (!TextFormat
&& OnlyListBelow
) {
2825 OS
<< "The list of functions with the maximum counter less than "
2826 << ShowValueCutoff
<< ":\n";
2829 // Add marker so that IR-level instrumentation round-trips properly.
2830 if (TextFormat
&& IsIRInstr
)
2833 for (const auto &Func
: *Reader
) {
2834 if (Reader
->isIRLevelProfile()) {
2835 bool FuncIsCS
= NamedInstrProfRecord::hasCSFlagInHash(Func
.Hash
);
2836 if (FuncIsCS
!= ShowCS
)
2839 bool Show
= ShowAllFunctions
||
2840 (!FuncNameFilter
.empty() && Func
.Name
.contains(FuncNameFilter
));
2842 bool doTextFormatDump
= (Show
&& TextFormat
);
2844 if (doTextFormatDump
) {
2845 InstrProfSymtab
&Symtab
= Reader
->getSymtab();
2846 InstrProfWriter::writeRecordInText(Func
.Name
, Func
.Hash
, Func
, Symtab
,
2851 assert(Func
.Counts
.size() > 0 && "function missing entry counter");
2852 Builder
.addRecord(Func
);
2855 if (llvm::any_of(Func
.Counts
, [](uint64_t C
) { return C
; }))
2856 OS
<< Func
.Name
<< "\n";
2860 uint64_t FuncMax
= 0;
2861 uint64_t FuncSum
= 0;
2863 auto PseudoKind
= Func
.getCountPseudoKind();
2864 if (PseudoKind
!= InstrProfRecord::NotPseudo
) {
2866 if (!ShownFunctions
)
2867 OS
<< "Counters:\n";
2869 OS
<< " " << Func
.Name
<< ":\n"
2870 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2871 << " Counters: " << Func
.Counts
.size();
2872 if (PseudoKind
== InstrProfRecord::PseudoHot
)
2873 OS
<< " <PseudoHot>\n";
2874 else if (PseudoKind
== InstrProfRecord::PseudoWarm
)
2875 OS
<< " <PseudoWarm>\n";
2877 llvm_unreachable("Unknown PseudoKind");
2882 for (size_t I
= 0, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2883 FuncMax
= std::max(FuncMax
, Func
.Counts
[I
]);
2884 FuncSum
+= Func
.Counts
[I
];
2887 if (FuncMax
< ShowValueCutoff
) {
2888 ++BelowCutoffFunctions
;
2889 if (OnlyListBelow
) {
2890 OS
<< " " << Func
.Name
<< ": (Max = " << FuncMax
2891 << " Sum = " << FuncSum
<< ")\n";
2894 } else if (OnlyListBelow
)
2897 if (TopNFunctions
) {
2898 if (HottestFuncs
.size() == TopNFunctions
) {
2899 if (HottestFuncs
.top().second
< FuncMax
) {
2901 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2904 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2908 if (!ShownFunctions
)
2909 OS
<< "Counters:\n";
2913 OS
<< " " << Func
.Name
<< ":\n"
2914 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2915 << " Counters: " << Func
.Counts
.size() << "\n";
2917 OS
<< " Function count: " << Func
.Counts
[0] << "\n";
2919 if (ShowIndirectCallTargets
)
2920 OS
<< " Indirect Call Site Count: "
2921 << Func
.getNumValueSites(IPVK_IndirectCallTarget
) << "\n";
2924 OS
<< " Number of instrumented vtables: "
2925 << Func
.getNumValueSites(IPVK_VTableTarget
) << "\n";
2927 uint32_t NumMemOPCalls
= Func
.getNumValueSites(IPVK_MemOPSize
);
2928 if (ShowMemOPSizes
&& NumMemOPCalls
> 0)
2929 OS
<< " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2933 OS
<< " Block counts: [";
2934 size_t Start
= (IsIRInstr
? 0 : 1);
2935 for (size_t I
= Start
, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2936 OS
<< (I
== Start
? "" : ", ") << Func
.Counts
[I
];
2941 if (ShowIndirectCallTargets
) {
2942 OS
<< " Indirect Target Results:\n";
2943 traverseAllValueSites(Func
, IPVK_IndirectCallTarget
,
2944 VPStats
[IPVK_IndirectCallTarget
], OS
,
2945 &(Reader
->getSymtab()));
2949 OS
<< " VTable Results:\n";
2950 traverseAllValueSites(Func
, IPVK_VTableTarget
,
2951 VPStats
[IPVK_VTableTarget
], OS
,
2952 &(Reader
->getSymtab()));
2955 if (ShowMemOPSizes
&& NumMemOPCalls
> 0) {
2956 OS
<< " Memory Intrinsic Size Results:\n";
2957 traverseAllValueSites(Func
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
], OS
,
2962 if (Reader
->hasError())
2963 exitWithError(Reader
->getError(), Filename
);
2965 if (TextFormat
|| ShowCovered
)
2967 std::unique_ptr
<ProfileSummary
> PS(Builder
.getSummary());
2968 bool IsIR
= Reader
->isIRLevelProfile();
2969 OS
<< "Instrumentation level: " << (IsIR
? "IR" : "Front-end");
2971 OS
<< " entry_first = " << Reader
->instrEntryBBEnabled();
2973 if (ShowAllFunctions
|| !FuncNameFilter
.empty())
2974 OS
<< "Functions shown: " << ShownFunctions
<< "\n";
2975 OS
<< "Total functions: " << PS
->getNumFunctions() << "\n";
2976 if (ShowValueCutoff
> 0) {
2977 OS
<< "Number of functions with maximum count (< " << ShowValueCutoff
2978 << "): " << BelowCutoffFunctions
<< "\n";
2979 OS
<< "Number of functions with maximum count (>= " << ShowValueCutoff
2980 << "): " << PS
->getNumFunctions() - BelowCutoffFunctions
<< "\n";
2982 OS
<< "Maximum function count: " << PS
->getMaxFunctionCount() << "\n";
2983 OS
<< "Maximum internal block count: " << PS
->getMaxInternalCount() << "\n";
2985 if (TopNFunctions
) {
2986 std::vector
<std::pair
<std::string
, uint64_t>> SortedHottestFuncs
;
2987 while (!HottestFuncs
.empty()) {
2988 SortedHottestFuncs
.emplace_back(HottestFuncs
.top());
2991 OS
<< "Top " << TopNFunctions
2992 << " functions with the largest internal block counts: \n";
2993 for (auto &hotfunc
: llvm::reverse(SortedHottestFuncs
))
2994 OS
<< " " << hotfunc
.first
<< ", max count = " << hotfunc
.second
<< "\n";
2997 if (ShownFunctions
&& ShowIndirectCallTargets
) {
2998 OS
<< "Statistics for indirect call sites profile:\n";
2999 showValueSitesStats(OS
, IPVK_IndirectCallTarget
,
3000 VPStats
[IPVK_IndirectCallTarget
]);
3003 if (ShownFunctions
&& ShowVTables
) {
3004 OS
<< "Statistics for vtable profile:\n";
3005 showValueSitesStats(OS
, IPVK_VTableTarget
, VPStats
[IPVK_VTableTarget
]);
3008 if (ShownFunctions
&& ShowMemOPSizes
) {
3009 OS
<< "Statistics for memory intrinsic calls sizes profile:\n";
3010 showValueSitesStats(OS
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
]);
3013 if (ShowDetailedSummary
) {
3014 OS
<< "Total number of blocks: " << PS
->getNumCounts() << "\n";
3015 OS
<< "Total count: " << PS
->getTotalCount() << "\n";
3016 PS
->printDetailedSummary(OS
);
3020 if (Error E
= Reader
->printBinaryIds(OS
))
3021 exitWithError(std::move(E
), Filename
);
3023 if (ShowProfileVersion
)
3024 OS
<< "Profile version: " << Reader
->getVersion() << "\n";
3026 if (ShowTemporalProfTraces
) {
3027 auto &Traces
= Reader
->getTemporalProfTraces();
3028 OS
<< "Temporal Profile Traces (samples=" << Traces
.size()
3029 << " seen=" << Reader
->getTemporalProfTraceStreamSize() << "):\n";
3030 for (unsigned i
= 0; i
< Traces
.size(); i
++) {
3031 OS
<< " Temporal Profile Trace " << i
<< " (weight=" << Traces
[i
].Weight
3032 << " count=" << Traces
[i
].FunctionNameRefs
.size() << "):\n";
3033 for (auto &NameRef
: Traces
[i
].FunctionNameRefs
)
3034 OS
<< " " << Reader
->getSymtab().getFuncOrVarName(NameRef
) << "\n";
3041 static void showSectionInfo(sampleprof::SampleProfileReader
*Reader
,
3042 raw_fd_ostream
&OS
) {
3043 if (!Reader
->dumpSectionInfo(OS
)) {
3044 WithColor::warning() << "-show-sec-info-only is only supported for "
3045 << "sample profile in extbinary format and is "
3046 << "ignored for other formats.\n";
3052 struct HotFuncInfo
{
3053 std::string FuncName
;
3054 uint64_t TotalCount
= 0;
3055 double TotalCountPercent
= 0.0f
;
3056 uint64_t MaxCount
= 0;
3057 uint64_t EntryCount
= 0;
3059 HotFuncInfo() = default;
3061 HotFuncInfo(StringRef FN
, uint64_t TS
, double TSP
, uint64_t MS
, uint64_t ES
)
3062 : FuncName(FN
.begin(), FN
.end()), TotalCount(TS
), TotalCountPercent(TSP
),
3063 MaxCount(MS
), EntryCount(ES
) {}
3067 // Print out detailed information about hot functions in PrintValues vector.
3068 // Users specify titles and offset of every columns through ColumnTitle and
3069 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3070 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
3071 // print out or let it be an empty string.
3072 static void dumpHotFunctionList(const std::vector
<std::string
> &ColumnTitle
,
3073 const std::vector
<int> &ColumnOffset
,
3074 const std::vector
<HotFuncInfo
> &PrintValues
,
3075 uint64_t HotFuncCount
, uint64_t TotalFuncCount
,
3076 uint64_t HotProfCount
, uint64_t TotalProfCount
,
3077 const std::string
&HotFuncMetric
,
3078 uint32_t TopNFunctions
, raw_fd_ostream
&OS
) {
3079 assert(ColumnOffset
.size() == ColumnTitle
.size() &&
3080 "ColumnOffset and ColumnTitle should have the same size");
3081 assert(ColumnTitle
.size() >= 4 &&
3082 "ColumnTitle should have at least 4 elements");
3083 assert(TotalFuncCount
> 0 &&
3084 "There should be at least one function in the profile");
3085 double TotalProfPercent
= 0;
3086 if (TotalProfCount
> 0)
3087 TotalProfPercent
= static_cast<double>(HotProfCount
) / TotalProfCount
* 100;
3089 formatted_raw_ostream
FOS(OS
);
3090 FOS
<< HotFuncCount
<< " out of " << TotalFuncCount
3091 << " functions with profile ("
3093 (static_cast<double>(HotFuncCount
) / TotalFuncCount
* 100))
3094 << ") are considered hot functions";
3095 if (!HotFuncMetric
.empty())
3096 FOS
<< " (" << HotFuncMetric
<< ")";
3098 FOS
<< HotProfCount
<< " out of " << TotalProfCount
<< " profile counts ("
3099 << format("%.2f%%", TotalProfPercent
) << ") are from hot functions.\n";
3101 for (size_t I
= 0; I
< ColumnTitle
.size(); ++I
) {
3102 FOS
.PadToColumn(ColumnOffset
[I
]);
3103 FOS
<< ColumnTitle
[I
];
3108 for (const auto &R
: PrintValues
) {
3109 if (TopNFunctions
&& (Count
++ == TopNFunctions
))
3111 FOS
.PadToColumn(ColumnOffset
[0]);
3112 FOS
<< R
.TotalCount
<< " (" << format("%.2f%%", R
.TotalCountPercent
) << ")";
3113 FOS
.PadToColumn(ColumnOffset
[1]);
3115 FOS
.PadToColumn(ColumnOffset
[2]);
3116 FOS
<< R
.EntryCount
;
3117 FOS
.PadToColumn(ColumnOffset
[3]);
3118 FOS
<< R
.FuncName
<< "\n";
3122 static int showHotFunctionList(const sampleprof::SampleProfileMap
&Profiles
,
3123 ProfileSummary
&PS
, uint32_t TopN
,
3124 raw_fd_ostream
&OS
) {
3125 using namespace sampleprof
;
3127 const uint32_t HotFuncCutoff
= 990000;
3128 auto &SummaryVector
= PS
.getDetailedSummary();
3129 uint64_t MinCountThreshold
= 0;
3130 for (const ProfileSummaryEntry
&SummaryEntry
: SummaryVector
) {
3131 if (SummaryEntry
.Cutoff
== HotFuncCutoff
) {
3132 MinCountThreshold
= SummaryEntry
.MinCount
;
3137 // Traverse all functions in the profile and keep only hot functions.
3138 // The following loop also calculates the sum of total samples of all
3140 std::multimap
<uint64_t, std::pair
<const FunctionSamples
*, const uint64_t>,
3141 std::greater
<uint64_t>>
3143 uint64_t ProfileTotalSample
= 0;
3144 uint64_t HotFuncSample
= 0;
3145 uint64_t HotFuncCount
= 0;
3147 for (const auto &I
: Profiles
) {
3148 FuncSampleStats FuncStats
;
3149 const FunctionSamples
&FuncProf
= I
.second
;
3150 ProfileTotalSample
+= FuncProf
.getTotalSamples();
3151 getFuncSampleStats(FuncProf
, FuncStats
, MinCountThreshold
);
3153 if (isFunctionHot(FuncStats
, MinCountThreshold
)) {
3154 HotFunc
.emplace(FuncProf
.getTotalSamples(),
3155 std::make_pair(&(I
.second
), FuncStats
.MaxSample
));
3156 HotFuncSample
+= FuncProf
.getTotalSamples();
3161 std::vector
<std::string
> ColumnTitle
{"Total sample (%)", "Max sample",
3162 "Entry sample", "Function name"};
3163 std::vector
<int> ColumnOffset
{0, 24, 42, 58};
3164 std::string Metric
=
3165 std::string("max sample >= ") + std::to_string(MinCountThreshold
);
3166 std::vector
<HotFuncInfo
> PrintValues
;
3167 for (const auto &FuncPair
: HotFunc
) {
3168 const FunctionSamples
&Func
= *FuncPair
.second
.first
;
3169 double TotalSamplePercent
=
3170 (ProfileTotalSample
> 0)
3171 ? (Func
.getTotalSamples() * 100.0) / ProfileTotalSample
3173 PrintValues
.emplace_back(
3174 HotFuncInfo(Func
.getContext().toString(), Func
.getTotalSamples(),
3175 TotalSamplePercent
, FuncPair
.second
.second
,
3176 Func
.getHeadSamplesEstimate()));
3178 dumpHotFunctionList(ColumnTitle
, ColumnOffset
, PrintValues
, HotFuncCount
,
3179 Profiles
.size(), HotFuncSample
, ProfileTotalSample
,
3185 static int showSampleProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3186 if (SFormat
== ShowFormat::Yaml
)
3187 exitWithError("YAML output is not supported for sample profiles");
3188 using namespace sampleprof
;
3189 LLVMContext Context
;
3190 auto FS
= vfs::getRealFileSystem();
3191 auto ReaderOrErr
= SampleProfileReader::create(Filename
, Context
, *FS
,
3192 FSDiscriminatorPassOption
);
3193 if (std::error_code EC
= ReaderOrErr
.getError())
3194 exitWithErrorCode(EC
, Filename
);
3196 auto Reader
= std::move(ReaderOrErr
.get());
3197 if (ShowSectionInfoOnly
) {
3198 showSectionInfo(Reader
.get(), OS
);
3202 if (std::error_code EC
= Reader
->read())
3203 exitWithErrorCode(EC
, Filename
);
3205 if (ShowAllFunctions
|| FuncNameFilter
.empty()) {
3206 if (SFormat
== ShowFormat::Json
)
3207 Reader
->dumpJson(OS
);
3211 if (SFormat
== ShowFormat::Json
)
3213 "the JSON format is supported only when all functions are to "
3216 // TODO: parse context string to support filtering by contexts.
3217 FunctionSamples
*FS
= Reader
->getSamplesFor(StringRef(FuncNameFilter
));
3218 Reader
->dumpFunctionProfile(FS
? *FS
: FunctionSamples(), OS
);
3221 if (ShowProfileSymbolList
) {
3222 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
3223 Reader
->getProfileSymbolList();
3224 ReaderList
->dump(OS
);
3227 if (ShowDetailedSummary
) {
3228 auto &PS
= Reader
->getSummary();
3229 PS
.printSummary(OS
);
3230 PS
.printDetailedSummary(OS
);
3233 if (ShowHotFuncList
|| TopNFunctions
)
3234 showHotFunctionList(Reader
->getProfiles(), Reader
->getSummary(),
3240 static int showMemProfProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3241 if (SFormat
== ShowFormat::Json
)
3242 exitWithError("JSON output is not supported for MemProf");
3243 auto ReaderOr
= llvm::memprof::RawMemProfReader::create(
3244 Filename
, ProfiledBinary
, /*KeepNames=*/true);
3245 if (Error E
= ReaderOr
.takeError())
3246 // Since the error can be related to the profile or the binary we do not
3247 // pass whence. Instead additional context is provided where necessary in
3248 // the error message.
3249 exitWithError(std::move(E
), /*Whence*/ "");
3251 std::unique_ptr
<llvm::memprof::RawMemProfReader
> Reader(
3252 ReaderOr
.get().release());
3254 Reader
->printYAML(OS
);
3258 static int showDebugInfoCorrelation(const std::string
&Filename
,
3259 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3260 if (SFormat
== ShowFormat::Json
)
3261 exitWithError("JSON output is not supported for debug info correlation");
3262 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
3264 InstrProfCorrelator::get(Filename
, InstrProfCorrelator::DEBUG_INFO
)
3265 .moveInto(Correlator
))
3266 exitWithError(std::move(Err
), Filename
);
3267 if (SFormat
== ShowFormat::Yaml
) {
3268 if (auto Err
= Correlator
->dumpYaml(MaxDbgCorrelationWarnings
, OS
))
3269 exitWithError(std::move(Err
), Filename
);
3273 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
3274 exitWithError(std::move(Err
), Filename
);
3276 InstrProfSymtab Symtab
;
3277 if (auto Err
= Symtab
.create(
3278 StringRef(Correlator
->getNamesPointer(), Correlator
->getNamesSize())))
3279 exitWithError(std::move(Err
), Filename
);
3281 if (ShowProfileSymbolList
)
3282 Symtab
.dumpNames(OS
);
3283 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3284 // counters the section holds.
3285 if (ShowDetailedSummary
)
3286 OS
<< "Counters section size: 0x"
3287 << Twine::utohexstr(Correlator
->getCountersSectionSize()) << " bytes\n";
3288 OS
<< "Found " << Correlator
->getDataSize() << " functions\n";
3293 static int show_main(StringRef ProgName
) {
3294 if (Filename
.empty() && DebugInfoFilename
.empty())
3296 "the positional argument '<profdata-file>' is required unless '--" +
3297 DebugInfoFilename
.ArgStr
+ "' is provided");
3299 if (Filename
== OutputFilename
) {
3301 << " show: Input file name cannot be the same as the output file "
3306 SFormat
= ShowFormat::Json
;
3309 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3311 exitWithErrorCode(EC
, OutputFilename
);
3313 if (ShowAllFunctions
&& !FuncNameFilter
.empty())
3314 WithColor::warning() << "-function argument ignored: showing all functions\n";
3316 if (!DebugInfoFilename
.empty())
3317 return showDebugInfoCorrelation(DebugInfoFilename
, SFormat
, OS
);
3319 if (ShowProfileKind
== instr
)
3320 return showInstrProfile(SFormat
, OS
);
3321 if (ShowProfileKind
== sample
)
3322 return showSampleProfile(SFormat
, OS
);
3323 return showMemProfProfile(SFormat
, OS
);
3326 static int order_main() {
3328 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3330 exitWithErrorCode(EC
, OutputFilename
);
3331 auto FS
= vfs::getRealFileSystem();
3332 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
3333 if (Error E
= ReaderOrErr
.takeError())
3334 exitWithError(std::move(E
), Filename
);
3336 auto Reader
= std::move(ReaderOrErr
.get());
3337 for (auto &I
: *Reader
) {
3341 ArrayRef Traces
= Reader
->getTemporalProfTraces();
3342 if (NumTestTraces
&& NumTestTraces
>= Traces
.size())
3344 "--" + NumTestTraces
.ArgStr
+
3345 " must be smaller than the total number of traces: expected: < " +
3346 Twine(Traces
.size()) + ", actual: " + Twine(NumTestTraces
));
3347 ArrayRef TestTraces
= Traces
.take_back(NumTestTraces
);
3348 Traces
= Traces
.drop_back(NumTestTraces
);
3350 std::vector
<BPFunctionNode
> Nodes
;
3351 TemporalProfTraceTy::createBPFunctionNodes(Traces
, Nodes
);
3352 BalancedPartitioningConfig Config
;
3353 BalancedPartitioning
BP(Config
);
3356 OS
<< "# Ordered " << Nodes
.size() << " functions\n";
3357 if (!TestTraces
.empty()) {
3358 // Since we don't know the symbol sizes, we assume 32 functions per page.
3359 DenseMap
<BPFunctionNode::IDT
, unsigned> IdToPageNumber
;
3360 for (auto &Node
: Nodes
)
3361 IdToPageNumber
[Node
.Id
] = IdToPageNumber
.size() / 32;
3363 SmallSet
<unsigned, 0> TouchedPages
;
3365 for (auto &Trace
: TestTraces
) {
3366 for (auto Id
: Trace
.FunctionNameRefs
) {
3367 auto It
= IdToPageNumber
.find(Id
);
3368 if (It
== IdToPageNumber
.end())
3370 TouchedPages
.insert(It
->getSecond());
3371 Area
+= TouchedPages
.size();
3373 TouchedPages
.clear();
3375 OS
<< "# Total area under the page fault curve: " << (float)Area
<< "\n";
3377 OS
<< "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3378 "linkage and this output does not take that into account. Some "
3379 "post-processing may be required before passing to the linker via "
3381 for (auto &N
: Nodes
) {
3382 auto [Filename
, ParsedFuncName
] =
3383 getParsedIRPGOName(Reader
->getSymtab().getFuncOrVarName(N
.Id
));
3384 if (!Filename
.empty())
3385 OS
<< "# " << Filename
<< "\n";
3386 OS
<< ParsedFuncName
<< "\n";
3391 int llvm_profdata_main(int argc
, char **argvNonConst
,
3392 const llvm::ToolContext
&) {
3393 const char **argv
= const_cast<const char **>(argvNonConst
);
3395 StringRef
ProgName(sys::path::filename(argv
[0]));
3400 << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3404 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data\n");
3407 return show_main(ProgName
);
3409 if (OrderSubcommand
)
3410 return order_main();
3412 if (OverlapSubcommand
)
3413 return overlap_main();
3415 if (MergeSubcommand
)
3416 return merge_main(ProgName
);
3419 << ": Unknown command. Run llvm-profdata --help for usage.\n";