1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // llvm-profdata merges .profdata files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Debuginfod/HTTPClient.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/ProfileData/InstrProfCorrelator.h"
20 #include "llvm/ProfileData/InstrProfReader.h"
21 #include "llvm/ProfileData/InstrProfWriter.h"
22 #include "llvm/ProfileData/MemProf.h"
23 #include "llvm/ProfileData/MemProfReader.h"
24 #include "llvm/ProfileData/ProfileCommon.h"
25 #include "llvm/ProfileData/SampleProfReader.h"
26 #include "llvm/ProfileData/SampleProfWriter.h"
27 #include "llvm/Support/BalancedPartitioning.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Discriminator.h"
30 #include "llvm/Support/Errc.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/FormattedStream.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Regex.h"
39 #include "llvm/Support/ThreadPool.h"
40 #include "llvm/Support/Threading.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/Support/WithColor.h"
43 #include "llvm/Support/raw_ostream.h"
50 using ProfCorrelatorKind
= InstrProfCorrelator::ProfCorrelatorKind
;
52 // https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentations
53 // on each subcommand.
54 cl::SubCommand
ShowSubcommand(
56 "Takes a profile data file and displays the profiles. See detailed "
58 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
59 cl::SubCommand
OrderSubcommand(
61 "Reads temporal profiling traces from a profile and outputs a function "
62 "order that reduces the number of page faults for those traces. See "
63 "detailed documentation in "
64 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
65 cl::SubCommand
OverlapSubcommand(
67 "Computes and displays the overlap between two profiles. See detailed "
69 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
70 cl::SubCommand
MergeSubcommand(
72 "Takes several profiles and merge them together. See detailed "
74 "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
77 enum ProfileKinds
{ instr
, sample
, memory
};
78 enum FailureMode
{ warnOnly
, failIfAnyAreInvalid
, failIfAllAreInvalid
};
83 PF_Compact_Binary
, // Deprecated
89 enum class ShowFormat
{ Text
, Json
, Yaml
};
93 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
94 cl::init("-"), cl::desc("Output file"),
95 cl::sub(ShowSubcommand
),
96 cl::sub(OrderSubcommand
),
97 cl::sub(OverlapSubcommand
),
98 cl::sub(MergeSubcommand
));
99 // NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
100 // will be used. llvm::cl::alias::done() method asserts this condition.
101 cl::alias
OutputFilenameA("o", cl::desc("Alias for --output"),
102 cl::aliasopt(OutputFilename
));
104 // Options common to at least two commands.
105 cl::opt
<ProfileKinds
> ProfileKind(
106 cl::desc("Profile kind:"), cl::sub(MergeSubcommand
),
107 cl::sub(OverlapSubcommand
), cl::init(instr
),
108 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
109 clEnumVal(sample
, "Sample profile")));
110 cl::opt
<std::string
> Filename(cl::Positional
, cl::desc("<profdata-file>"),
111 cl::sub(ShowSubcommand
),
112 cl::sub(OrderSubcommand
));
113 cl::opt
<unsigned> MaxDbgCorrelationWarnings(
114 "max-debug-info-correlation-warnings",
115 cl::desc("The maximum number of warnings to emit when correlating "
116 "profile from debug info (0 = no limit)"),
117 cl::sub(MergeSubcommand
), cl::sub(ShowSubcommand
), cl::init(5));
118 cl::opt
<std::string
> ProfiledBinary(
119 "profiled-binary", cl::init(""),
120 cl::desc("Path to binary from which the profile was collected."),
121 cl::sub(ShowSubcommand
), cl::sub(MergeSubcommand
));
122 cl::opt
<std::string
> DebugInfoFilename(
123 "debug-info", cl::init(""),
125 "For show, read and extract profile metadata from debug info and show "
126 "the functions it found. For merge, use the provided debug info to "
127 "correlate the raw profile."),
128 cl::sub(ShowSubcommand
), cl::sub(MergeSubcommand
));
130 BinaryFilename("binary-file", cl::init(""),
131 cl::desc("For merge, use the provided unstripped bianry to "
132 "correlate the raw profile."),
133 cl::sub(MergeSubcommand
));
134 cl::list
<std::string
> DebugFileDirectory(
135 "debug-file-directory",
136 cl::desc("Directories to search for object files by build ID"));
137 cl::opt
<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden
,
138 cl::sub(MergeSubcommand
),
139 cl::desc("Enable debuginfod"));
140 cl::opt
<ProfCorrelatorKind
> BIDFetcherProfileCorrelate(
142 cl::desc("Use debug-info or binary correlation to correlate profiles with "
144 cl::init(InstrProfCorrelator::NONE
),
145 cl::values(clEnumValN(InstrProfCorrelator::NONE
, "",
146 "No profile correlation"),
147 clEnumValN(InstrProfCorrelator::DEBUG_INFO
, "debug-info",
148 "Use debug info to correlate"),
149 clEnumValN(InstrProfCorrelator::BINARY
, "binary",
150 "Use binary to correlate")));
151 cl::opt
<std::string
> FuncNameFilter(
153 cl::desc("Only functions matching the filter are shown in the output. For "
154 "overlapping CSSPGO, this takes a function name with calling "
156 cl::sub(ShowSubcommand
), cl::sub(OverlapSubcommand
),
157 cl::sub(MergeSubcommand
));
159 // TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
160 // factor out the common cl::sub in cl::opt constructor for subcommand-specific
163 // Options specific to merge subcommand.
164 cl::list
<std::string
> InputFilenames(cl::Positional
, cl::sub(MergeSubcommand
),
165 cl::desc("<filename...>"));
166 cl::list
<std::string
> WeightedInputFilenames("weighted-input",
167 cl::sub(MergeSubcommand
),
168 cl::desc("<weight>,<filename>"));
169 cl::opt
<ProfileFormat
> OutputFormat(
170 cl::desc("Format of output profile"), cl::sub(MergeSubcommand
),
171 cl::init(PF_Ext_Binary
),
172 cl::values(clEnumValN(PF_Binary
, "binary", "Binary encoding"),
173 clEnumValN(PF_Ext_Binary
, "extbinary",
174 "Extensible binary encoding "
176 clEnumValN(PF_Text
, "text", "Text encoding"),
177 clEnumValN(PF_GCC
, "gcc",
178 "GCC encoding (only meaningful for -sample)")));
180 InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand
),
181 cl::desc("Path to file containing newline-separated "
182 "[<weight>,]<filename> entries"));
183 cl::alias
InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
184 cl::aliasopt(InputFilenamesFile
));
185 cl::opt
<bool> DumpInputFileList(
186 "dump-input-file-list", cl::init(false), cl::Hidden
,
187 cl::sub(MergeSubcommand
),
188 cl::desc("Dump the list of input files and their weights, then exit"));
189 cl::opt
<std::string
> RemappingFile("remapping-file", cl::value_desc("file"),
190 cl::sub(MergeSubcommand
),
191 cl::desc("Symbol remapping file"));
192 cl::alias
RemappingFileA("r", cl::desc("Alias for --remapping-file"),
193 cl::aliasopt(RemappingFile
));
195 UseMD5("use-md5", cl::init(false), cl::Hidden
,
196 cl::desc("Choose to use MD5 to represent string in name table (only "
197 "meaningful for -extbinary)"),
198 cl::sub(MergeSubcommand
));
199 cl::opt
<bool> CompressAllSections(
200 "compress-all-sections", cl::init(false), cl::Hidden
,
201 cl::sub(MergeSubcommand
),
202 cl::desc("Compress all sections when writing the profile (only "
203 "meaningful for -extbinary)"));
204 cl::opt
<bool> SampleMergeColdContext(
205 "sample-merge-cold-context", cl::init(false), cl::Hidden
,
206 cl::sub(MergeSubcommand
),
208 "Merge context sample profiles whose count is below cold threshold"));
209 cl::opt
<bool> SampleTrimColdContext(
210 "sample-trim-cold-context", cl::init(false), cl::Hidden
,
211 cl::sub(MergeSubcommand
),
213 "Trim context sample profiles whose count is below cold threshold"));
214 cl::opt
<uint32_t> SampleColdContextFrameDepth(
215 "sample-frame-depth-for-cold-context", cl::init(1),
216 cl::sub(MergeSubcommand
),
217 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
218 "context-less base profile"));
219 cl::opt
<size_t> OutputSizeLimit(
220 "output-size-limit", cl::init(0), cl::Hidden
, cl::sub(MergeSubcommand
),
221 cl::desc("Trim cold functions until profile size is below specified "
222 "limit in bytes. This uses a heursitic and functions may be "
223 "excessively trimmed"));
224 cl::opt
<bool> GenPartialProfile(
225 "gen-partial-profile", cl::init(false), cl::Hidden
,
226 cl::sub(MergeSubcommand
),
227 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
228 cl::opt
<bool> SplitLayout(
229 "split-layout", cl::init(false), cl::Hidden
,
230 cl::sub(MergeSubcommand
),
231 cl::desc("Split the profile to two sections with one containing sample "
232 "profiles with inlined functions and the other without (only "
233 "meaningful for -extbinary)"));
234 cl::opt
<std::string
> SupplInstrWithSample(
235 "supplement-instr-with-sample", cl::init(""), cl::Hidden
,
236 cl::sub(MergeSubcommand
),
237 cl::desc("Supplement an instr profile with sample profile, to correct "
238 "the profile unrepresentativeness issue. The sample "
239 "profile is the input of the flag. Output will be in instr "
240 "format (The flag only works with -instr)"));
241 cl::opt
<float> ZeroCounterThreshold(
242 "zero-counter-threshold", cl::init(0.7), cl::Hidden
,
243 cl::sub(MergeSubcommand
),
244 cl::desc("For the function which is cold in instr profile but hot in "
245 "sample profile, if the ratio of the number of zero counters "
246 "divided by the total number of counters is above the "
247 "threshold, the profile of the function will be regarded as "
248 "being harmful for performance and will be dropped."));
249 cl::opt
<unsigned> SupplMinSizeThreshold(
250 "suppl-min-size-threshold", cl::init(10), cl::Hidden
,
251 cl::sub(MergeSubcommand
),
252 cl::desc("If the size of a function is smaller than the threshold, "
253 "assume it can be inlined by PGO early inliner and it won't "
254 "be adjusted based on sample profile."));
255 cl::opt
<unsigned> InstrProfColdThreshold(
256 "instr-prof-cold-threshold", cl::init(0), cl::Hidden
,
257 cl::sub(MergeSubcommand
),
258 cl::desc("User specified cold threshold for instr profile which will "
259 "override the cold threshold got from profile summary. "));
260 // WARNING: This reservoir size value is propagated to any input indexed
261 // profiles for simplicity. Changing this value between invocations could
262 // result in sample bias.
263 cl::opt
<uint64_t> TemporalProfTraceReservoirSize(
264 "temporal-profile-trace-reservoir-size", cl::init(100),
265 cl::sub(MergeSubcommand
),
266 cl::desc("The maximum number of stored temporal profile traces (default: "
268 cl::opt
<uint64_t> TemporalProfMaxTraceLength(
269 "temporal-profile-max-trace-length", cl::init(10000),
270 cl::sub(MergeSubcommand
),
271 cl::desc("The maximum length of a single temporal profile trace "
272 "(default: 10000)"));
273 cl::opt
<std::string
> FuncNameNegativeFilter(
274 "no-function", cl::init(""),
275 cl::sub(MergeSubcommand
),
276 cl::desc("Exclude functions matching the filter from the output."));
279 FailMode("failure-mode", cl::init(failIfAnyAreInvalid
),
280 cl::desc("Failure mode:"), cl::sub(MergeSubcommand
),
281 cl::values(clEnumValN(warnOnly
, "warn",
282 "Do not fail and just print warnings."),
283 clEnumValN(failIfAnyAreInvalid
, "any",
284 "Fail if any profile is invalid."),
285 clEnumValN(failIfAllAreInvalid
, "all",
286 "Fail only if all profiles are invalid.")));
288 cl::opt
<bool> OutputSparse(
289 "sparse", cl::init(false), cl::sub(MergeSubcommand
),
290 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
291 cl::opt
<unsigned> NumThreads(
292 "num-threads", cl::init(0), cl::sub(MergeSubcommand
),
293 cl::desc("Number of merge threads to use (default: autodetect)"));
294 cl::alias
NumThreadsA("j", cl::desc("Alias for --num-threads"),
295 cl::aliasopt(NumThreads
));
297 cl::opt
<std::string
> ProfileSymbolListFile(
298 "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand
),
299 cl::desc("Path to file containing the list of function symbols "
300 "used to populate profile symbol list"));
302 cl::opt
<SampleProfileLayout
> ProfileLayout(
303 "convert-sample-profile-layout",
304 cl::desc("Convert the generated profile to a profile with a new layout"),
305 cl::sub(MergeSubcommand
), cl::init(SPL_None
),
307 clEnumValN(SPL_Nest
, "nest",
308 "Nested profile, the input should be CS flat profile"),
309 clEnumValN(SPL_Flat
, "flat",
310 "Profile with nested inlinee flatten out")));
312 cl::opt
<bool> DropProfileSymbolList(
313 "drop-profile-symbol-list", cl::init(false), cl::Hidden
,
314 cl::sub(MergeSubcommand
),
315 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
316 "(only meaningful for -sample)"));
318 cl::opt
<bool> KeepVTableSymbols(
319 "keep-vtable-symbols", cl::init(false), cl::Hidden
,
320 cl::sub(MergeSubcommand
),
321 cl::desc("If true, keep the vtable symbols in indexed profiles"));
323 // Temporary support for writing the previous version of the format, to enable
324 // some forward compatibility.
325 // TODO: Consider enabling this with future version changes as well, to ease
326 // deployment of newer versions of llvm-profdata.
327 cl::opt
<bool> DoWritePrevVersion(
328 "write-prev-version", cl::init(false), cl::Hidden
,
329 cl::desc("Write the previous version of indexed format, to enable "
330 "some forward compatibility."));
332 cl::opt
<memprof::IndexedVersion
> MemProfVersionRequested(
333 "memprof-version", cl::Hidden
, cl::sub(MergeSubcommand
),
334 cl::desc("Specify the version of the memprof format to use"),
335 cl::init(memprof::Version3
),
336 cl::values(clEnumValN(memprof::Version1
, "1", "version 1"),
337 clEnumValN(memprof::Version2
, "2", "version 2"),
338 clEnumValN(memprof::Version3
, "3", "version 3")));
340 cl::opt
<bool> MemProfFullSchema(
341 "memprof-full-schema", cl::Hidden
, cl::sub(MergeSubcommand
),
342 cl::desc("Use the full schema for serialization"), cl::init(false));
345 MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false),
346 cl::Hidden
, cl::sub(MergeSubcommand
),
347 cl::desc("Generate random hotness values"));
348 static cl::opt
<unsigned> MemprofGenerateRandomHotnessSeed(
349 "memprof-random-hotness-seed", cl::init(0), cl::Hidden
,
350 cl::sub(MergeSubcommand
),
351 cl::desc("Random hotness seed to use (0 to generate new seed)"));
353 // Options specific to overlap subcommand.
354 cl::opt
<std::string
> BaseFilename(cl::Positional
, cl::Required
,
355 cl::desc("<base profile file>"),
356 cl::sub(OverlapSubcommand
));
357 cl::opt
<std::string
> TestFilename(cl::Positional
, cl::Required
,
358 cl::desc("<test profile file>"),
359 cl::sub(OverlapSubcommand
));
361 cl::opt
<unsigned long long> SimilarityCutoff(
362 "similarity-cutoff", cl::init(0),
363 cl::desc("For sample profiles, list function names (with calling context "
364 "for csspgo) for overlapped functions "
365 "with similarities below the cutoff (percentage times 10000)."),
366 cl::sub(OverlapSubcommand
));
369 "cs", cl::init(false),
370 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
371 cl::sub(OverlapSubcommand
));
373 cl::opt
<unsigned long long> OverlapValueCutoff(
374 "value-cutoff", cl::init(-1),
376 "Function level overlap information for every function (with calling "
377 "context for csspgo) in test "
378 "profile with max count value greater than the parameter value"),
379 cl::sub(OverlapSubcommand
));
381 // Options specific to show subcommand.
382 cl::opt
<bool> ShowCounts("counts", cl::init(false),
383 cl::desc("Show counter values for shown functions"),
384 cl::sub(ShowSubcommand
));
386 SFormat("show-format", cl::init(ShowFormat::Text
),
387 cl::desc("Emit output in the selected format if supported"),
388 cl::sub(ShowSubcommand
),
389 cl::values(clEnumValN(ShowFormat::Text
, "text",
390 "emit normal text output (default)"),
391 clEnumValN(ShowFormat::Json
, "json", "emit JSON"),
392 clEnumValN(ShowFormat::Yaml
, "yaml", "emit YAML")));
393 // TODO: Consider replacing this with `--show-format=text-encoding`.
395 TextFormat("text", cl::init(false),
396 cl::desc("Show instr profile data in text dump format"),
397 cl::sub(ShowSubcommand
));
400 cl::desc("Show sample profile data in the JSON format "
401 "(deprecated, please use --show-format=json)"),
402 cl::sub(ShowSubcommand
));
403 cl::opt
<bool> ShowIndirectCallTargets(
404 "ic-targets", cl::init(false),
405 cl::desc("Show indirect call site target values for shown functions"),
406 cl::sub(ShowSubcommand
));
407 cl::opt
<bool> ShowVTables("show-vtables", cl::init(false),
408 cl::desc("Show vtable names for shown functions"),
409 cl::sub(ShowSubcommand
));
410 cl::opt
<bool> ShowMemOPSizes(
411 "memop-sizes", cl::init(false),
412 cl::desc("Show the profiled sizes of the memory intrinsic calls "
413 "for shown functions"),
414 cl::sub(ShowSubcommand
));
415 cl::opt
<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
416 cl::desc("Show detailed profile summary"),
417 cl::sub(ShowSubcommand
));
418 cl::list
<uint32_t> DetailedSummaryCutoffs(
419 cl::CommaSeparated
, "detailed-summary-cutoffs",
421 "Cutoff percentages (times 10000) for generating detailed summary"),
422 cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand
));
424 ShowHotFuncList("hot-func-list", cl::init(false),
425 cl::desc("Show profile summary of a list of hot functions"),
426 cl::sub(ShowSubcommand
));
427 cl::opt
<bool> ShowAllFunctions("all-functions", cl::init(false),
428 cl::desc("Details for each and every function"),
429 cl::sub(ShowSubcommand
));
430 cl::opt
<bool> ShowCS("showcs", cl::init(false),
431 cl::desc("Show context sensitive counts"),
432 cl::sub(ShowSubcommand
));
433 cl::opt
<ProfileKinds
> ShowProfileKind(
434 cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand
),
436 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
437 clEnumVal(sample
, "Sample profile"),
438 clEnumVal(memory
, "MemProf memory access profile")));
439 cl::opt
<uint32_t> TopNFunctions(
441 cl::desc("Show the list of functions with the largest internal counts"),
442 cl::sub(ShowSubcommand
));
443 cl::opt
<uint32_t> ShowValueCutoff(
444 "value-cutoff", cl::init(0),
445 cl::desc("Set the count value cutoff. Functions with the maximum count "
446 "less than this value will not be printed out. (Default is 0)"),
447 cl::sub(ShowSubcommand
));
448 cl::opt
<bool> OnlyListBelow(
449 "list-below-cutoff", cl::init(false),
450 cl::desc("Only output names of functions whose max count values are "
451 "below the cutoff value"),
452 cl::sub(ShowSubcommand
));
453 cl::opt
<bool> ShowProfileSymbolList(
454 "show-prof-sym-list", cl::init(false),
455 cl::desc("Show profile symbol list if it exists in the profile. "),
456 cl::sub(ShowSubcommand
));
457 cl::opt
<bool> ShowSectionInfoOnly(
458 "show-sec-info-only", cl::init(false),
459 cl::desc("Show the information of each section in the sample profile. "
460 "The flag is only usable when the sample profile is in "
462 cl::sub(ShowSubcommand
));
463 cl::opt
<bool> ShowBinaryIds("binary-ids", cl::init(false),
464 cl::desc("Show binary ids in the profile. "),
465 cl::sub(ShowSubcommand
));
466 cl::opt
<bool> ShowTemporalProfTraces(
467 "temporal-profile-traces",
468 cl::desc("Show temporal profile traces in the profile."),
469 cl::sub(ShowSubcommand
));
472 ShowCovered("covered", cl::init(false),
473 cl::desc("Show only the functions that have been executed."),
474 cl::sub(ShowSubcommand
));
476 cl::opt
<bool> ShowProfileVersion("profile-version", cl::init(false),
477 cl::desc("Show profile version. "),
478 cl::sub(ShowSubcommand
));
480 // Options specific to order subcommand.
482 NumTestTraces("num-test-traces", cl::init(0),
483 cl::desc("Keep aside the last <num-test-traces> traces in "
484 "the profile when computing the function order and "
485 "instead use them to evaluate that order"),
486 cl::sub(OrderSubcommand
));
488 // We use this string to indicate that there are
489 // multiple static functions map to the same name.
490 const std::string DuplicateNameStr
= "----";
492 static void warn(Twine Message
, StringRef Whence
= "", StringRef Hint
= "") {
493 WithColor::warning();
495 errs() << Whence
<< ": ";
496 errs() << Message
<< "\n";
498 WithColor::note() << Hint
<< "\n";
501 static void warn(Error E
, StringRef Whence
= "") {
502 if (E
.isA
<InstrProfError
>()) {
503 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
504 warn(IPE
.message(), Whence
);
509 static void exitWithError(Twine Message
, StringRef Whence
= "",
510 StringRef Hint
= "") {
513 errs() << Whence
<< ": ";
514 errs() << Message
<< "\n";
516 WithColor::note() << Hint
<< "\n";
520 static void exitWithError(Error E
, StringRef Whence
= "") {
521 if (E
.isA
<InstrProfError
>()) {
522 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
523 instrprof_error instrError
= IPE
.get();
525 if (instrError
== instrprof_error::unrecognized_format
) {
526 // Hint in case user missed specifying the profile type.
527 Hint
= "Perhaps you forgot to use the --sample or --memory option?";
529 exitWithError(IPE
.message(), Whence
, Hint
);
534 exitWithError(toString(std::move(E
)), Whence
);
537 static void exitWithErrorCode(std::error_code EC
, StringRef Whence
= "") {
538 exitWithError(EC
.message(), Whence
);
541 static void warnOrExitGivenError(FailureMode FailMode
, std::error_code EC
,
542 StringRef Whence
= "") {
543 if (FailMode
== failIfAnyAreInvalid
)
544 exitWithErrorCode(EC
, Whence
);
546 warn(EC
.message(), Whence
);
549 static void handleMergeWriterError(Error E
, StringRef WhenceFile
= "",
550 StringRef WhenceFunction
= "",
551 bool ShowHint
= true) {
552 if (!WhenceFile
.empty())
553 errs() << WhenceFile
<< ": ";
554 if (!WhenceFunction
.empty())
555 errs() << WhenceFunction
<< ": ";
557 auto IPE
= instrprof_error::success
;
558 E
= handleErrors(std::move(E
),
559 [&IPE
](std::unique_ptr
<InstrProfError
> E
) -> Error
{
561 return Error(std::move(E
));
563 errs() << toString(std::move(E
)) << "\n";
567 if (IPE
!= instrprof_error::success
) {
569 case instrprof_error::hash_mismatch
:
570 case instrprof_error::count_mismatch
:
571 case instrprof_error::value_site_count_mismatch
:
572 Hint
= "Make sure that all profile data to be merged is generated "
573 "from the same binary.";
581 errs() << Hint
<< "\n";
586 /// A remapper from original symbol names to new symbol names based on a file
587 /// containing a list of mappings from old name to new name.
588 class SymbolRemapper
{
589 std::unique_ptr
<MemoryBuffer
> File
;
590 DenseMap
<StringRef
, StringRef
> RemappingTable
;
593 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
594 static std::unique_ptr
<SymbolRemapper
> create(StringRef InputFile
) {
595 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
597 exitWithErrorCode(BufOrError
.getError(), InputFile
);
599 auto Remapper
= std::make_unique
<SymbolRemapper
>();
600 Remapper
->File
= std::move(BufOrError
.get());
602 for (line_iterator
LineIt(*Remapper
->File
, /*SkipBlanks=*/true, '#');
603 !LineIt
.is_at_eof(); ++LineIt
) {
604 std::pair
<StringRef
, StringRef
> Parts
= LineIt
->split(' ');
605 if (Parts
.first
.empty() || Parts
.second
.empty() ||
606 Parts
.second
.count(' ')) {
607 exitWithError("unexpected line in remapping file",
608 (InputFile
+ ":" + Twine(LineIt
.line_number())).str(),
609 "expected 'old_symbol new_symbol'");
611 Remapper
->RemappingTable
.insert(Parts
);
616 /// Attempt to map the given old symbol into a new symbol.
618 /// \return The new symbol, or \p Name if no such symbol was found.
619 StringRef
operator()(StringRef Name
) {
620 StringRef New
= RemappingTable
.lookup(Name
);
621 return New
.empty() ? Name
: New
;
624 FunctionId
operator()(FunctionId Name
) {
625 // MD5 name cannot be remapped.
626 if (!Name
.isStringRef())
628 StringRef New
= RemappingTable
.lookup(Name
.stringRef());
629 return New
.empty() ? Name
: FunctionId(New
);
634 struct WeightedFile
{
635 std::string Filename
;
638 typedef SmallVector
<WeightedFile
, 5> WeightedFileVector
;
640 /// Keep track of merged data and reported errors.
641 struct WriterContext
{
643 InstrProfWriter Writer
;
644 std::vector
<std::pair
<Error
, std::string
>> Errors
;
646 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
;
648 WriterContext(bool IsSparse
, std::mutex
&ErrLock
,
649 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
,
650 uint64_t ReservoirSize
= 0, uint64_t MaxTraceLength
= 0)
651 : Writer(IsSparse
, ReservoirSize
, MaxTraceLength
, DoWritePrevVersion
,
652 MemProfVersionRequested
, MemProfFullSchema
,
653 MemprofGenerateRandomHotness
, MemprofGenerateRandomHotnessSeed
),
654 ErrLock(ErrLock
), WriterErrorCodes(WriterErrorCodes
) {}
657 /// Computer the overlap b/w profile BaseFilename and TestFileName,
658 /// and store the program level result to Overlap.
659 static void overlapInput(const std::string
&BaseFilename
,
660 const std::string
&TestFilename
, WriterContext
*WC
,
661 OverlapStats
&Overlap
,
662 const OverlapFuncFilters
&FuncFilter
,
663 raw_fd_ostream
&OS
, bool IsCS
) {
664 auto FS
= vfs::getRealFileSystem();
665 auto ReaderOrErr
= InstrProfReader::create(TestFilename
, *FS
);
666 if (Error E
= ReaderOrErr
.takeError()) {
667 // Skip the empty profiles by returning sliently.
668 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
669 if (ErrorCode
!= instrprof_error::empty_raw_profile
)
670 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
675 auto Reader
= std::move(ReaderOrErr
.get());
676 for (auto &I
: *Reader
) {
677 OverlapStats
FuncOverlap(OverlapStats::FunctionLevel
);
678 FuncOverlap
.setFuncInfo(I
.Name
, I
.Hash
);
680 WC
->Writer
.overlapRecord(std::move(I
), Overlap
, FuncOverlap
, FuncFilter
);
681 FuncOverlap
.dump(OS
);
685 /// Load an input into a writer context.
687 loadInput(const WeightedFile
&Input
, SymbolRemapper
*Remapper
,
688 const InstrProfCorrelator
*Correlator
, const StringRef ProfiledBinary
,
689 WriterContext
*WC
, const object::BuildIDFetcher
*BIDFetcher
= nullptr,
690 const ProfCorrelatorKind
*BIDFetcherCorrelatorKind
= nullptr) {
691 std::unique_lock
<std::mutex
> CtxGuard
{WC
->Lock
};
693 // Copy the filename, because llvm::ThreadPool copied the input "const
694 // WeightedFile &" by value, making a reference to the filename within it
695 // invalid outside of this packaged task.
696 std::string Filename
= Input
.Filename
;
698 using ::llvm::memprof::RawMemProfReader
;
699 if (RawMemProfReader::hasFormat(Input
.Filename
)) {
700 auto ReaderOrErr
= RawMemProfReader::create(Input
.Filename
, ProfiledBinary
);
702 exitWithError(ReaderOrErr
.takeError(), Input
.Filename
);
704 std::unique_ptr
<RawMemProfReader
> Reader
= std::move(ReaderOrErr
.get());
705 // Check if the profile types can be merged, e.g. clang frontend profiles
706 // should not be merged with memprof profiles.
707 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
708 consumeError(std::move(E
));
709 WC
->Errors
.emplace_back(
710 make_error
<StringError
>(
711 "Cannot merge MemProf profile with Clang generated profile.",
717 auto MemProfError
= [&](Error E
) {
718 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
719 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
723 WC
->Writer
.addMemProfData(Reader
->takeMemProfData(), MemProfError
);
727 auto FS
= vfs::getRealFileSystem();
728 // TODO: This only saves the first non-fatal error from InstrProfReader, and
729 // then added to WriterContext::Errors. However, this is not extensible, if
730 // we have more non-fatal errors from InstrProfReader in the future. How
731 // should this interact with different -failure-mode?
732 std::optional
<std::pair
<Error
, std::string
>> ReaderWarning
;
733 auto Warn
= [&](Error E
) {
735 consumeError(std::move(E
));
738 // Only show the first time an error occurs in this file.
739 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
740 ReaderWarning
= {make_error
<InstrProfError
>(ErrCode
, Msg
), Filename
};
743 const ProfCorrelatorKind CorrelatorKind
= BIDFetcherCorrelatorKind
744 ? *BIDFetcherCorrelatorKind
745 : ProfCorrelatorKind::NONE
;
746 auto ReaderOrErr
= InstrProfReader::create(Input
.Filename
, *FS
, Correlator
,
747 BIDFetcher
, CorrelatorKind
, Warn
);
748 if (Error E
= ReaderOrErr
.takeError()) {
749 // Skip the empty profiles by returning silently.
750 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
751 if (ErrCode
!= instrprof_error::empty_raw_profile
)
752 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrCode
, Msg
),
757 auto Reader
= std::move(ReaderOrErr
.get());
758 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
759 consumeError(std::move(E
));
760 WC
->Errors
.emplace_back(
761 make_error
<StringError
>(
762 "Merge IR generated profile with Clang generated profile.",
768 for (auto &I
: *Reader
) {
770 I
.Name
= (*Remapper
)(I
.Name
);
771 const StringRef FuncName
= I
.Name
;
772 bool Reported
= false;
773 WC
->Writer
.addRecord(std::move(I
), Input
.Weight
, [&](Error E
) {
775 consumeError(std::move(E
));
779 // Only show hint the first time an error occurs.
780 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
781 std::unique_lock
<std::mutex
> ErrGuard
{WC
->ErrLock
};
782 bool firstTime
= WC
->WriterErrorCodes
.insert(ErrCode
).second
;
783 handleMergeWriterError(make_error
<InstrProfError
>(ErrCode
, Msg
),
784 Input
.Filename
, FuncName
, firstTime
);
788 if (KeepVTableSymbols
) {
789 const InstrProfSymtab
&symtab
= Reader
->getSymtab();
790 const auto &VTableNames
= symtab
.getVTableNames();
792 for (const auto &kv
: VTableNames
)
793 WC
->Writer
.addVTableName(kv
.getKey());
796 if (Reader
->hasTemporalProfile()) {
797 auto &Traces
= Reader
->getTemporalProfTraces(Input
.Weight
);
799 WC
->Writer
.addTemporalProfileTraces(
800 Traces
, Reader
->getTemporalProfTraceStreamSize());
802 if (Reader
->hasError()) {
803 if (Error E
= Reader
->getError()) {
804 WC
->Errors
.emplace_back(std::move(E
), Filename
);
809 std::vector
<llvm::object::BuildID
> BinaryIds
;
810 if (Error E
= Reader
->readBinaryIds(BinaryIds
)) {
811 WC
->Errors
.emplace_back(std::move(E
), Filename
);
814 WC
->Writer
.addBinaryIds(BinaryIds
);
817 WC
->Errors
.emplace_back(std::move(ReaderWarning
->first
),
818 ReaderWarning
->second
);
822 /// Merge the \p Src writer context into \p Dst.
823 static void mergeWriterContexts(WriterContext
*Dst
, WriterContext
*Src
) {
824 for (auto &ErrorPair
: Src
->Errors
)
825 Dst
->Errors
.push_back(std::move(ErrorPair
));
828 if (Error E
= Dst
->Writer
.mergeProfileKind(Src
->Writer
.getProfileKind()))
829 exitWithError(std::move(E
));
831 Dst
->Writer
.mergeRecordsFromWriter(std::move(Src
->Writer
), [&](Error E
) {
832 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
833 std::unique_lock
<std::mutex
> ErrGuard
{Dst
->ErrLock
};
834 bool firstTime
= Dst
->WriterErrorCodes
.insert(ErrorCode
).second
;
836 warn(toString(make_error
<InstrProfError
>(ErrorCode
, Msg
)));
841 getFuncName(const StringMap
<InstrProfWriter::ProfilingData
>::value_type
&Val
) {
846 getFuncName(const SampleProfileMap::value_type
&Val
) {
847 return Val
.second
.getContext().toString();
850 template <typename T
>
851 static void filterFunctions(T
&ProfileMap
) {
852 bool hasFilter
= !FuncNameFilter
.empty();
853 bool hasNegativeFilter
= !FuncNameNegativeFilter
.empty();
854 if (!hasFilter
&& !hasNegativeFilter
)
857 // If filter starts with '?' it is MSVC mangled name, not a regex.
858 llvm::Regex
ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
859 if (hasFilter
&& FuncNameFilter
[0] == '?' &&
860 ProbablyMSVCMangledName
.match(FuncNameFilter
))
861 FuncNameFilter
= llvm::Regex::escape(FuncNameFilter
);
862 if (hasNegativeFilter
&& FuncNameNegativeFilter
[0] == '?' &&
863 ProbablyMSVCMangledName
.match(FuncNameNegativeFilter
))
864 FuncNameNegativeFilter
= llvm::Regex::escape(FuncNameNegativeFilter
);
866 size_t Count
= ProfileMap
.size();
867 llvm::Regex
Pattern(FuncNameFilter
);
868 llvm::Regex
NegativePattern(FuncNameNegativeFilter
);
870 if (hasFilter
&& !Pattern
.isValid(Error
))
871 exitWithError(Error
);
872 if (hasNegativeFilter
&& !NegativePattern
.isValid(Error
))
873 exitWithError(Error
);
875 // Handle MD5 profile, so it is still able to match using the original name.
876 std::string MD5Name
= std::to_string(llvm::MD5Hash(FuncNameFilter
));
877 std::string NegativeMD5Name
=
878 std::to_string(llvm::MD5Hash(FuncNameNegativeFilter
));
880 for (auto I
= ProfileMap
.begin(); I
!= ProfileMap
.end();) {
882 const auto &FuncName
= getFuncName(*Tmp
);
883 // Negative filter has higher precedence than positive filter.
884 if ((hasNegativeFilter
&&
885 (NegativePattern
.match(FuncName
) ||
886 (FunctionSamples::UseMD5
&& NegativeMD5Name
== FuncName
))) ||
887 (hasFilter
&& !(Pattern
.match(FuncName
) ||
888 (FunctionSamples::UseMD5
&& MD5Name
== FuncName
))))
889 ProfileMap
.erase(Tmp
);
892 llvm::dbgs() << Count
- ProfileMap
.size() << " of " << Count
<< " functions "
893 << "in the original profile are filtered.\n";
896 static void writeInstrProfile(StringRef OutputFilename
,
897 ProfileFormat OutputFormat
,
898 InstrProfWriter
&Writer
) {
900 raw_fd_ostream
Output(OutputFilename
.data(), EC
,
901 OutputFormat
== PF_Text
? sys::fs::OF_TextWithCRLF
904 exitWithErrorCode(EC
, OutputFilename
);
906 if (OutputFormat
== PF_Text
) {
907 if (Error E
= Writer
.writeText(Output
))
910 if (Output
.is_displayed())
911 exitWithError("cannot write a non-text format profile to the terminal");
912 if (Error E
= Writer
.write(Output
))
917 static void mergeInstrProfile(const WeightedFileVector
&Inputs
,
918 SymbolRemapper
*Remapper
,
919 int MaxDbgCorrelationWarnings
,
920 const StringRef ProfiledBinary
) {
921 const uint64_t TraceReservoirSize
= TemporalProfTraceReservoirSize
.getValue();
922 const uint64_t MaxTraceLength
= TemporalProfMaxTraceLength
.getValue();
923 if (OutputFormat
== PF_Compact_Binary
)
924 exitWithError("Compact Binary is deprecated");
925 if (OutputFormat
!= PF_Binary
&& OutputFormat
!= PF_Ext_Binary
&&
926 OutputFormat
!= PF_Text
)
927 exitWithError("unknown format is specified");
929 // TODO: Maybe we should support correlation with mixture of different
930 // correlation modes(w/wo debug-info/object correlation).
931 if (DebugInfoFilename
.empty()) {
932 if (!BinaryFilename
.empty() && (DebugInfod
|| !DebugFileDirectory
.empty()))
933 exitWithError("Expected only one of -binary-file, -debuginfod or "
934 "-debug-file-directory");
935 } else if (!BinaryFilename
.empty() || DebugInfod
||
936 !DebugFileDirectory
.empty()) {
937 exitWithError("Expected only one of -debug-info, -binary-file, -debuginfod "
938 "or -debug-file-directory");
940 std::string CorrelateFilename
;
941 ProfCorrelatorKind CorrelateKind
= ProfCorrelatorKind::NONE
;
942 if (!DebugInfoFilename
.empty()) {
943 CorrelateFilename
= DebugInfoFilename
;
944 CorrelateKind
= ProfCorrelatorKind::DEBUG_INFO
;
945 } else if (!BinaryFilename
.empty()) {
946 CorrelateFilename
= BinaryFilename
;
947 CorrelateKind
= ProfCorrelatorKind::BINARY
;
950 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
951 if (CorrelateKind
!= InstrProfCorrelator::NONE
) {
952 if (auto Err
= InstrProfCorrelator::get(CorrelateFilename
, CorrelateKind
)
953 .moveInto(Correlator
))
954 exitWithError(std::move(Err
), CorrelateFilename
);
955 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
956 exitWithError(std::move(Err
), CorrelateFilename
);
959 ProfCorrelatorKind BIDFetcherCorrelateKind
= ProfCorrelatorKind::NONE
;
960 std::unique_ptr
<object::BuildIDFetcher
> BIDFetcher
;
962 llvm::HTTPClient::initialize();
963 BIDFetcher
= std::make_unique
<DebuginfodFetcher
>(DebugFileDirectory
);
964 if (!BIDFetcherProfileCorrelate
)
965 exitWithError("Expected --correlate when --debuginfod is provided");
966 BIDFetcherCorrelateKind
= BIDFetcherProfileCorrelate
;
967 } else if (!DebugFileDirectory
.empty()) {
968 BIDFetcher
= std::make_unique
<object::BuildIDFetcher
>(DebugFileDirectory
);
969 if (!BIDFetcherProfileCorrelate
)
970 exitWithError("Expected --correlate when --debug-file-directory "
972 BIDFetcherCorrelateKind
= BIDFetcherProfileCorrelate
;
973 } else if (BIDFetcherProfileCorrelate
) {
974 exitWithError("Expected --debuginfod or --debug-file-directory when "
975 "--correlate is provided");
978 std::mutex ErrorLock
;
979 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
981 // If NumThreads is not specified, auto-detect a good default.
983 NumThreads
= std::min(hardware_concurrency().compute_thread_count(),
984 unsigned((Inputs
.size() + 1) / 2));
986 // Initialize the writer contexts.
987 SmallVector
<std::unique_ptr
<WriterContext
>, 4> Contexts
;
988 for (unsigned I
= 0; I
< NumThreads
; ++I
)
989 Contexts
.emplace_back(std::make_unique
<WriterContext
>(
990 OutputSparse
, ErrorLock
, WriterErrorCodes
, TraceReservoirSize
,
993 if (NumThreads
== 1) {
994 for (const auto &Input
: Inputs
)
995 loadInput(Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
996 Contexts
[0].get(), BIDFetcher
.get(), &BIDFetcherCorrelateKind
);
998 DefaultThreadPool
Pool(hardware_concurrency(NumThreads
));
1000 // Load the inputs in parallel (N/NumThreads serial steps).
1002 for (const auto &Input
: Inputs
) {
1003 Pool
.async(loadInput
, Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
1004 Contexts
[Ctx
].get(), BIDFetcher
.get(),
1005 &BIDFetcherCorrelateKind
);
1006 Ctx
= (Ctx
+ 1) % NumThreads
;
1010 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
1011 unsigned Mid
= Contexts
.size() / 2;
1012 unsigned End
= Contexts
.size();
1013 assert(Mid
> 0 && "Expected more than one context");
1015 for (unsigned I
= 0; I
< Mid
; ++I
)
1016 Pool
.async(mergeWriterContexts
, Contexts
[I
].get(),
1017 Contexts
[I
+ Mid
].get());
1020 Pool
.async(mergeWriterContexts
, Contexts
[0].get(),
1021 Contexts
[End
- 1].get());
1029 // Handle deferred errors encountered during merging. If the number of errors
1030 // is equal to the number of inputs the merge failed.
1031 unsigned NumErrors
= 0;
1032 for (std::unique_ptr
<WriterContext
> &WC
: Contexts
) {
1033 for (auto &ErrorPair
: WC
->Errors
) {
1035 warn(toString(std::move(ErrorPair
.first
)), ErrorPair
.second
);
1038 if ((NumErrors
== Inputs
.size() && FailMode
== failIfAllAreInvalid
) ||
1039 (NumErrors
> 0 && FailMode
== failIfAnyAreInvalid
))
1040 exitWithError("no profile can be merged");
1042 filterFunctions(Contexts
[0]->Writer
.getProfileData());
1044 writeInstrProfile(OutputFilename
, OutputFormat
, Contexts
[0]->Writer
);
1047 /// The profile entry for a function in instrumentation profile.
1048 struct InstrProfileEntry
{
1049 uint64_t MaxCount
= 0;
1050 uint64_t NumEdgeCounters
= 0;
1051 float ZeroCounterRatio
= 0.0;
1052 InstrProfRecord
*ProfRecord
;
1053 InstrProfileEntry(InstrProfRecord
*Record
);
1054 InstrProfileEntry() = default;
1057 InstrProfileEntry::InstrProfileEntry(InstrProfRecord
*Record
) {
1058 ProfRecord
= Record
;
1059 uint64_t CntNum
= Record
->Counts
.size();
1060 uint64_t ZeroCntNum
= 0;
1061 for (size_t I
= 0; I
< CntNum
; ++I
) {
1062 MaxCount
= std::max(MaxCount
, Record
->Counts
[I
]);
1063 ZeroCntNum
+= !Record
->Counts
[I
];
1065 ZeroCounterRatio
= (float)ZeroCntNum
/ CntNum
;
1066 NumEdgeCounters
= CntNum
;
1069 /// Either set all the counters in the instr profile entry \p IFE to
1070 /// -1 / -2 /in order to drop the profile or scale up the
1071 /// counters in \p IFP to be above hot / cold threshold. We use
1072 /// the ratio of zero counters in the profile of a function to
1073 /// decide the profile is helpful or harmful for performance,
1074 /// and to choose whether to scale up or drop it.
1075 static void updateInstrProfileEntry(InstrProfileEntry
&IFE
, bool SetToHot
,
1076 uint64_t HotInstrThreshold
,
1077 uint64_t ColdInstrThreshold
,
1078 float ZeroCounterThreshold
) {
1079 InstrProfRecord
*ProfRecord
= IFE
.ProfRecord
;
1080 if (!IFE
.MaxCount
|| IFE
.ZeroCounterRatio
> ZeroCounterThreshold
) {
1081 // If all or most of the counters of the function are zero, the
1082 // profile is unaccountable and should be dropped. Reset all the
1083 // counters to be -1 / -2 and PGO profile-use will drop the profile.
1084 // All counters being -1 also implies that the function is hot so
1085 // PGO profile-use will also set the entry count metadata to be
1086 // above hot threshold.
1087 // All counters being -2 implies that the function is warm so
1088 // PGO profile-use will also set the entry count metadata to be
1089 // above cold threshold.
1091 (SetToHot
? InstrProfRecord::PseudoHot
: InstrProfRecord::PseudoWarm
);
1092 ProfRecord
->setPseudoCount(Kind
);
1096 // Scale up the MaxCount to be multiple times above hot / cold threshold.
1097 const unsigned MultiplyFactor
= 3;
1098 uint64_t Threshold
= (SetToHot
? HotInstrThreshold
: ColdInstrThreshold
);
1099 uint64_t Numerator
= Threshold
* MultiplyFactor
;
1101 // Make sure Threshold for warm counters is below the HotInstrThreshold.
1102 if (!SetToHot
&& Threshold
>= HotInstrThreshold
) {
1103 Threshold
= (HotInstrThreshold
+ ColdInstrThreshold
) / 2;
1106 uint64_t Denominator
= IFE
.MaxCount
;
1107 if (Numerator
<= Denominator
)
1109 ProfRecord
->scale(Numerator
, Denominator
, [&](instrprof_error E
) {
1110 warn(toString(make_error
<InstrProfError
>(E
)));
1114 const uint64_t ColdPercentileIdx
= 15;
1115 const uint64_t HotPercentileIdx
= 11;
1117 using sampleprof::FSDiscriminatorPass
;
1119 // Internal options to set FSDiscriminatorPass. Used in merge and show
1121 static cl::opt
<FSDiscriminatorPass
> FSDiscriminatorPassOption(
1122 "fs-discriminator-pass", cl::init(PassLast
), cl::Hidden
,
1123 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1124 "pass beyond this value. The enum values are defined in "
1125 "Support/Discriminator.h"),
1126 cl::values(clEnumVal(Base
, "Use base discriminators only"),
1127 clEnumVal(Pass1
, "Use base and pass 1 discriminators"),
1128 clEnumVal(Pass2
, "Use base and pass 1-2 discriminators"),
1129 clEnumVal(Pass3
, "Use base and pass 1-3 discriminators"),
1130 clEnumVal(PassLast
, "Use all discriminator bits (default)")));
1132 static unsigned getDiscriminatorMask() {
1133 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption
.getValue()));
1136 /// Adjust the instr profile in \p WC based on the sample profile in
1139 adjustInstrProfile(std::unique_ptr
<WriterContext
> &WC
,
1140 std::unique_ptr
<sampleprof::SampleProfileReader
> &Reader
,
1141 unsigned SupplMinSizeThreshold
, float ZeroCounterThreshold
,
1142 unsigned InstrProfColdThreshold
) {
1143 // Function to its entry in instr profile.
1144 StringMap
<InstrProfileEntry
> InstrProfileMap
;
1145 StringMap
<StringRef
> StaticFuncMap
;
1146 InstrProfSummaryBuilder
IPBuilder(ProfileSummaryBuilder::DefaultCutoffs
);
1148 auto checkSampleProfileHasFUnique
= [&Reader
]() {
1149 for (const auto &PD
: Reader
->getProfiles()) {
1150 auto &FContext
= PD
.second
.getContext();
1151 if (FContext
.toString().find(FunctionSamples::UniqSuffix
) !=
1152 std::string::npos
) {
1159 bool SampleProfileHasFUnique
= checkSampleProfileHasFUnique();
1161 auto buildStaticFuncMap
= [&StaticFuncMap
,
1162 SampleProfileHasFUnique
](const StringRef Name
) {
1163 std::string FilePrefixes
[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1164 size_t PrefixPos
= StringRef::npos
;
1165 for (auto &FilePrefix
: FilePrefixes
) {
1166 std::string NamePrefix
= FilePrefix
+ GlobalIdentifierDelimiter
;
1167 PrefixPos
= Name
.find_insensitive(NamePrefix
);
1168 if (PrefixPos
== StringRef::npos
)
1170 PrefixPos
+= NamePrefix
.size();
1174 if (PrefixPos
== StringRef::npos
) {
1178 StringRef NewName
= Name
.drop_front(PrefixPos
);
1179 StringRef FName
= Name
.substr(0, PrefixPos
- 1);
1180 if (NewName
.size() == 0) {
1184 // This name should have a static linkage.
1185 size_t PostfixPos
= NewName
.find(FunctionSamples::UniqSuffix
);
1186 bool ProfileHasFUnique
= (PostfixPos
!= StringRef::npos
);
1188 // If sample profile and instrumented profile do not agree on symbol
1190 if (SampleProfileHasFUnique
!= ProfileHasFUnique
) {
1191 // If instrumented profile uses -funique-internal-linkage-symbols,
1192 // we need to trim the name.
1193 if (ProfileHasFUnique
) {
1194 NewName
= NewName
.substr(0, PostfixPos
);
1196 // If sample profile uses -funique-internal-linkage-symbols,
1197 // we build the map.
1199 NewName
.str() + getUniqueInternalLinkagePostfix(FName
);
1200 NewName
= StringRef(NStr
);
1201 StaticFuncMap
[NewName
] = Name
;
1206 auto [It
, Inserted
] = StaticFuncMap
.try_emplace(NewName
, Name
);
1208 It
->second
= DuplicateNameStr
;
1211 // We need to flatten the SampleFDO profile as the InstrFDO
1212 // profile does not have inlined callsite profiles.
1213 // One caveat is the pre-inlined function -- their samples
1214 // should be collapsed into the caller function.
1215 // Here we do a DFS traversal to get the flatten profile
1216 // info: the sum of entrycount and the max of maxcount.
1217 // Here is the algorithm:
1218 // recursive (FS, root_name) {
1219 // name = FS->getName();
1220 // get samples for FS;
1221 // if (InstrProf.find(name) {
1222 // root_name = name;
1224 // if (name is in static_func map) {
1225 // root_name = static_name;
1228 // update the Map entry for root_name;
1229 // for (subfs: FS) {
1230 // recursive(subfs, root_name);
1234 // Here is an example.
1250 // InstrProfile has two entries:
1254 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1255 // {"foo", {1000, 5000}}
1256 // {"bar.cc;bar", {11000, 30000}}
1258 // foo's has an entry count of 1000, and max body count of 5000.
1259 // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1260 // 10000), and max count of 30000 (from the callsite in line 8).
1262 // Note that goo's count will remain in bar.cc;bar() as it does not have an
1263 // entry in InstrProfile.
1264 llvm::StringMap
<std::pair
<uint64_t, uint64_t>> FlattenSampleMap
;
1265 auto BuildMaxSampleMap
= [&FlattenSampleMap
, &StaticFuncMap
,
1266 &InstrProfileMap
](const FunctionSamples
&FS
,
1267 const StringRef
&RootName
) {
1268 auto BuildMaxSampleMapImpl
= [&](const FunctionSamples
&FS
,
1269 const StringRef
&RootName
,
1270 auto &BuildImpl
) -> void {
1271 std::string NameStr
= FS
.getFunction().str();
1272 const StringRef Name
= NameStr
;
1273 const StringRef
*NewRootName
= &RootName
;
1274 uint64_t EntrySample
= FS
.getHeadSamplesEstimate();
1275 uint64_t MaxBodySample
= FS
.getMaxCountInside(/* SkipCallSite*/ true);
1277 auto It
= InstrProfileMap
.find(Name
);
1278 if (It
!= InstrProfileMap
.end()) {
1279 NewRootName
= &Name
;
1281 auto NewName
= StaticFuncMap
.find(Name
);
1282 if (NewName
!= StaticFuncMap
.end()) {
1283 It
= InstrProfileMap
.find(NewName
->second
);
1284 if (NewName
->second
!= DuplicateNameStr
) {
1285 NewRootName
= &NewName
->second
;
1288 // Here the EntrySample is of an inlined function, so we should not
1289 // update the EntrySample in the map.
1293 EntrySample
+= FlattenSampleMap
[*NewRootName
].first
;
1295 std::max(FlattenSampleMap
[*NewRootName
].second
, MaxBodySample
);
1296 FlattenSampleMap
[*NewRootName
] =
1297 std::make_pair(EntrySample
, MaxBodySample
);
1299 for (const auto &C
: FS
.getCallsiteSamples())
1300 for (const auto &F
: C
.second
)
1301 BuildImpl(F
.second
, *NewRootName
, BuildImpl
);
1303 BuildMaxSampleMapImpl(FS
, RootName
, BuildMaxSampleMapImpl
);
1306 for (auto &PD
: WC
->Writer
.getProfileData()) {
1307 // Populate IPBuilder.
1308 for (const auto &PDV
: PD
.getValue()) {
1309 InstrProfRecord Record
= PDV
.second
;
1310 IPBuilder
.addRecord(Record
);
1313 // If a function has multiple entries in instr profile, skip it.
1314 if (PD
.getValue().size() != 1)
1317 // Initialize InstrProfileMap.
1318 InstrProfRecord
*R
= &PD
.getValue().begin()->second
;
1319 StringRef FullName
= PD
.getKey();
1320 InstrProfileMap
[FullName
] = InstrProfileEntry(R
);
1321 buildStaticFuncMap(FullName
);
1324 for (auto &PD
: Reader
->getProfiles()) {
1325 sampleprof::FunctionSamples
&FS
= PD
.second
;
1326 std::string Name
= FS
.getFunction().str();
1327 BuildMaxSampleMap(FS
, Name
);
1330 ProfileSummary InstrPS
= *IPBuilder
.getSummary();
1331 ProfileSummary SamplePS
= Reader
->getSummary();
1333 // Compute cold thresholds for instr profile and sample profile.
1334 uint64_t HotSampleThreshold
=
1335 ProfileSummaryBuilder::getEntryForPercentile(
1336 SamplePS
.getDetailedSummary(),
1337 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
1339 uint64_t ColdSampleThreshold
=
1340 ProfileSummaryBuilder::getEntryForPercentile(
1341 SamplePS
.getDetailedSummary(),
1342 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
1344 uint64_t HotInstrThreshold
=
1345 ProfileSummaryBuilder::getEntryForPercentile(
1346 InstrPS
.getDetailedSummary(),
1347 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
1349 uint64_t ColdInstrThreshold
=
1350 InstrProfColdThreshold
1351 ? InstrProfColdThreshold
1352 : ProfileSummaryBuilder::getEntryForPercentile(
1353 InstrPS
.getDetailedSummary(),
1354 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
1357 // Find hot/warm functions in sample profile which is cold in instr profile
1358 // and adjust the profiles of those functions in the instr profile.
1359 for (const auto &E
: FlattenSampleMap
) {
1360 uint64_t SampleMaxCount
= std::max(E
.second
.first
, E
.second
.second
);
1361 if (SampleMaxCount
< ColdSampleThreshold
)
1363 StringRef Name
= E
.first();
1364 auto It
= InstrProfileMap
.find(Name
);
1365 if (It
== InstrProfileMap
.end()) {
1366 auto NewName
= StaticFuncMap
.find(Name
);
1367 if (NewName
!= StaticFuncMap
.end()) {
1368 It
= InstrProfileMap
.find(NewName
->second
);
1369 if (NewName
->second
== DuplicateNameStr
) {
1370 WithColor::warning()
1371 << "Static function " << Name
1372 << " has multiple promoted names, cannot adjust profile.\n";
1376 if (It
== InstrProfileMap
.end() ||
1377 It
->second
.MaxCount
> ColdInstrThreshold
||
1378 It
->second
.NumEdgeCounters
< SupplMinSizeThreshold
)
1380 bool SetToHot
= SampleMaxCount
>= HotSampleThreshold
;
1381 updateInstrProfileEntry(It
->second
, SetToHot
, HotInstrThreshold
,
1382 ColdInstrThreshold
, ZeroCounterThreshold
);
1386 /// The main function to supplement instr profile with sample profile.
1387 /// \Inputs contains the instr profile. \p SampleFilename specifies the
1388 /// sample profile. \p OutputFilename specifies the output profile name.
1389 /// \p OutputFormat specifies the output profile format. \p OutputSparse
1390 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1391 /// specifies the minimal size for the functions whose profile will be
1392 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1393 /// a function contains too many zero counters and whether its profile
1394 /// should be dropped. \p InstrProfColdThreshold is the user specified
1395 /// cold threshold which will override the cold threshold got from the
1396 /// instr profile summary.
1397 static void supplementInstrProfile(const WeightedFileVector
&Inputs
,
1398 StringRef SampleFilename
, bool OutputSparse
,
1399 unsigned SupplMinSizeThreshold
,
1400 float ZeroCounterThreshold
,
1401 unsigned InstrProfColdThreshold
) {
1402 if (OutputFilename
== "-")
1403 exitWithError("cannot write indexed profdata format to stdout");
1404 if (Inputs
.size() != 1)
1405 exitWithError("expect one input to be an instr profile");
1406 if (Inputs
[0].Weight
!= 1)
1407 exitWithError("expect instr profile doesn't have weight");
1409 StringRef InstrFilename
= Inputs
[0].Filename
;
1411 // Read sample profile.
1412 LLVMContext Context
;
1413 auto FS
= vfs::getRealFileSystem();
1414 auto ReaderOrErr
= sampleprof::SampleProfileReader::create(
1415 SampleFilename
.str(), Context
, *FS
, FSDiscriminatorPassOption
);
1416 if (std::error_code EC
= ReaderOrErr
.getError())
1417 exitWithErrorCode(EC
, SampleFilename
);
1418 auto Reader
= std::move(ReaderOrErr
.get());
1419 if (std::error_code EC
= Reader
->read())
1420 exitWithErrorCode(EC
, SampleFilename
);
1422 // Read instr profile.
1423 std::mutex ErrorLock
;
1424 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
1425 auto WC
= std::make_unique
<WriterContext
>(OutputSparse
, ErrorLock
,
1427 loadInput(Inputs
[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC
.get());
1428 if (WC
->Errors
.size() > 0)
1429 exitWithError(std::move(WC
->Errors
[0].first
), InstrFilename
);
1431 adjustInstrProfile(WC
, Reader
, SupplMinSizeThreshold
, ZeroCounterThreshold
,
1432 InstrProfColdThreshold
);
1433 writeInstrProfile(OutputFilename
, OutputFormat
, WC
->Writer
);
1436 /// Make a copy of the given function samples with all symbol names remapped
1437 /// by the provided symbol remapper.
1438 static sampleprof::FunctionSamples
1439 remapSamples(const sampleprof::FunctionSamples
&Samples
,
1440 SymbolRemapper
&Remapper
, sampleprof_error
&Error
) {
1441 sampleprof::FunctionSamples Result
;
1442 Result
.setFunction(Remapper(Samples
.getFunction()));
1443 Result
.addTotalSamples(Samples
.getTotalSamples());
1444 Result
.addHeadSamples(Samples
.getHeadSamples());
1445 for (const auto &BodySample
: Samples
.getBodySamples()) {
1446 uint32_t MaskedDiscriminator
=
1447 BodySample
.first
.Discriminator
& getDiscriminatorMask();
1448 Result
.addBodySamples(BodySample
.first
.LineOffset
, MaskedDiscriminator
,
1449 BodySample
.second
.getSamples());
1450 for (const auto &Target
: BodySample
.second
.getCallTargets()) {
1451 Result
.addCalledTargetSamples(BodySample
.first
.LineOffset
,
1452 MaskedDiscriminator
,
1453 Remapper(Target
.first
), Target
.second
);
1456 for (const auto &CallsiteSamples
: Samples
.getCallsiteSamples()) {
1457 sampleprof::FunctionSamplesMap
&Target
=
1458 Result
.functionSamplesAt(CallsiteSamples
.first
);
1459 for (const auto &Callsite
: CallsiteSamples
.second
) {
1460 sampleprof::FunctionSamples Remapped
=
1461 remapSamples(Callsite
.second
, Remapper
, Error
);
1462 mergeSampleProfErrors(Error
,
1463 Target
[Remapped
.getFunction()].merge(Remapped
));
1469 static sampleprof::SampleProfileFormat FormatMap
[] = {
1470 sampleprof::SPF_None
,
1471 sampleprof::SPF_Text
,
1472 sampleprof::SPF_None
,
1473 sampleprof::SPF_Ext_Binary
,
1474 sampleprof::SPF_GCC
,
1475 sampleprof::SPF_Binary
};
1477 static std::unique_ptr
<MemoryBuffer
>
1478 getInputFileBuf(const StringRef
&InputFile
) {
1479 if (InputFile
== "")
1482 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
1484 exitWithErrorCode(BufOrError
.getError(), InputFile
);
1486 return std::move(*BufOrError
);
1489 static void populateProfileSymbolList(MemoryBuffer
*Buffer
,
1490 sampleprof::ProfileSymbolList
&PSL
) {
1494 SmallVector
<StringRef
, 32> SymbolVec
;
1495 StringRef Data
= Buffer
->getBuffer();
1496 Data
.split(SymbolVec
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1498 for (StringRef SymbolStr
: SymbolVec
)
1499 PSL
.add(SymbolStr
.trim());
1502 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter
&Writer
,
1503 ProfileFormat OutputFormat
,
1504 MemoryBuffer
*Buffer
,
1505 sampleprof::ProfileSymbolList
&WriterList
,
1506 bool CompressAllSections
, bool UseMD5
,
1507 bool GenPartialProfile
) {
1509 if (OutputFormat
== PF_Binary
)
1510 warn("-split-layout is ignored. Specify -extbinary to enable it");
1512 Writer
.setUseCtxSplitLayout();
1515 populateProfileSymbolList(Buffer
, WriterList
);
1516 if (WriterList
.size() > 0 && OutputFormat
!= PF_Ext_Binary
)
1517 warn("Profile Symbol list is not empty but the output format is not "
1518 "ExtBinary format. The list will be lost in the output. ");
1520 Writer
.setProfileSymbolList(&WriterList
);
1522 if (CompressAllSections
) {
1523 if (OutputFormat
!= PF_Ext_Binary
)
1524 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1526 Writer
.setToCompressAllSections();
1529 if (OutputFormat
!= PF_Ext_Binary
)
1530 warn("-use-md5 is ignored. Specify -extbinary to enable it");
1534 if (GenPartialProfile
) {
1535 if (OutputFormat
!= PF_Ext_Binary
)
1536 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1538 Writer
.setPartialProfile();
1542 static void mergeSampleProfile(const WeightedFileVector
&Inputs
,
1543 SymbolRemapper
*Remapper
,
1544 StringRef ProfileSymbolListFile
,
1545 size_t OutputSizeLimit
) {
1546 using namespace sampleprof
;
1547 SampleProfileMap ProfileMap
;
1548 SmallVector
<std::unique_ptr
<sampleprof::SampleProfileReader
>, 5> Readers
;
1549 LLVMContext Context
;
1550 sampleprof::ProfileSymbolList WriterList
;
1551 std::optional
<bool> ProfileIsProbeBased
;
1552 std::optional
<bool> ProfileIsCS
;
1553 for (const auto &Input
: Inputs
) {
1554 auto FS
= vfs::getRealFileSystem();
1555 auto ReaderOrErr
= SampleProfileReader::create(Input
.Filename
, Context
, *FS
,
1556 FSDiscriminatorPassOption
);
1557 if (std::error_code EC
= ReaderOrErr
.getError()) {
1558 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1562 // We need to keep the readers around until after all the files are
1563 // read so that we do not lose the function names stored in each
1564 // reader's memory. The function names are needed to write out the
1565 // merged profile map.
1566 Readers
.push_back(std::move(ReaderOrErr
.get()));
1567 const auto Reader
= Readers
.back().get();
1568 if (std::error_code EC
= Reader
->read()) {
1569 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1574 SampleProfileMap
&Profiles
= Reader
->getProfiles();
1575 if (ProfileIsProbeBased
&&
1576 ProfileIsProbeBased
!= FunctionSamples::ProfileIsProbeBased
)
1578 "cannot merge probe-based profile with non-probe-based profile");
1579 ProfileIsProbeBased
= FunctionSamples::ProfileIsProbeBased
;
1580 if (ProfileIsCS
&& ProfileIsCS
!= FunctionSamples::ProfileIsCS
)
1581 exitWithError("cannot merge CS profile with non-CS profile");
1582 ProfileIsCS
= FunctionSamples::ProfileIsCS
;
1583 for (SampleProfileMap::iterator I
= Profiles
.begin(), E
= Profiles
.end();
1585 sampleprof_error Result
= sampleprof_error::success
;
1586 FunctionSamples Remapped
=
1587 Remapper
? remapSamples(I
->second
, *Remapper
, Result
)
1588 : FunctionSamples();
1589 FunctionSamples
&Samples
= Remapper
? Remapped
: I
->second
;
1590 SampleContext FContext
= Samples
.getContext();
1591 mergeSampleProfErrors(Result
,
1592 ProfileMap
[FContext
].merge(Samples
, Input
.Weight
));
1593 if (Result
!= sampleprof_error::success
) {
1594 std::error_code EC
= make_error_code(Result
);
1595 handleMergeWriterError(errorCodeToError(EC
), Input
.Filename
,
1596 FContext
.toString());
1600 if (!DropProfileSymbolList
) {
1601 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
1602 Reader
->getProfileSymbolList();
1604 WriterList
.merge(*ReaderList
);
1608 if (ProfileIsCS
&& (SampleMergeColdContext
|| SampleTrimColdContext
)) {
1609 // Use threshold calculated from profile summary unless specified.
1610 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1611 auto Summary
= Builder
.computeSummaryForProfiles(ProfileMap
);
1612 uint64_t SampleProfColdThreshold
=
1613 ProfileSummaryBuilder::getColdCountThreshold(
1614 (Summary
->getDetailedSummary()));
1616 // Trim and merge cold context profile using cold threshold above;
1617 SampleContextTrimmer(ProfileMap
)
1618 .trimAndMergeColdContextProfiles(
1619 SampleProfColdThreshold
, SampleTrimColdContext
,
1620 SampleMergeColdContext
, SampleColdContextFrameDepth
, false);
1623 if (ProfileLayout
== llvm::sampleprof::SPL_Flat
) {
1624 ProfileConverter::flattenProfile(ProfileMap
, FunctionSamples::ProfileIsCS
);
1625 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1626 } else if (ProfileIsCS
&& ProfileLayout
== llvm::sampleprof::SPL_Nest
) {
1627 ProfileConverter
CSConverter(ProfileMap
);
1628 CSConverter
.convertCSProfiles();
1629 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1632 filterFunctions(ProfileMap
);
1635 SampleProfileWriter::create(OutputFilename
, FormatMap
[OutputFormat
]);
1636 if (std::error_code EC
= WriterOrErr
.getError())
1637 exitWithErrorCode(EC
, OutputFilename
);
1639 auto Writer
= std::move(WriterOrErr
.get());
1640 // WriterList will have StringRef refering to string in Buffer.
1641 // Make sure Buffer lives as long as WriterList.
1642 auto Buffer
= getInputFileBuf(ProfileSymbolListFile
);
1643 handleExtBinaryWriter(*Writer
, OutputFormat
, Buffer
.get(), WriterList
,
1644 CompressAllSections
, UseMD5
, GenPartialProfile
);
1646 // If OutputSizeLimit is 0 (default), it is the same as write().
1647 if (std::error_code EC
=
1648 Writer
->writeWithSizeLimit(ProfileMap
, OutputSizeLimit
))
1649 exitWithErrorCode(EC
);
1652 static WeightedFile
parseWeightedFile(const StringRef
&WeightedFilename
) {
1653 StringRef WeightStr
, FileName
;
1654 std::tie(WeightStr
, FileName
) = WeightedFilename
.split(',');
1657 if (WeightStr
.getAsInteger(10, Weight
) || Weight
< 1)
1658 exitWithError("input weight must be a positive integer");
1660 return {std::string(FileName
), Weight
};
1663 static void addWeightedInput(WeightedFileVector
&WNI
, const WeightedFile
&WF
) {
1664 StringRef Filename
= WF
.Filename
;
1665 uint64_t Weight
= WF
.Weight
;
1667 // If it's STDIN just pass it on.
1668 if (Filename
== "-") {
1669 WNI
.push_back({std::string(Filename
), Weight
});
1673 llvm::sys::fs::file_status Status
;
1674 llvm::sys::fs::status(Filename
, Status
);
1675 if (!llvm::sys::fs::exists(Status
))
1676 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory
),
1678 // If it's a source file, collect it.
1679 if (llvm::sys::fs::is_regular_file(Status
)) {
1680 WNI
.push_back({std::string(Filename
), Weight
});
1684 if (llvm::sys::fs::is_directory(Status
)) {
1686 for (llvm::sys::fs::recursive_directory_iterator
F(Filename
, EC
), E
;
1687 F
!= E
&& !EC
; F
.increment(EC
)) {
1688 if (llvm::sys::fs::is_regular_file(F
->path())) {
1689 addWeightedInput(WNI
, {F
->path(), Weight
});
1693 exitWithErrorCode(EC
, Filename
);
1697 static void parseInputFilenamesFile(MemoryBuffer
*Buffer
,
1698 WeightedFileVector
&WFV
) {
1702 SmallVector
<StringRef
, 8> Entries
;
1703 StringRef Data
= Buffer
->getBuffer();
1704 Data
.split(Entries
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1705 for (const StringRef
&FileWeightEntry
: Entries
) {
1706 StringRef SanitizedEntry
= FileWeightEntry
.trim(" \t\v\f\r");
1708 if (SanitizedEntry
.starts_with("#"))
1710 // If there's no comma, it's an unweighted profile.
1711 else if (!SanitizedEntry
.contains(','))
1712 addWeightedInput(WFV
, {std::string(SanitizedEntry
), 1});
1714 addWeightedInput(WFV
, parseWeightedFile(SanitizedEntry
));
1718 static int merge_main(StringRef ProgName
) {
1719 WeightedFileVector WeightedInputs
;
1720 for (StringRef Filename
: InputFilenames
)
1721 addWeightedInput(WeightedInputs
, {std::string(Filename
), 1});
1722 for (StringRef WeightedFilename
: WeightedInputFilenames
)
1723 addWeightedInput(WeightedInputs
, parseWeightedFile(WeightedFilename
));
1725 // Make sure that the file buffer stays alive for the duration of the
1726 // weighted input vector's lifetime.
1727 auto Buffer
= getInputFileBuf(InputFilenamesFile
);
1728 parseInputFilenamesFile(Buffer
.get(), WeightedInputs
);
1730 if (WeightedInputs
.empty())
1731 exitWithError("no input files specified. See " + ProgName
+ " merge -help");
1733 if (DumpInputFileList
) {
1734 for (auto &WF
: WeightedInputs
)
1735 outs() << WF
.Weight
<< "," << WF
.Filename
<< "\n";
1739 std::unique_ptr
<SymbolRemapper
> Remapper
;
1740 if (!RemappingFile
.empty())
1741 Remapper
= SymbolRemapper::create(RemappingFile
);
1743 if (!SupplInstrWithSample
.empty()) {
1744 if (ProfileKind
!= instr
)
1746 "-supplement-instr-with-sample can only work with -instr. ");
1748 supplementInstrProfile(WeightedInputs
, SupplInstrWithSample
, OutputSparse
,
1749 SupplMinSizeThreshold
, ZeroCounterThreshold
,
1750 InstrProfColdThreshold
);
1754 if (ProfileKind
== instr
)
1755 mergeInstrProfile(WeightedInputs
, Remapper
.get(), MaxDbgCorrelationWarnings
,
1758 mergeSampleProfile(WeightedInputs
, Remapper
.get(), ProfileSymbolListFile
,
1763 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1764 static void overlapInstrProfile(const std::string
&BaseFilename
,
1765 const std::string
&TestFilename
,
1766 const OverlapFuncFilters
&FuncFilter
,
1767 raw_fd_ostream
&OS
, bool IsCS
) {
1768 std::mutex ErrorLock
;
1769 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
1770 WriterContext
Context(false, ErrorLock
, WriterErrorCodes
);
1771 WeightedFile WeightedInput
{BaseFilename
, 1};
1772 OverlapStats Overlap
;
1773 Error E
= Overlap
.accumulateCounts(BaseFilename
, TestFilename
, IsCS
);
1775 exitWithError(std::move(E
), "error in getting profile count sums");
1776 if (Overlap
.Base
.CountSum
< 1.0f
) {
1777 OS
<< "Sum of edge counts for profile " << BaseFilename
<< " is 0.\n";
1780 if (Overlap
.Test
.CountSum
< 1.0f
) {
1781 OS
<< "Sum of edge counts for profile " << TestFilename
<< " is 0.\n";
1784 loadInput(WeightedInput
, nullptr, nullptr, /*ProfiledBinary=*/"", &Context
);
1785 overlapInput(BaseFilename
, TestFilename
, &Context
, Overlap
, FuncFilter
, OS
,
1791 struct SampleOverlapStats
{
1792 SampleContext BaseName
;
1793 SampleContext TestName
;
1794 // Number of overlap units
1795 uint64_t OverlapCount
= 0;
1796 // Total samples of overlap units
1797 uint64_t OverlapSample
= 0;
1798 // Number of and total samples of units that only present in base or test
1800 uint64_t BaseUniqueCount
= 0;
1801 uint64_t BaseUniqueSample
= 0;
1802 uint64_t TestUniqueCount
= 0;
1803 uint64_t TestUniqueSample
= 0;
1804 // Number of units and total samples in base or test profile
1805 uint64_t BaseCount
= 0;
1806 uint64_t BaseSample
= 0;
1807 uint64_t TestCount
= 0;
1808 uint64_t TestSample
= 0;
1809 // Number of and total samples of units that present in at least one profile
1810 uint64_t UnionCount
= 0;
1811 uint64_t UnionSample
= 0;
1812 // Weighted similarity
1813 double Similarity
= 0.0;
1814 // For SampleOverlapStats instances representing functions, weights of the
1815 // function in base and test profiles
1816 double BaseWeight
= 0.0;
1817 double TestWeight
= 0.0;
1819 SampleOverlapStats() = default;
1821 } // end anonymous namespace
1824 struct FuncSampleStats
{
1825 uint64_t SampleSum
= 0;
1826 uint64_t MaxSample
= 0;
1827 uint64_t HotBlockCount
= 0;
1828 FuncSampleStats() = default;
1829 FuncSampleStats(uint64_t SampleSum
, uint64_t MaxSample
,
1830 uint64_t HotBlockCount
)
1831 : SampleSum(SampleSum
), MaxSample(MaxSample
),
1832 HotBlockCount(HotBlockCount
) {}
1834 } // end anonymous namespace
1837 enum MatchStatus
{ MS_Match
, MS_FirstUnique
, MS_SecondUnique
, MS_None
};
1839 // Class for updating merging steps for two sorted maps. The class should be
1840 // instantiated with a map iterator type.
1841 template <class T
> class MatchStep
{
1843 MatchStep() = delete;
1845 MatchStep(T FirstIter
, T FirstEnd
, T SecondIter
, T SecondEnd
)
1846 : FirstIter(FirstIter
), FirstEnd(FirstEnd
), SecondIter(SecondIter
),
1847 SecondEnd(SecondEnd
), Status(MS_None
) {}
1849 bool areBothFinished() const {
1850 return (FirstIter
== FirstEnd
&& SecondIter
== SecondEnd
);
1853 bool isFirstFinished() const { return FirstIter
== FirstEnd
; }
1855 bool isSecondFinished() const { return SecondIter
== SecondEnd
; }
1857 /// Advance one step based on the previous match status unless the previous
1858 /// status is MS_None. Then update Status based on the comparison between two
1859 /// container iterators at the current step. If the previous status is
1860 /// MS_None, it means two iterators are at the beginning and no comparison has
1861 /// been made, so we simply update Status without advancing the iterators.
1862 void updateOneStep();
1864 T
getFirstIter() const { return FirstIter
; }
1866 T
getSecondIter() const { return SecondIter
; }
1868 MatchStatus
getMatchStatus() const { return Status
; }
1871 // Current iterator and end iterator of the first container.
1874 // Current iterator and end iterator of the second container.
1877 // Match status of the current step.
1880 } // end anonymous namespace
1882 template <class T
> void MatchStep
<T
>::updateOneStep() {
1888 case MS_FirstUnique
:
1891 case MS_SecondUnique
:
1898 // Update Status according to iterators at the current step.
1899 if (areBothFinished())
1901 if (FirstIter
!= FirstEnd
&&
1902 (SecondIter
== SecondEnd
|| FirstIter
->first
< SecondIter
->first
))
1903 Status
= MS_FirstUnique
;
1904 else if (SecondIter
!= SecondEnd
&&
1905 (FirstIter
== FirstEnd
|| SecondIter
->first
< FirstIter
->first
))
1906 Status
= MS_SecondUnique
;
1911 // Return the sum of line/block samples, the max line/block sample, and the
1912 // number of line/block samples above the given threshold in a function
1913 // including its inlinees.
1914 static void getFuncSampleStats(const sampleprof::FunctionSamples
&Func
,
1915 FuncSampleStats
&FuncStats
,
1916 uint64_t HotThreshold
) {
1917 for (const auto &L
: Func
.getBodySamples()) {
1918 uint64_t Sample
= L
.second
.getSamples();
1919 FuncStats
.SampleSum
+= Sample
;
1920 FuncStats
.MaxSample
= std::max(FuncStats
.MaxSample
, Sample
);
1921 if (Sample
>= HotThreshold
)
1922 ++FuncStats
.HotBlockCount
;
1925 for (const auto &C
: Func
.getCallsiteSamples()) {
1926 for (const auto &F
: C
.second
)
1927 getFuncSampleStats(F
.second
, FuncStats
, HotThreshold
);
1931 /// Predicate that determines if a function is hot with a given threshold. We
1932 /// keep it separate from its callsites for possible extension in the future.
1933 static bool isFunctionHot(const FuncSampleStats
&FuncStats
,
1934 uint64_t HotThreshold
) {
1935 // We intentionally compare the maximum sample count in a function with the
1936 // HotThreshold to get an approximate determination on hot functions.
1937 return (FuncStats
.MaxSample
>= HotThreshold
);
1941 class SampleOverlapAggregator
{
1943 SampleOverlapAggregator(const std::string
&BaseFilename
,
1944 const std::string
&TestFilename
,
1945 double LowSimilarityThreshold
, double Epsilon
,
1946 const OverlapFuncFilters
&FuncFilter
)
1947 : BaseFilename(BaseFilename
), TestFilename(TestFilename
),
1948 LowSimilarityThreshold(LowSimilarityThreshold
), Epsilon(Epsilon
),
1949 FuncFilter(FuncFilter
) {}
1951 /// Detect 0-sample input profile and report to output stream. This interface
1952 /// should be called after loadProfiles().
1953 bool detectZeroSampleProfile(raw_fd_ostream
&OS
) const;
1955 /// Write out function-level similarity statistics for functions specified by
1956 /// options --function, --value-cutoff, and --similarity-cutoff.
1957 void dumpFuncSimilarity(raw_fd_ostream
&OS
) const;
1959 /// Write out program-level similarity and overlap statistics.
1960 void dumpProgramSummary(raw_fd_ostream
&OS
) const;
1962 /// Write out hot-function and hot-block statistics for base_profile,
1963 /// test_profile, and their overlap. For both cases, the overlap HO is
1964 /// calculated as follows:
1965 /// Given the number of functions (or blocks) that are hot in both profiles
1966 /// HCommon and the number of functions (or blocks) that are hot in at
1967 /// least one profile HUnion, HO = HCommon / HUnion.
1968 void dumpHotFuncAndBlockOverlap(raw_fd_ostream
&OS
) const;
1970 /// This function tries matching functions in base and test profiles. For each
1971 /// pair of matched functions, it aggregates the function-level
1972 /// similarity into a profile-level similarity. It also dump function-level
1973 /// similarity information of functions specified by --function,
1974 /// --value-cutoff, and --similarity-cutoff options. The program-level
1975 /// similarity PS is computed as follows:
1976 /// Given function-level similarity FS(A) for all function A, the
1977 /// weight of function A in base profile WB(A), and the weight of function
1978 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1979 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1980 /// meaning no-overlap.
1981 void computeSampleProfileOverlap(raw_fd_ostream
&OS
);
1983 /// Initialize ProfOverlap with the sum of samples in base and test
1984 /// profiles. This function also computes and keeps the sum of samples and
1985 /// max sample counts of each function in BaseStats and TestStats for later
1986 /// use to avoid re-computations.
1987 void initializeSampleProfileOverlap();
1989 /// Load profiles specified by BaseFilename and TestFilename.
1990 std::error_code
loadProfiles();
1992 using FuncSampleStatsMap
=
1993 std::unordered_map
<SampleContext
, FuncSampleStats
, SampleContext::Hash
>;
1996 SampleOverlapStats ProfOverlap
;
1997 SampleOverlapStats HotFuncOverlap
;
1998 SampleOverlapStats HotBlockOverlap
;
1999 std::string BaseFilename
;
2000 std::string TestFilename
;
2001 std::unique_ptr
<sampleprof::SampleProfileReader
> BaseReader
;
2002 std::unique_ptr
<sampleprof::SampleProfileReader
> TestReader
;
2003 // BaseStats and TestStats hold FuncSampleStats for each function, with
2004 // function name as the key.
2005 FuncSampleStatsMap BaseStats
;
2006 FuncSampleStatsMap TestStats
;
2007 // Low similarity threshold in floating point number
2008 double LowSimilarityThreshold
;
2009 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2010 // for tracking hot blocks.
2011 uint64_t BaseHotThreshold
;
2012 uint64_t TestHotThreshold
;
2013 // A small threshold used to round the results of floating point accumulations
2014 // to resolve imprecision.
2015 const double Epsilon
;
2016 std::multimap
<double, SampleOverlapStats
, std::greater
<double>>
2018 // FuncFilter carries specifications in options --value-cutoff and
2020 OverlapFuncFilters FuncFilter
;
2021 // Column offsets for printing the function-level details table.
2022 static const unsigned int TestWeightCol
= 15;
2023 static const unsigned int SimilarityCol
= 30;
2024 static const unsigned int OverlapCol
= 43;
2025 static const unsigned int BaseUniqueCol
= 53;
2026 static const unsigned int TestUniqueCol
= 67;
2027 static const unsigned int BaseSampleCol
= 81;
2028 static const unsigned int TestSampleCol
= 96;
2029 static const unsigned int FuncNameCol
= 111;
2031 /// Return a similarity of two line/block sample counters in the same
2032 /// function in base and test profiles. The line/block-similarity BS(i) is
2033 /// computed as follows:
2034 /// For an offsets i, given the sample count at i in base profile BB(i),
2035 /// the sample count at i in test profile BT(i), the sum of sample counts
2036 /// in this function in base profile SB, and the sum of sample counts in
2037 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2038 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2039 double computeBlockSimilarity(uint64_t BaseSample
, uint64_t TestSample
,
2040 const SampleOverlapStats
&FuncOverlap
) const;
2042 void updateHotBlockOverlap(uint64_t BaseSample
, uint64_t TestSample
,
2043 uint64_t HotBlockCount
);
2045 void getHotFunctions(const FuncSampleStatsMap
&ProfStats
,
2046 FuncSampleStatsMap
&HotFunc
,
2047 uint64_t HotThreshold
) const;
2049 void computeHotFuncOverlap();
2051 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2052 /// Difference for two sample units in a matched function according to the
2053 /// given match status.
2054 void updateOverlapStatsForFunction(uint64_t BaseSample
, uint64_t TestSample
,
2055 uint64_t HotBlockCount
,
2056 SampleOverlapStats
&FuncOverlap
,
2057 double &Difference
, MatchStatus Status
);
2059 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2060 /// Difference for unmatched callees that only present in one profile in a
2061 /// matched caller function.
2062 void updateForUnmatchedCallee(const sampleprof::FunctionSamples
&Func
,
2063 SampleOverlapStats
&FuncOverlap
,
2064 double &Difference
, MatchStatus Status
);
2066 /// This function updates sample overlap statistics of an overlap function in
2067 /// base and test profile. It also calculates a function-internal similarity
2069 /// For offsets i that have samples in at least one profile in this
2070 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
2071 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2072 /// 0.0 meaning no overlap.
2073 double computeSampleFunctionInternalOverlap(
2074 const sampleprof::FunctionSamples
&BaseFunc
,
2075 const sampleprof::FunctionSamples
&TestFunc
,
2076 SampleOverlapStats
&FuncOverlap
);
2078 /// Function-level similarity (FS) is a weighted value over function internal
2079 /// similarity (FIS). This function computes a function's FS from its FIS by
2080 /// applying the weight.
2081 double weightForFuncSimilarity(double FuncSimilarity
, uint64_t BaseFuncSample
,
2082 uint64_t TestFuncSample
) const;
2084 /// The function-level similarity FS(A) for a function A is computed as
2086 /// Compute a function-internal similarity FIS(A) by
2087 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
2088 /// function A in base profile WB(A), and the weight of function A in test
2089 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2090 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2092 computeSampleFunctionOverlap(const sampleprof::FunctionSamples
*BaseFunc
,
2093 const sampleprof::FunctionSamples
*TestFunc
,
2094 SampleOverlapStats
*FuncOverlap
,
2095 uint64_t BaseFuncSample
,
2096 uint64_t TestFuncSample
);
2098 /// Profile-level similarity (PS) is a weighted aggregate over function-level
2099 /// similarities (FS). This method weights the FS value by the function
2100 /// weights in the base and test profiles for the aggregation.
2101 double weightByImportance(double FuncSimilarity
, uint64_t BaseFuncSample
,
2102 uint64_t TestFuncSample
) const;
2104 } // end anonymous namespace
2106 bool SampleOverlapAggregator::detectZeroSampleProfile(
2107 raw_fd_ostream
&OS
) const {
2108 bool HaveZeroSample
= false;
2109 if (ProfOverlap
.BaseSample
== 0) {
2110 OS
<< "Sum of sample counts for profile " << BaseFilename
<< " is 0.\n";
2111 HaveZeroSample
= true;
2113 if (ProfOverlap
.TestSample
== 0) {
2114 OS
<< "Sum of sample counts for profile " << TestFilename
<< " is 0.\n";
2115 HaveZeroSample
= true;
2117 return HaveZeroSample
;
2120 double SampleOverlapAggregator::computeBlockSimilarity(
2121 uint64_t BaseSample
, uint64_t TestSample
,
2122 const SampleOverlapStats
&FuncOverlap
) const {
2123 double BaseFrac
= 0.0;
2124 double TestFrac
= 0.0;
2125 if (FuncOverlap
.BaseSample
> 0)
2126 BaseFrac
= static_cast<double>(BaseSample
) / FuncOverlap
.BaseSample
;
2127 if (FuncOverlap
.TestSample
> 0)
2128 TestFrac
= static_cast<double>(TestSample
) / FuncOverlap
.TestSample
;
2129 return 1.0 - std::fabs(BaseFrac
- TestFrac
);
2132 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample
,
2133 uint64_t TestSample
,
2134 uint64_t HotBlockCount
) {
2135 bool IsBaseHot
= (BaseSample
>= BaseHotThreshold
);
2136 bool IsTestHot
= (TestSample
>= TestHotThreshold
);
2137 if (!IsBaseHot
&& !IsTestHot
)
2140 HotBlockOverlap
.UnionCount
+= HotBlockCount
;
2142 HotBlockOverlap
.BaseCount
+= HotBlockCount
;
2144 HotBlockOverlap
.TestCount
+= HotBlockCount
;
2145 if (IsBaseHot
&& IsTestHot
)
2146 HotBlockOverlap
.OverlapCount
+= HotBlockCount
;
2149 void SampleOverlapAggregator::getHotFunctions(
2150 const FuncSampleStatsMap
&ProfStats
, FuncSampleStatsMap
&HotFunc
,
2151 uint64_t HotThreshold
) const {
2152 for (const auto &F
: ProfStats
) {
2153 if (isFunctionHot(F
.second
, HotThreshold
))
2154 HotFunc
.emplace(F
.first
, F
.second
);
2158 void SampleOverlapAggregator::computeHotFuncOverlap() {
2159 FuncSampleStatsMap BaseHotFunc
;
2160 getHotFunctions(BaseStats
, BaseHotFunc
, BaseHotThreshold
);
2161 HotFuncOverlap
.BaseCount
= BaseHotFunc
.size();
2163 FuncSampleStatsMap TestHotFunc
;
2164 getHotFunctions(TestStats
, TestHotFunc
, TestHotThreshold
);
2165 HotFuncOverlap
.TestCount
= TestHotFunc
.size();
2166 HotFuncOverlap
.UnionCount
= HotFuncOverlap
.TestCount
;
2168 for (const auto &F
: BaseHotFunc
) {
2169 if (TestHotFunc
.count(F
.first
))
2170 ++HotFuncOverlap
.OverlapCount
;
2172 ++HotFuncOverlap
.UnionCount
;
2176 void SampleOverlapAggregator::updateOverlapStatsForFunction(
2177 uint64_t BaseSample
, uint64_t TestSample
, uint64_t HotBlockCount
,
2178 SampleOverlapStats
&FuncOverlap
, double &Difference
, MatchStatus Status
) {
2179 assert(Status
!= MS_None
&&
2180 "Match status should be updated before updating overlap statistics");
2181 if (Status
== MS_FirstUnique
) {
2183 FuncOverlap
.BaseUniqueSample
+= BaseSample
;
2184 } else if (Status
== MS_SecondUnique
) {
2186 FuncOverlap
.TestUniqueSample
+= TestSample
;
2188 ++FuncOverlap
.OverlapCount
;
2191 FuncOverlap
.UnionSample
+= std::max(BaseSample
, TestSample
);
2192 FuncOverlap
.OverlapSample
+= std::min(BaseSample
, TestSample
);
2194 1.0 - computeBlockSimilarity(BaseSample
, TestSample
, FuncOverlap
);
2195 updateHotBlockOverlap(BaseSample
, TestSample
, HotBlockCount
);
2198 void SampleOverlapAggregator::updateForUnmatchedCallee(
2199 const sampleprof::FunctionSamples
&Func
, SampleOverlapStats
&FuncOverlap
,
2200 double &Difference
, MatchStatus Status
) {
2201 assert((Status
== MS_FirstUnique
|| Status
== MS_SecondUnique
) &&
2202 "Status must be either of the two unmatched cases");
2203 FuncSampleStats FuncStats
;
2204 if (Status
== MS_FirstUnique
) {
2205 getFuncSampleStats(Func
, FuncStats
, BaseHotThreshold
);
2206 updateOverlapStatsForFunction(FuncStats
.SampleSum
, 0,
2207 FuncStats
.HotBlockCount
, FuncOverlap
,
2208 Difference
, Status
);
2210 getFuncSampleStats(Func
, FuncStats
, TestHotThreshold
);
2211 updateOverlapStatsForFunction(0, FuncStats
.SampleSum
,
2212 FuncStats
.HotBlockCount
, FuncOverlap
,
2213 Difference
, Status
);
2217 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2218 const sampleprof::FunctionSamples
&BaseFunc
,
2219 const sampleprof::FunctionSamples
&TestFunc
,
2220 SampleOverlapStats
&FuncOverlap
) {
2222 using namespace sampleprof
;
2224 double Difference
= 0;
2226 // Accumulate Difference for regular line/block samples in the function.
2227 // We match them through sort-merge join algorithm because
2228 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2229 // by their offsets.
2230 MatchStep
<BodySampleMap::const_iterator
> BlockIterStep(
2231 BaseFunc
.getBodySamples().cbegin(), BaseFunc
.getBodySamples().cend(),
2232 TestFunc
.getBodySamples().cbegin(), TestFunc
.getBodySamples().cend());
2233 BlockIterStep
.updateOneStep();
2234 while (!BlockIterStep
.areBothFinished()) {
2235 uint64_t BaseSample
=
2236 BlockIterStep
.isFirstFinished()
2238 : BlockIterStep
.getFirstIter()->second
.getSamples();
2239 uint64_t TestSample
=
2240 BlockIterStep
.isSecondFinished()
2242 : BlockIterStep
.getSecondIter()->second
.getSamples();
2243 updateOverlapStatsForFunction(BaseSample
, TestSample
, 1, FuncOverlap
,
2244 Difference
, BlockIterStep
.getMatchStatus());
2246 BlockIterStep
.updateOneStep();
2249 // Accumulate Difference for callsite lines in the function. We match
2250 // them through sort-merge algorithm because
2251 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2252 // ordered by their offsets.
2253 MatchStep
<CallsiteSampleMap::const_iterator
> CallsiteIterStep(
2254 BaseFunc
.getCallsiteSamples().cbegin(),
2255 BaseFunc
.getCallsiteSamples().cend(),
2256 TestFunc
.getCallsiteSamples().cbegin(),
2257 TestFunc
.getCallsiteSamples().cend());
2258 CallsiteIterStep
.updateOneStep();
2259 while (!CallsiteIterStep
.areBothFinished()) {
2260 MatchStatus CallsiteStepStatus
= CallsiteIterStep
.getMatchStatus();
2261 assert(CallsiteStepStatus
!= MS_None
&&
2262 "Match status should be updated before entering loop body");
2264 if (CallsiteStepStatus
!= MS_Match
) {
2265 auto Callsite
= (CallsiteStepStatus
== MS_FirstUnique
)
2266 ? CallsiteIterStep
.getFirstIter()
2267 : CallsiteIterStep
.getSecondIter();
2268 for (const auto &F
: Callsite
->second
)
2269 updateForUnmatchedCallee(F
.second
, FuncOverlap
, Difference
,
2270 CallsiteStepStatus
);
2272 // There may be multiple inlinees at the same offset, so we need to try
2273 // matching all of them. This match is implemented through sort-merge
2274 // algorithm because callsite records at the same offset are ordered by
2276 MatchStep
<FunctionSamplesMap::const_iterator
> CalleeIterStep(
2277 CallsiteIterStep
.getFirstIter()->second
.cbegin(),
2278 CallsiteIterStep
.getFirstIter()->second
.cend(),
2279 CallsiteIterStep
.getSecondIter()->second
.cbegin(),
2280 CallsiteIterStep
.getSecondIter()->second
.cend());
2281 CalleeIterStep
.updateOneStep();
2282 while (!CalleeIterStep
.areBothFinished()) {
2283 MatchStatus CalleeStepStatus
= CalleeIterStep
.getMatchStatus();
2284 if (CalleeStepStatus
!= MS_Match
) {
2285 auto Callee
= (CalleeStepStatus
== MS_FirstUnique
)
2286 ? CalleeIterStep
.getFirstIter()
2287 : CalleeIterStep
.getSecondIter();
2288 updateForUnmatchedCallee(Callee
->second
, FuncOverlap
, Difference
,
2291 // An inlined function can contain other inlinees inside, so compute
2292 // the Difference recursively.
2293 Difference
+= 2.0 - 2 * computeSampleFunctionInternalOverlap(
2294 CalleeIterStep
.getFirstIter()->second
,
2295 CalleeIterStep
.getSecondIter()->second
,
2298 CalleeIterStep
.updateOneStep();
2301 CallsiteIterStep
.updateOneStep();
2304 // Difference reflects the total differences of line/block samples in this
2305 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2306 // reflect the similarity between function profiles in [0.0f to 1.0f].
2307 return (2.0 - Difference
) / 2;
2310 double SampleOverlapAggregator::weightForFuncSimilarity(
2311 double FuncInternalSimilarity
, uint64_t BaseFuncSample
,
2312 uint64_t TestFuncSample
) const {
2313 // Compute the weight as the distance between the function weights in two
2315 double BaseFrac
= 0.0;
2316 double TestFrac
= 0.0;
2317 assert(ProfOverlap
.BaseSample
> 0 &&
2318 "Total samples in base profile should be greater than 0");
2319 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
;
2320 assert(ProfOverlap
.TestSample
> 0 &&
2321 "Total samples in test profile should be greater than 0");
2322 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
;
2323 double WeightDistance
= std::fabs(BaseFrac
- TestFrac
);
2325 // Take WeightDistance into the similarity.
2326 return FuncInternalSimilarity
* (1 - WeightDistance
);
2330 SampleOverlapAggregator::weightByImportance(double FuncSimilarity
,
2331 uint64_t BaseFuncSample
,
2332 uint64_t TestFuncSample
) const {
2334 double BaseFrac
= 0.0;
2335 double TestFrac
= 0.0;
2336 assert(ProfOverlap
.BaseSample
> 0 &&
2337 "Total samples in base profile should be greater than 0");
2338 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
/ 2.0;
2339 assert(ProfOverlap
.TestSample
> 0 &&
2340 "Total samples in test profile should be greater than 0");
2341 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
/ 2.0;
2342 return FuncSimilarity
* (BaseFrac
+ TestFrac
);
2345 double SampleOverlapAggregator::computeSampleFunctionOverlap(
2346 const sampleprof::FunctionSamples
*BaseFunc
,
2347 const sampleprof::FunctionSamples
*TestFunc
,
2348 SampleOverlapStats
*FuncOverlap
, uint64_t BaseFuncSample
,
2349 uint64_t TestFuncSample
) {
2350 // Default function internal similarity before weighted, meaning two functions
2352 const double DefaultFuncInternalSimilarity
= 0;
2353 double FuncSimilarity
;
2354 double FuncInternalSimilarity
;
2356 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2357 // In this case, we use DefaultFuncInternalSimilarity as the function internal
2359 if (!BaseFunc
|| !TestFunc
) {
2360 FuncInternalSimilarity
= DefaultFuncInternalSimilarity
;
2362 assert(FuncOverlap
!= nullptr &&
2363 "FuncOverlap should be provided in this case");
2364 FuncInternalSimilarity
= computeSampleFunctionInternalOverlap(
2365 *BaseFunc
, *TestFunc
, *FuncOverlap
);
2366 // Now, FuncInternalSimilarity may be a little less than 0 due to
2367 // imprecision of floating point accumulations. Make it zero if the
2368 // difference is below Epsilon.
2369 FuncInternalSimilarity
= (std::fabs(FuncInternalSimilarity
- 0) < Epsilon
)
2371 : FuncInternalSimilarity
;
2373 FuncSimilarity
= weightForFuncSimilarity(FuncInternalSimilarity
,
2374 BaseFuncSample
, TestFuncSample
);
2375 return FuncSimilarity
;
2378 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream
&OS
) {
2379 using namespace sampleprof
;
2381 std::unordered_map
<SampleContext
, const FunctionSamples
*,
2382 SampleContext::Hash
>
2384 const auto &BaseProfiles
= BaseReader
->getProfiles();
2385 for (const auto &BaseFunc
: BaseProfiles
) {
2386 BaseFuncProf
.emplace(BaseFunc
.second
.getContext(), &(BaseFunc
.second
));
2388 ProfOverlap
.UnionCount
= BaseFuncProf
.size();
2390 const auto &TestProfiles
= TestReader
->getProfiles();
2391 for (const auto &TestFunc
: TestProfiles
) {
2392 SampleOverlapStats FuncOverlap
;
2393 FuncOverlap
.TestName
= TestFunc
.second
.getContext();
2394 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2395 "TestStats should have records for all functions in test profile "
2397 FuncOverlap
.TestSample
= TestStats
[FuncOverlap
.TestName
].SampleSum
;
2399 bool Matched
= false;
2400 const auto Match
= BaseFuncProf
.find(FuncOverlap
.TestName
);
2401 if (Match
== BaseFuncProf
.end()) {
2402 const FuncSampleStats
&FuncStats
= TestStats
[FuncOverlap
.TestName
];
2403 ++ProfOverlap
.TestUniqueCount
;
2404 ProfOverlap
.TestUniqueSample
+= FuncStats
.SampleSum
;
2405 FuncOverlap
.TestUniqueSample
= FuncStats
.SampleSum
;
2407 updateHotBlockOverlap(0, FuncStats
.SampleSum
, FuncStats
.HotBlockCount
);
2409 double FuncSimilarity
= computeSampleFunctionOverlap(
2410 nullptr, nullptr, nullptr, 0, FuncStats
.SampleSum
);
2411 ProfOverlap
.Similarity
+=
2412 weightByImportance(FuncSimilarity
, 0, FuncStats
.SampleSum
);
2414 ++ProfOverlap
.UnionCount
;
2415 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2417 ++ProfOverlap
.OverlapCount
;
2419 // Two functions match with each other. Compute function-level overlap and
2420 // aggregate them into profile-level overlap.
2421 FuncOverlap
.BaseName
= Match
->second
->getContext();
2422 assert(BaseStats
.count(FuncOverlap
.BaseName
) &&
2423 "BaseStats should have records for all functions in base profile "
2425 FuncOverlap
.BaseSample
= BaseStats
[FuncOverlap
.BaseName
].SampleSum
;
2427 FuncOverlap
.Similarity
= computeSampleFunctionOverlap(
2428 Match
->second
, &TestFunc
.second
, &FuncOverlap
, FuncOverlap
.BaseSample
,
2429 FuncOverlap
.TestSample
);
2430 ProfOverlap
.Similarity
+=
2431 weightByImportance(FuncOverlap
.Similarity
, FuncOverlap
.BaseSample
,
2432 FuncOverlap
.TestSample
);
2433 ProfOverlap
.OverlapSample
+= FuncOverlap
.OverlapSample
;
2434 ProfOverlap
.UnionSample
+= FuncOverlap
.UnionSample
;
2436 // Accumulate the percentage of base unique and test unique samples into
2438 ProfOverlap
.BaseUniqueSample
+= FuncOverlap
.BaseUniqueSample
;
2439 ProfOverlap
.TestUniqueSample
+= FuncOverlap
.TestUniqueSample
;
2441 // Remove matched base functions for later reporting functions not found
2443 BaseFuncProf
.erase(Match
);
2447 // Print function-level similarity information if specified by options.
2448 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2449 "TestStats should have records for all functions in test profile "
2451 if (TestStats
[FuncOverlap
.TestName
].MaxSample
>= FuncFilter
.ValueCutoff
||
2452 (Matched
&& FuncOverlap
.Similarity
< LowSimilarityThreshold
) ||
2453 (Matched
&& !FuncFilter
.NameFilter
.empty() &&
2454 FuncOverlap
.BaseName
.toString().find(FuncFilter
.NameFilter
) !=
2455 std::string::npos
)) {
2456 assert(ProfOverlap
.BaseSample
> 0 &&
2457 "Total samples in base profile should be greater than 0");
2458 FuncOverlap
.BaseWeight
=
2459 static_cast<double>(FuncOverlap
.BaseSample
) / ProfOverlap
.BaseSample
;
2460 assert(ProfOverlap
.TestSample
> 0 &&
2461 "Total samples in test profile should be greater than 0");
2462 FuncOverlap
.TestWeight
=
2463 static_cast<double>(FuncOverlap
.TestSample
) / ProfOverlap
.TestSample
;
2464 FuncSimilarityDump
.emplace(FuncOverlap
.BaseWeight
, FuncOverlap
);
2468 // Traverse through functions in base profile but not in test profile.
2469 for (const auto &F
: BaseFuncProf
) {
2470 assert(BaseStats
.count(F
.second
->getContext()) &&
2471 "BaseStats should have records for all functions in base profile "
2473 const FuncSampleStats
&FuncStats
= BaseStats
[F
.second
->getContext()];
2474 ++ProfOverlap
.BaseUniqueCount
;
2475 ProfOverlap
.BaseUniqueSample
+= FuncStats
.SampleSum
;
2477 updateHotBlockOverlap(FuncStats
.SampleSum
, 0, FuncStats
.HotBlockCount
);
2479 double FuncSimilarity
= computeSampleFunctionOverlap(
2480 nullptr, nullptr, nullptr, FuncStats
.SampleSum
, 0);
2481 ProfOverlap
.Similarity
+=
2482 weightByImportance(FuncSimilarity
, FuncStats
.SampleSum
, 0);
2484 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2487 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2488 // of floating point accumulations. Make it 1.0 if the difference is below
2490 ProfOverlap
.Similarity
= (std::fabs(ProfOverlap
.Similarity
- 1) < Epsilon
)
2492 : ProfOverlap
.Similarity
;
2494 computeHotFuncOverlap();
2497 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2498 const auto &BaseProf
= BaseReader
->getProfiles();
2499 for (const auto &I
: BaseProf
) {
2500 ++ProfOverlap
.BaseCount
;
2501 FuncSampleStats FuncStats
;
2502 getFuncSampleStats(I
.second
, FuncStats
, BaseHotThreshold
);
2503 ProfOverlap
.BaseSample
+= FuncStats
.SampleSum
;
2504 BaseStats
.emplace(I
.second
.getContext(), FuncStats
);
2507 const auto &TestProf
= TestReader
->getProfiles();
2508 for (const auto &I
: TestProf
) {
2509 ++ProfOverlap
.TestCount
;
2510 FuncSampleStats FuncStats
;
2511 getFuncSampleStats(I
.second
, FuncStats
, TestHotThreshold
);
2512 ProfOverlap
.TestSample
+= FuncStats
.SampleSum
;
2513 TestStats
.emplace(I
.second
.getContext(), FuncStats
);
2516 ProfOverlap
.BaseName
= StringRef(BaseFilename
);
2517 ProfOverlap
.TestName
= StringRef(TestFilename
);
2520 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream
&OS
) const {
2521 using namespace sampleprof
;
2523 if (FuncSimilarityDump
.empty())
2526 formatted_raw_ostream
FOS(OS
);
2527 FOS
<< "Function-level details:\n";
2528 FOS
<< "Base weight";
2529 FOS
.PadToColumn(TestWeightCol
);
2530 FOS
<< "Test weight";
2531 FOS
.PadToColumn(SimilarityCol
);
2532 FOS
<< "Similarity";
2533 FOS
.PadToColumn(OverlapCol
);
2535 FOS
.PadToColumn(BaseUniqueCol
);
2536 FOS
<< "Base unique";
2537 FOS
.PadToColumn(TestUniqueCol
);
2538 FOS
<< "Test unique";
2539 FOS
.PadToColumn(BaseSampleCol
);
2540 FOS
<< "Base samples";
2541 FOS
.PadToColumn(TestSampleCol
);
2542 FOS
<< "Test samples";
2543 FOS
.PadToColumn(FuncNameCol
);
2544 FOS
<< "Function name\n";
2545 for (const auto &F
: FuncSimilarityDump
) {
2546 double OverlapPercent
=
2547 F
.second
.UnionSample
> 0
2548 ? static_cast<double>(F
.second
.OverlapSample
) / F
.second
.UnionSample
2550 double BaseUniquePercent
=
2551 F
.second
.BaseSample
> 0
2552 ? static_cast<double>(F
.second
.BaseUniqueSample
) /
2555 double TestUniquePercent
=
2556 F
.second
.TestSample
> 0
2557 ? static_cast<double>(F
.second
.TestUniqueSample
) /
2561 FOS
<< format("%.2f%%", F
.second
.BaseWeight
* 100);
2562 FOS
.PadToColumn(TestWeightCol
);
2563 FOS
<< format("%.2f%%", F
.second
.TestWeight
* 100);
2564 FOS
.PadToColumn(SimilarityCol
);
2565 FOS
<< format("%.2f%%", F
.second
.Similarity
* 100);
2566 FOS
.PadToColumn(OverlapCol
);
2567 FOS
<< format("%.2f%%", OverlapPercent
* 100);
2568 FOS
.PadToColumn(BaseUniqueCol
);
2569 FOS
<< format("%.2f%%", BaseUniquePercent
* 100);
2570 FOS
.PadToColumn(TestUniqueCol
);
2571 FOS
<< format("%.2f%%", TestUniquePercent
* 100);
2572 FOS
.PadToColumn(BaseSampleCol
);
2573 FOS
<< F
.second
.BaseSample
;
2574 FOS
.PadToColumn(TestSampleCol
);
2575 FOS
<< F
.second
.TestSample
;
2576 FOS
.PadToColumn(FuncNameCol
);
2577 FOS
<< F
.second
.TestName
.toString() << "\n";
2581 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream
&OS
) const {
2582 OS
<< "Profile overlap infomation for base_profile: "
2583 << ProfOverlap
.BaseName
.toString()
2584 << " and test_profile: " << ProfOverlap
.TestName
.toString()
2585 << "\nProgram level:\n";
2587 OS
<< " Whole program profile similarity: "
2588 << format("%.3f%%", ProfOverlap
.Similarity
* 100) << "\n";
2590 assert(ProfOverlap
.UnionSample
> 0 &&
2591 "Total samples in two profile should be greater than 0");
2592 double OverlapPercent
=
2593 static_cast<double>(ProfOverlap
.OverlapSample
) / ProfOverlap
.UnionSample
;
2594 assert(ProfOverlap
.BaseSample
> 0 &&
2595 "Total samples in base profile should be greater than 0");
2596 double BaseUniquePercent
= static_cast<double>(ProfOverlap
.BaseUniqueSample
) /
2597 ProfOverlap
.BaseSample
;
2598 assert(ProfOverlap
.TestSample
> 0 &&
2599 "Total samples in test profile should be greater than 0");
2600 double TestUniquePercent
= static_cast<double>(ProfOverlap
.TestUniqueSample
) /
2601 ProfOverlap
.TestSample
;
2603 OS
<< " Whole program sample overlap: "
2604 << format("%.3f%%", OverlapPercent
* 100) << "\n";
2605 OS
<< " percentage of samples unique in base profile: "
2606 << format("%.3f%%", BaseUniquePercent
* 100) << "\n";
2607 OS
<< " percentage of samples unique in test profile: "
2608 << format("%.3f%%", TestUniquePercent
* 100) << "\n";
2609 OS
<< " total samples in base profile: " << ProfOverlap
.BaseSample
<< "\n"
2610 << " total samples in test profile: " << ProfOverlap
.TestSample
<< "\n";
2612 assert(ProfOverlap
.UnionCount
> 0 &&
2613 "There should be at least one function in two input profiles");
2614 double FuncOverlapPercent
=
2615 static_cast<double>(ProfOverlap
.OverlapCount
) / ProfOverlap
.UnionCount
;
2616 OS
<< " Function overlap: " << format("%.3f%%", FuncOverlapPercent
* 100)
2618 OS
<< " overlap functions: " << ProfOverlap
.OverlapCount
<< "\n";
2619 OS
<< " functions unique in base profile: " << ProfOverlap
.BaseUniqueCount
2621 OS
<< " functions unique in test profile: " << ProfOverlap
.TestUniqueCount
2625 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2626 raw_fd_ostream
&OS
) const {
2627 assert(HotFuncOverlap
.UnionCount
> 0 &&
2628 "There should be at least one hot function in two input profiles");
2629 OS
<< " Hot-function overlap: "
2630 << format("%.3f%%", static_cast<double>(HotFuncOverlap
.OverlapCount
) /
2631 HotFuncOverlap
.UnionCount
* 100)
2633 OS
<< " overlap hot functions: " << HotFuncOverlap
.OverlapCount
<< "\n";
2634 OS
<< " hot functions unique in base profile: "
2635 << HotFuncOverlap
.BaseCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2636 OS
<< " hot functions unique in test profile: "
2637 << HotFuncOverlap
.TestCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2639 assert(HotBlockOverlap
.UnionCount
> 0 &&
2640 "There should be at least one hot block in two input profiles");
2641 OS
<< " Hot-block overlap: "
2642 << format("%.3f%%", static_cast<double>(HotBlockOverlap
.OverlapCount
) /
2643 HotBlockOverlap
.UnionCount
* 100)
2645 OS
<< " overlap hot blocks: " << HotBlockOverlap
.OverlapCount
<< "\n";
2646 OS
<< " hot blocks unique in base profile: "
2647 << HotBlockOverlap
.BaseCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2648 OS
<< " hot blocks unique in test profile: "
2649 << HotBlockOverlap
.TestCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2652 std::error_code
SampleOverlapAggregator::loadProfiles() {
2653 using namespace sampleprof
;
2655 LLVMContext Context
;
2656 auto FS
= vfs::getRealFileSystem();
2657 auto BaseReaderOrErr
= SampleProfileReader::create(BaseFilename
, Context
, *FS
,
2658 FSDiscriminatorPassOption
);
2659 if (std::error_code EC
= BaseReaderOrErr
.getError())
2660 exitWithErrorCode(EC
, BaseFilename
);
2662 auto TestReaderOrErr
= SampleProfileReader::create(TestFilename
, Context
, *FS
,
2663 FSDiscriminatorPassOption
);
2664 if (std::error_code EC
= TestReaderOrErr
.getError())
2665 exitWithErrorCode(EC
, TestFilename
);
2667 BaseReader
= std::move(BaseReaderOrErr
.get());
2668 TestReader
= std::move(TestReaderOrErr
.get());
2670 if (std::error_code EC
= BaseReader
->read())
2671 exitWithErrorCode(EC
, BaseFilename
);
2672 if (std::error_code EC
= TestReader
->read())
2673 exitWithErrorCode(EC
, TestFilename
);
2674 if (BaseReader
->profileIsProbeBased() != TestReader
->profileIsProbeBased())
2676 "cannot compare probe-based profile with non-probe-based profile");
2677 if (BaseReader
->profileIsCS() != TestReader
->profileIsCS())
2678 exitWithError("cannot compare CS profile with non-CS profile");
2680 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2682 ProfileSummary
&BasePS
= BaseReader
->getSummary();
2683 ProfileSummary
&TestPS
= TestReader
->getSummary();
2685 ProfileSummaryBuilder::getHotCountThreshold(BasePS
.getDetailedSummary());
2687 ProfileSummaryBuilder::getHotCountThreshold(TestPS
.getDetailedSummary());
2689 return std::error_code();
2692 void overlapSampleProfile(const std::string
&BaseFilename
,
2693 const std::string
&TestFilename
,
2694 const OverlapFuncFilters
&FuncFilter
,
2695 uint64_t SimilarityCutoff
, raw_fd_ostream
&OS
) {
2696 using namespace sampleprof
;
2698 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2699 // report 2--3 places after decimal point in percentage numbers.
2700 SampleOverlapAggregator
OverlapAggr(
2701 BaseFilename
, TestFilename
,
2702 static_cast<double>(SimilarityCutoff
) / 1000000, 0.000005, FuncFilter
);
2703 if (std::error_code EC
= OverlapAggr
.loadProfiles())
2704 exitWithErrorCode(EC
);
2706 OverlapAggr
.initializeSampleProfileOverlap();
2707 if (OverlapAggr
.detectZeroSampleProfile(OS
))
2710 OverlapAggr
.computeSampleProfileOverlap(OS
);
2712 OverlapAggr
.dumpProgramSummary(OS
);
2713 OverlapAggr
.dumpHotFuncAndBlockOverlap(OS
);
2714 OverlapAggr
.dumpFuncSimilarity(OS
);
2717 static int overlap_main() {
2719 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
2721 exitWithErrorCode(EC
, OutputFilename
);
2723 if (ProfileKind
== instr
)
2724 overlapInstrProfile(BaseFilename
, TestFilename
,
2725 OverlapFuncFilters
{OverlapValueCutoff
, FuncNameFilter
},
2728 overlapSampleProfile(BaseFilename
, TestFilename
,
2729 OverlapFuncFilters
{OverlapValueCutoff
, FuncNameFilter
},
2730 SimilarityCutoff
, OS
);
2736 struct ValueSitesStats
{
2737 ValueSitesStats() = default;
2738 uint64_t TotalNumValueSites
= 0;
2739 uint64_t TotalNumValueSitesWithValueProfile
= 0;
2740 uint64_t TotalNumValues
= 0;
2741 std::vector
<unsigned> ValueSitesHistogram
;
2745 static void traverseAllValueSites(const InstrProfRecord
&Func
, uint32_t VK
,
2746 ValueSitesStats
&Stats
, raw_fd_ostream
&OS
,
2747 InstrProfSymtab
*Symtab
) {
2748 uint32_t NS
= Func
.getNumValueSites(VK
);
2749 Stats
.TotalNumValueSites
+= NS
;
2750 for (size_t I
= 0; I
< NS
; ++I
) {
2751 auto VD
= Func
.getValueArrayForSite(VK
, I
);
2752 uint32_t NV
= VD
.size();
2755 Stats
.TotalNumValues
+= NV
;
2756 Stats
.TotalNumValueSitesWithValueProfile
++;
2757 if (NV
> Stats
.ValueSitesHistogram
.size())
2758 Stats
.ValueSitesHistogram
.resize(NV
, 0);
2759 Stats
.ValueSitesHistogram
[NV
- 1]++;
2761 uint64_t SiteSum
= 0;
2762 for (const auto &V
: VD
)
2767 for (const auto &V
: VD
) {
2768 OS
<< "\t[ " << format("%2u", I
) << ", ";
2769 if (Symtab
== nullptr)
2770 OS
<< format("%4" PRIu64
, V
.Value
);
2772 OS
<< Symtab
->getFuncOrVarName(V
.Value
);
2773 OS
<< ", " << format("%10" PRId64
, V
.Count
) << " ] ("
2774 << format("%.2f%%", (V
.Count
* 100.0 / SiteSum
)) << ")\n";
2779 static void showValueSitesStats(raw_fd_ostream
&OS
, uint32_t VK
,
2780 ValueSitesStats
&Stats
) {
2781 OS
<< " Total number of sites: " << Stats
.TotalNumValueSites
<< "\n";
2782 OS
<< " Total number of sites with values: "
2783 << Stats
.TotalNumValueSitesWithValueProfile
<< "\n";
2784 OS
<< " Total number of profiled values: " << Stats
.TotalNumValues
<< "\n";
2786 OS
<< " Value sites histogram:\n\tNumTargets, SiteCount\n";
2787 for (unsigned I
= 0; I
< Stats
.ValueSitesHistogram
.size(); I
++) {
2788 if (Stats
.ValueSitesHistogram
[I
] > 0)
2789 OS
<< "\t" << I
+ 1 << ", " << Stats
.ValueSitesHistogram
[I
] << "\n";
2793 static int showInstrProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2794 if (SFormat
== ShowFormat::Json
)
2795 exitWithError("JSON output is not supported for instr profiles");
2796 if (SFormat
== ShowFormat::Yaml
)
2797 exitWithError("YAML output is not supported for instr profiles");
2798 auto FS
= vfs::getRealFileSystem();
2799 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
2800 std::vector
<uint32_t> Cutoffs
= std::move(DetailedSummaryCutoffs
);
2801 if (ShowDetailedSummary
&& Cutoffs
.empty()) {
2802 Cutoffs
= ProfileSummaryBuilder::DefaultCutoffs
;
2804 InstrProfSummaryBuilder
Builder(std::move(Cutoffs
));
2805 if (Error E
= ReaderOrErr
.takeError())
2806 exitWithError(std::move(E
), Filename
);
2808 auto Reader
= std::move(ReaderOrErr
.get());
2809 bool IsIRInstr
= Reader
->isIRLevelProfile();
2810 size_t ShownFunctions
= 0;
2811 size_t BelowCutoffFunctions
= 0;
2812 int NumVPKind
= IPVK_Last
- IPVK_First
+ 1;
2813 std::vector
<ValueSitesStats
> VPStats(NumVPKind
);
2815 auto MinCmp
= [](const std::pair
<std::string
, uint64_t> &v1
,
2816 const std::pair
<std::string
, uint64_t> &v2
) {
2817 return v1
.second
> v2
.second
;
2820 std::priority_queue
<std::pair
<std::string
, uint64_t>,
2821 std::vector
<std::pair
<std::string
, uint64_t>>,
2823 HottestFuncs(MinCmp
);
2825 if (!TextFormat
&& OnlyListBelow
) {
2826 OS
<< "The list of functions with the maximum counter less than "
2827 << ShowValueCutoff
<< ":\n";
2830 // Add marker so that IR-level instrumentation round-trips properly.
2831 if (TextFormat
&& IsIRInstr
)
2834 for (const auto &Func
: *Reader
) {
2835 if (Reader
->isIRLevelProfile()) {
2836 bool FuncIsCS
= NamedInstrProfRecord::hasCSFlagInHash(Func
.Hash
);
2837 if (FuncIsCS
!= ShowCS
)
2840 bool Show
= ShowAllFunctions
||
2841 (!FuncNameFilter
.empty() && Func
.Name
.contains(FuncNameFilter
));
2843 bool doTextFormatDump
= (Show
&& TextFormat
);
2845 if (doTextFormatDump
) {
2846 InstrProfSymtab
&Symtab
= Reader
->getSymtab();
2847 InstrProfWriter::writeRecordInText(Func
.Name
, Func
.Hash
, Func
, Symtab
,
2852 assert(Func
.Counts
.size() > 0 && "function missing entry counter");
2853 Builder
.addRecord(Func
);
2856 if (llvm::any_of(Func
.Counts
, [](uint64_t C
) { return C
; }))
2857 OS
<< Func
.Name
<< "\n";
2861 uint64_t FuncMax
= 0;
2862 uint64_t FuncSum
= 0;
2864 auto PseudoKind
= Func
.getCountPseudoKind();
2865 if (PseudoKind
!= InstrProfRecord::NotPseudo
) {
2867 if (!ShownFunctions
)
2868 OS
<< "Counters:\n";
2870 OS
<< " " << Func
.Name
<< ":\n"
2871 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2872 << " Counters: " << Func
.Counts
.size();
2873 if (PseudoKind
== InstrProfRecord::PseudoHot
)
2874 OS
<< " <PseudoHot>\n";
2875 else if (PseudoKind
== InstrProfRecord::PseudoWarm
)
2876 OS
<< " <PseudoWarm>\n";
2878 llvm_unreachable("Unknown PseudoKind");
2883 for (size_t I
= 0, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2884 FuncMax
= std::max(FuncMax
, Func
.Counts
[I
]);
2885 FuncSum
+= Func
.Counts
[I
];
2888 if (FuncMax
< ShowValueCutoff
) {
2889 ++BelowCutoffFunctions
;
2890 if (OnlyListBelow
) {
2891 OS
<< " " << Func
.Name
<< ": (Max = " << FuncMax
2892 << " Sum = " << FuncSum
<< ")\n";
2895 } else if (OnlyListBelow
)
2898 if (TopNFunctions
) {
2899 if (HottestFuncs
.size() == TopNFunctions
) {
2900 if (HottestFuncs
.top().second
< FuncMax
) {
2902 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2905 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2909 if (!ShownFunctions
)
2910 OS
<< "Counters:\n";
2914 OS
<< " " << Func
.Name
<< ":\n"
2915 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2916 << " Counters: " << Func
.Counts
.size() << "\n";
2918 OS
<< " Function count: " << Func
.Counts
[0] << "\n";
2920 if (ShowIndirectCallTargets
)
2921 OS
<< " Indirect Call Site Count: "
2922 << Func
.getNumValueSites(IPVK_IndirectCallTarget
) << "\n";
2925 OS
<< " Number of instrumented vtables: "
2926 << Func
.getNumValueSites(IPVK_VTableTarget
) << "\n";
2928 uint32_t NumMemOPCalls
= Func
.getNumValueSites(IPVK_MemOPSize
);
2929 if (ShowMemOPSizes
&& NumMemOPCalls
> 0)
2930 OS
<< " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2934 OS
<< " Block counts: [";
2935 size_t Start
= (IsIRInstr
? 0 : 1);
2936 for (size_t I
= Start
, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2937 OS
<< (I
== Start
? "" : ", ") << Func
.Counts
[I
];
2942 if (ShowIndirectCallTargets
) {
2943 OS
<< " Indirect Target Results:\n";
2944 traverseAllValueSites(Func
, IPVK_IndirectCallTarget
,
2945 VPStats
[IPVK_IndirectCallTarget
], OS
,
2946 &(Reader
->getSymtab()));
2950 OS
<< " VTable Results:\n";
2951 traverseAllValueSites(Func
, IPVK_VTableTarget
,
2952 VPStats
[IPVK_VTableTarget
], OS
,
2953 &(Reader
->getSymtab()));
2956 if (ShowMemOPSizes
&& NumMemOPCalls
> 0) {
2957 OS
<< " Memory Intrinsic Size Results:\n";
2958 traverseAllValueSites(Func
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
], OS
,
2963 if (Reader
->hasError())
2964 exitWithError(Reader
->getError(), Filename
);
2966 if (TextFormat
|| ShowCovered
)
2968 std::unique_ptr
<ProfileSummary
> PS(Builder
.getSummary());
2969 bool IsIR
= Reader
->isIRLevelProfile();
2970 OS
<< "Instrumentation level: " << (IsIR
? "IR" : "Front-end");
2972 OS
<< " entry_first = " << Reader
->instrEntryBBEnabled();
2974 if (ShowAllFunctions
|| !FuncNameFilter
.empty())
2975 OS
<< "Functions shown: " << ShownFunctions
<< "\n";
2976 OS
<< "Total functions: " << PS
->getNumFunctions() << "\n";
2977 if (ShowValueCutoff
> 0) {
2978 OS
<< "Number of functions with maximum count (< " << ShowValueCutoff
2979 << "): " << BelowCutoffFunctions
<< "\n";
2980 OS
<< "Number of functions with maximum count (>= " << ShowValueCutoff
2981 << "): " << PS
->getNumFunctions() - BelowCutoffFunctions
<< "\n";
2983 OS
<< "Maximum function count: " << PS
->getMaxFunctionCount() << "\n";
2984 OS
<< "Maximum internal block count: " << PS
->getMaxInternalCount() << "\n";
2986 if (TopNFunctions
) {
2987 std::vector
<std::pair
<std::string
, uint64_t>> SortedHottestFuncs
;
2988 while (!HottestFuncs
.empty()) {
2989 SortedHottestFuncs
.emplace_back(HottestFuncs
.top());
2992 OS
<< "Top " << TopNFunctions
2993 << " functions with the largest internal block counts: \n";
2994 for (auto &hotfunc
: llvm::reverse(SortedHottestFuncs
))
2995 OS
<< " " << hotfunc
.first
<< ", max count = " << hotfunc
.second
<< "\n";
2998 if (ShownFunctions
&& ShowIndirectCallTargets
) {
2999 OS
<< "Statistics for indirect call sites profile:\n";
3000 showValueSitesStats(OS
, IPVK_IndirectCallTarget
,
3001 VPStats
[IPVK_IndirectCallTarget
]);
3004 if (ShownFunctions
&& ShowVTables
) {
3005 OS
<< "Statistics for vtable profile:\n";
3006 showValueSitesStats(OS
, IPVK_VTableTarget
, VPStats
[IPVK_VTableTarget
]);
3009 if (ShownFunctions
&& ShowMemOPSizes
) {
3010 OS
<< "Statistics for memory intrinsic calls sizes profile:\n";
3011 showValueSitesStats(OS
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
]);
3014 if (ShowDetailedSummary
) {
3015 OS
<< "Total number of blocks: " << PS
->getNumCounts() << "\n";
3016 OS
<< "Total count: " << PS
->getTotalCount() << "\n";
3017 PS
->printDetailedSummary(OS
);
3021 if (Error E
= Reader
->printBinaryIds(OS
))
3022 exitWithError(std::move(E
), Filename
);
3024 if (ShowProfileVersion
)
3025 OS
<< "Profile version: " << Reader
->getVersion() << "\n";
3027 if (ShowTemporalProfTraces
) {
3028 auto &Traces
= Reader
->getTemporalProfTraces();
3029 OS
<< "Temporal Profile Traces (samples=" << Traces
.size()
3030 << " seen=" << Reader
->getTemporalProfTraceStreamSize() << "):\n";
3031 for (unsigned i
= 0; i
< Traces
.size(); i
++) {
3032 OS
<< " Temporal Profile Trace " << i
<< " (weight=" << Traces
[i
].Weight
3033 << " count=" << Traces
[i
].FunctionNameRefs
.size() << "):\n";
3034 for (auto &NameRef
: Traces
[i
].FunctionNameRefs
)
3035 OS
<< " " << Reader
->getSymtab().getFuncOrVarName(NameRef
) << "\n";
3042 static void showSectionInfo(sampleprof::SampleProfileReader
*Reader
,
3043 raw_fd_ostream
&OS
) {
3044 if (!Reader
->dumpSectionInfo(OS
)) {
3045 WithColor::warning() << "-show-sec-info-only is only supported for "
3046 << "sample profile in extbinary format and is "
3047 << "ignored for other formats.\n";
3053 struct HotFuncInfo
{
3054 std::string FuncName
;
3055 uint64_t TotalCount
= 0;
3056 double TotalCountPercent
= 0.0f
;
3057 uint64_t MaxCount
= 0;
3058 uint64_t EntryCount
= 0;
3060 HotFuncInfo() = default;
3062 HotFuncInfo(StringRef FN
, uint64_t TS
, double TSP
, uint64_t MS
, uint64_t ES
)
3063 : FuncName(FN
.begin(), FN
.end()), TotalCount(TS
), TotalCountPercent(TSP
),
3064 MaxCount(MS
), EntryCount(ES
) {}
3068 // Print out detailed information about hot functions in PrintValues vector.
3069 // Users specify titles and offset of every columns through ColumnTitle and
3070 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3071 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
3072 // print out or let it be an empty string.
3073 static void dumpHotFunctionList(const std::vector
<std::string
> &ColumnTitle
,
3074 const std::vector
<int> &ColumnOffset
,
3075 const std::vector
<HotFuncInfo
> &PrintValues
,
3076 uint64_t HotFuncCount
, uint64_t TotalFuncCount
,
3077 uint64_t HotProfCount
, uint64_t TotalProfCount
,
3078 const std::string
&HotFuncMetric
,
3079 uint32_t TopNFunctions
, raw_fd_ostream
&OS
) {
3080 assert(ColumnOffset
.size() == ColumnTitle
.size() &&
3081 "ColumnOffset and ColumnTitle should have the same size");
3082 assert(ColumnTitle
.size() >= 4 &&
3083 "ColumnTitle should have at least 4 elements");
3084 assert(TotalFuncCount
> 0 &&
3085 "There should be at least one function in the profile");
3086 double TotalProfPercent
= 0;
3087 if (TotalProfCount
> 0)
3088 TotalProfPercent
= static_cast<double>(HotProfCount
) / TotalProfCount
* 100;
3090 formatted_raw_ostream
FOS(OS
);
3091 FOS
<< HotFuncCount
<< " out of " << TotalFuncCount
3092 << " functions with profile ("
3094 (static_cast<double>(HotFuncCount
) / TotalFuncCount
* 100))
3095 << ") are considered hot functions";
3096 if (!HotFuncMetric
.empty())
3097 FOS
<< " (" << HotFuncMetric
<< ")";
3099 FOS
<< HotProfCount
<< " out of " << TotalProfCount
<< " profile counts ("
3100 << format("%.2f%%", TotalProfPercent
) << ") are from hot functions.\n";
3102 for (size_t I
= 0; I
< ColumnTitle
.size(); ++I
) {
3103 FOS
.PadToColumn(ColumnOffset
[I
]);
3104 FOS
<< ColumnTitle
[I
];
3109 for (const auto &R
: PrintValues
) {
3110 if (TopNFunctions
&& (Count
++ == TopNFunctions
))
3112 FOS
.PadToColumn(ColumnOffset
[0]);
3113 FOS
<< R
.TotalCount
<< " (" << format("%.2f%%", R
.TotalCountPercent
) << ")";
3114 FOS
.PadToColumn(ColumnOffset
[1]);
3116 FOS
.PadToColumn(ColumnOffset
[2]);
3117 FOS
<< R
.EntryCount
;
3118 FOS
.PadToColumn(ColumnOffset
[3]);
3119 FOS
<< R
.FuncName
<< "\n";
3123 static int showHotFunctionList(const sampleprof::SampleProfileMap
&Profiles
,
3124 ProfileSummary
&PS
, uint32_t TopN
,
3125 raw_fd_ostream
&OS
) {
3126 using namespace sampleprof
;
3128 const uint32_t HotFuncCutoff
= 990000;
3129 auto &SummaryVector
= PS
.getDetailedSummary();
3130 uint64_t MinCountThreshold
= 0;
3131 for (const ProfileSummaryEntry
&SummaryEntry
: SummaryVector
) {
3132 if (SummaryEntry
.Cutoff
== HotFuncCutoff
) {
3133 MinCountThreshold
= SummaryEntry
.MinCount
;
3138 // Traverse all functions in the profile and keep only hot functions.
3139 // The following loop also calculates the sum of total samples of all
3141 std::multimap
<uint64_t, std::pair
<const FunctionSamples
*, const uint64_t>,
3142 std::greater
<uint64_t>>
3144 uint64_t ProfileTotalSample
= 0;
3145 uint64_t HotFuncSample
= 0;
3146 uint64_t HotFuncCount
= 0;
3148 for (const auto &I
: Profiles
) {
3149 FuncSampleStats FuncStats
;
3150 const FunctionSamples
&FuncProf
= I
.second
;
3151 ProfileTotalSample
+= FuncProf
.getTotalSamples();
3152 getFuncSampleStats(FuncProf
, FuncStats
, MinCountThreshold
);
3154 if (isFunctionHot(FuncStats
, MinCountThreshold
)) {
3155 HotFunc
.emplace(FuncProf
.getTotalSamples(),
3156 std::make_pair(&(I
.second
), FuncStats
.MaxSample
));
3157 HotFuncSample
+= FuncProf
.getTotalSamples();
3162 std::vector
<std::string
> ColumnTitle
{"Total sample (%)", "Max sample",
3163 "Entry sample", "Function name"};
3164 std::vector
<int> ColumnOffset
{0, 24, 42, 58};
3165 std::string Metric
=
3166 std::string("max sample >= ") + std::to_string(MinCountThreshold
);
3167 std::vector
<HotFuncInfo
> PrintValues
;
3168 for (const auto &FuncPair
: HotFunc
) {
3169 const FunctionSamples
&Func
= *FuncPair
.second
.first
;
3170 double TotalSamplePercent
=
3171 (ProfileTotalSample
> 0)
3172 ? (Func
.getTotalSamples() * 100.0) / ProfileTotalSample
3174 PrintValues
.emplace_back(
3175 HotFuncInfo(Func
.getContext().toString(), Func
.getTotalSamples(),
3176 TotalSamplePercent
, FuncPair
.second
.second
,
3177 Func
.getHeadSamplesEstimate()));
3179 dumpHotFunctionList(ColumnTitle
, ColumnOffset
, PrintValues
, HotFuncCount
,
3180 Profiles
.size(), HotFuncSample
, ProfileTotalSample
,
3186 static int showSampleProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3187 if (SFormat
== ShowFormat::Yaml
)
3188 exitWithError("YAML output is not supported for sample profiles");
3189 using namespace sampleprof
;
3190 LLVMContext Context
;
3191 auto FS
= vfs::getRealFileSystem();
3192 auto ReaderOrErr
= SampleProfileReader::create(Filename
, Context
, *FS
,
3193 FSDiscriminatorPassOption
);
3194 if (std::error_code EC
= ReaderOrErr
.getError())
3195 exitWithErrorCode(EC
, Filename
);
3197 auto Reader
= std::move(ReaderOrErr
.get());
3198 if (ShowSectionInfoOnly
) {
3199 showSectionInfo(Reader
.get(), OS
);
3203 if (std::error_code EC
= Reader
->read())
3204 exitWithErrorCode(EC
, Filename
);
3206 if (ShowAllFunctions
|| FuncNameFilter
.empty()) {
3207 if (SFormat
== ShowFormat::Json
)
3208 Reader
->dumpJson(OS
);
3212 if (SFormat
== ShowFormat::Json
)
3214 "the JSON format is supported only when all functions are to "
3217 // TODO: parse context string to support filtering by contexts.
3218 FunctionSamples
*FS
= Reader
->getSamplesFor(StringRef(FuncNameFilter
));
3219 Reader
->dumpFunctionProfile(FS
? *FS
: FunctionSamples(), OS
);
3222 if (ShowProfileSymbolList
) {
3223 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
3224 Reader
->getProfileSymbolList();
3225 ReaderList
->dump(OS
);
3228 if (ShowDetailedSummary
) {
3229 auto &PS
= Reader
->getSummary();
3230 PS
.printSummary(OS
);
3231 PS
.printDetailedSummary(OS
);
3234 if (ShowHotFuncList
|| TopNFunctions
)
3235 showHotFunctionList(Reader
->getProfiles(), Reader
->getSummary(),
3241 static int showMemProfProfile(ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3242 if (SFormat
== ShowFormat::Json
)
3243 exitWithError("JSON output is not supported for MemProf");
3244 auto ReaderOr
= llvm::memprof::RawMemProfReader::create(
3245 Filename
, ProfiledBinary
, /*KeepNames=*/true);
3246 if (Error E
= ReaderOr
.takeError())
3247 // Since the error can be related to the profile or the binary we do not
3248 // pass whence. Instead additional context is provided where necessary in
3249 // the error message.
3250 exitWithError(std::move(E
), /*Whence*/ "");
3252 std::unique_ptr
<llvm::memprof::RawMemProfReader
> Reader(
3253 ReaderOr
.get().release());
3255 Reader
->printYAML(OS
);
3259 static int showDebugInfoCorrelation(const std::string
&Filename
,
3260 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
3261 if (SFormat
== ShowFormat::Json
)
3262 exitWithError("JSON output is not supported for debug info correlation");
3263 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
3265 InstrProfCorrelator::get(Filename
, InstrProfCorrelator::DEBUG_INFO
)
3266 .moveInto(Correlator
))
3267 exitWithError(std::move(Err
), Filename
);
3268 if (SFormat
== ShowFormat::Yaml
) {
3269 if (auto Err
= Correlator
->dumpYaml(MaxDbgCorrelationWarnings
, OS
))
3270 exitWithError(std::move(Err
), Filename
);
3274 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
3275 exitWithError(std::move(Err
), Filename
);
3277 InstrProfSymtab Symtab
;
3278 if (auto Err
= Symtab
.create(
3279 StringRef(Correlator
->getNamesPointer(), Correlator
->getNamesSize())))
3280 exitWithError(std::move(Err
), Filename
);
3282 if (ShowProfileSymbolList
)
3283 Symtab
.dumpNames(OS
);
3284 // TODO: Read "Profile Data Type" from debug info to compute and show how many
3285 // counters the section holds.
3286 if (ShowDetailedSummary
)
3287 OS
<< "Counters section size: 0x"
3288 << Twine::utohexstr(Correlator
->getCountersSectionSize()) << " bytes\n";
3289 OS
<< "Found " << Correlator
->getDataSize() << " functions\n";
3294 static int show_main(StringRef ProgName
) {
3295 if (Filename
.empty() && DebugInfoFilename
.empty())
3297 "the positional argument '<profdata-file>' is required unless '--" +
3298 DebugInfoFilename
.ArgStr
+ "' is provided");
3300 if (Filename
== OutputFilename
) {
3302 << " show: Input file name cannot be the same as the output file "
3307 SFormat
= ShowFormat::Json
;
3310 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3312 exitWithErrorCode(EC
, OutputFilename
);
3314 if (ShowAllFunctions
&& !FuncNameFilter
.empty())
3315 WithColor::warning() << "-function argument ignored: showing all functions\n";
3317 if (!DebugInfoFilename
.empty())
3318 return showDebugInfoCorrelation(DebugInfoFilename
, SFormat
, OS
);
3320 if (ShowProfileKind
== instr
)
3321 return showInstrProfile(SFormat
, OS
);
3322 if (ShowProfileKind
== sample
)
3323 return showSampleProfile(SFormat
, OS
);
3324 return showMemProfProfile(SFormat
, OS
);
3327 static int order_main() {
3329 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3331 exitWithErrorCode(EC
, OutputFilename
);
3332 auto FS
= vfs::getRealFileSystem();
3333 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
3334 if (Error E
= ReaderOrErr
.takeError())
3335 exitWithError(std::move(E
), Filename
);
3337 auto Reader
= std::move(ReaderOrErr
.get());
3338 for (auto &I
: *Reader
) {
3342 ArrayRef Traces
= Reader
->getTemporalProfTraces();
3343 if (NumTestTraces
&& NumTestTraces
>= Traces
.size())
3345 "--" + NumTestTraces
.ArgStr
+
3346 " must be smaller than the total number of traces: expected: < " +
3347 Twine(Traces
.size()) + ", actual: " + Twine(NumTestTraces
));
3348 ArrayRef TestTraces
= Traces
.take_back(NumTestTraces
);
3349 Traces
= Traces
.drop_back(NumTestTraces
);
3351 std::vector
<BPFunctionNode
> Nodes
;
3352 TemporalProfTraceTy::createBPFunctionNodes(Traces
, Nodes
);
3353 BalancedPartitioningConfig Config
;
3354 BalancedPartitioning
BP(Config
);
3357 OS
<< "# Ordered " << Nodes
.size() << " functions\n";
3358 if (!TestTraces
.empty()) {
3359 // Since we don't know the symbol sizes, we assume 32 functions per page.
3360 DenseMap
<BPFunctionNode::IDT
, unsigned> IdToPageNumber
;
3361 for (auto &Node
: Nodes
)
3362 IdToPageNumber
[Node
.Id
] = IdToPageNumber
.size() / 32;
3364 SmallSet
<unsigned, 0> TouchedPages
;
3366 for (auto &Trace
: TestTraces
) {
3367 for (auto Id
: Trace
.FunctionNameRefs
) {
3368 auto It
= IdToPageNumber
.find(Id
);
3369 if (It
== IdToPageNumber
.end())
3371 TouchedPages
.insert(It
->getSecond());
3372 Area
+= TouchedPages
.size();
3374 TouchedPages
.clear();
3376 OS
<< "# Total area under the page fault curve: " << (float)Area
<< "\n";
3378 OS
<< "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3379 "linkage and this output does not take that into account. Some "
3380 "post-processing may be required before passing to the linker via "
3382 for (auto &N
: Nodes
) {
3383 auto [Filename
, ParsedFuncName
] =
3384 getParsedIRPGOName(Reader
->getSymtab().getFuncOrVarName(N
.Id
));
3385 if (!Filename
.empty())
3386 OS
<< "# " << Filename
<< "\n";
3387 OS
<< ParsedFuncName
<< "\n";
3392 int llvm_profdata_main(int argc
, char **argvNonConst
,
3393 const llvm::ToolContext
&) {
3394 const char **argv
= const_cast<const char **>(argvNonConst
);
3396 StringRef
ProgName(sys::path::filename(argv
[0]));
3401 << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3405 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data\n");
3408 return show_main(ProgName
);
3410 if (OrderSubcommand
)
3411 return order_main();
3413 if (OverlapSubcommand
)
3414 return overlap_main();
3416 if (MergeSubcommand
)
3417 return merge_main(ProgName
);
3420 << ": Unknown command. Run llvm-profdata --help for usage.\n";