[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / tools / llvm-profdata / llvm-profdata.cpp
blobe7e7f8228d7d9c3b8aeab544efcd512e8d05ea8a
1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // llvm-profdata merges .profdata files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/LLVMContext.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/ProfileData/InstrProfCorrelator.h"
19 #include "llvm/ProfileData/InstrProfReader.h"
20 #include "llvm/ProfileData/InstrProfWriter.h"
21 #include "llvm/ProfileData/MemProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/RawMemProfReader.h"
24 #include "llvm/ProfileData/SampleProfReader.h"
25 #include "llvm/ProfileData/SampleProfWriter.h"
26 #include "llvm/Support/BalancedPartitioning.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Discriminator.h"
29 #include "llvm/Support/Errc.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/FormattedStream.h"
33 #include "llvm/Support/InitLLVM.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Support/Threading.h"
40 #include "llvm/Support/VirtualFileSystem.h"
41 #include "llvm/Support/WithColor.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <algorithm>
44 #include <cmath>
45 #include <optional>
46 #include <queue>
48 using namespace llvm;
// Sentinel value stored in place of a real function name when several
// static functions map to the same stripped name.
const std::string DuplicateNameStr("----");
/// Profile encodings that can be requested for the output file.
/// The enumerator values are spelled out so the numbering stays stable.
enum ProfileFormat {
  PF_None = 0,
  PF_Text = 1,
  PF_Compact_Binary = 2, // Deprecated
  PF_Ext_Binary = 3,
  PF_GCC = 4,
  PF_Binary = 5
};
/// Textual encodings selectable when displaying a profile.
enum class ShowFormat {
  Text,
  Json,
  Yaml
};
65 static void warn(Twine Message, std::string Whence = "",
66 std::string Hint = "") {
67 WithColor::warning();
68 if (!Whence.empty())
69 errs() << Whence << ": ";
70 errs() << Message << "\n";
71 if (!Hint.empty())
72 WithColor::note() << Hint << "\n";
75 static void warn(Error E, StringRef Whence = "") {
76 if (E.isA<InstrProfError>()) {
77 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
78 warn(IPE.message(), std::string(Whence), std::string(""));
79 });
83 static void exitWithError(Twine Message, std::string Whence = "",
84 std::string Hint = "") {
85 WithColor::error();
86 if (!Whence.empty())
87 errs() << Whence << ": ";
88 errs() << Message << "\n";
89 if (!Hint.empty())
90 WithColor::note() << Hint << "\n";
91 ::exit(1);
94 static void exitWithError(Error E, StringRef Whence = "") {
95 if (E.isA<InstrProfError>()) {
96 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
97 instrprof_error instrError = IPE.get();
98 StringRef Hint = "";
99 if (instrError == instrprof_error::unrecognized_format) {
100 // Hint in case user missed specifying the profile type.
101 Hint = "Perhaps you forgot to use the --sample or --memory option?";
103 exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
105 return;
108 exitWithError(toString(std::move(E)), std::string(Whence));
111 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
112 exitWithError(EC.message(), std::string(Whence));
namespace {
/// Kinds of profile named by this tool.
enum ProfileKinds {
  instr,
  sample,
  memory
};

/// Policy for reacting to inputs that fail to load.
enum FailureMode {
  warnOnly,
  failIfAnyAreInvalid,
  failIfAllAreInvalid
};
} // namespace
120 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
121 StringRef Whence = "") {
122 if (FailMode == failIfAnyAreInvalid)
123 exitWithErrorCode(EC, Whence);
124 else
125 warn(EC.message(), std::string(Whence));
128 static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
129 StringRef WhenceFunction = "",
130 bool ShowHint = true) {
131 if (!WhenceFile.empty())
132 errs() << WhenceFile << ": ";
133 if (!WhenceFunction.empty())
134 errs() << WhenceFunction << ": ";
136 auto IPE = instrprof_error::success;
137 E = handleErrors(std::move(E),
138 [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
139 IPE = E->get();
140 return Error(std::move(E));
142 errs() << toString(std::move(E)) << "\n";
144 if (ShowHint) {
145 StringRef Hint = "";
146 if (IPE != instrprof_error::success) {
147 switch (IPE) {
148 case instrprof_error::hash_mismatch:
149 case instrprof_error::count_mismatch:
150 case instrprof_error::value_site_count_mismatch:
151 Hint = "Make sure that all profile data to be merged is generated "
152 "from the same binary.";
153 break;
154 default:
155 break;
159 if (!Hint.empty())
160 errs() << Hint << "\n";
164 namespace {
165 /// A remapper from original symbol names to new symbol names based on a file
166 /// containing a list of mappings from old name to new name.
167 class SymbolRemapper {
168 std::unique_ptr<MemoryBuffer> File;
169 DenseMap<StringRef, StringRef> RemappingTable;
171 public:
172 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
173 static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
174 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
175 if (!BufOrError)
176 exitWithErrorCode(BufOrError.getError(), InputFile);
178 auto Remapper = std::make_unique<SymbolRemapper>();
179 Remapper->File = std::move(BufOrError.get());
181 for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
182 !LineIt.is_at_eof(); ++LineIt) {
183 std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
184 if (Parts.first.empty() || Parts.second.empty() ||
185 Parts.second.count(' ')) {
186 exitWithError("unexpected line in remapping file",
187 (InputFile + ":" + Twine(LineIt.line_number())).str(),
188 "expected 'old_symbol new_symbol'");
190 Remapper->RemappingTable.insert(Parts);
192 return Remapper;
195 /// Attempt to map the given old symbol into a new symbol.
197 /// \return The new symbol, or \p Name if no such symbol was found.
198 StringRef operator()(StringRef Name) {
199 StringRef New = RemappingTable.lookup(Name);
200 return New.empty() ? Name : New;
203 FunctionId operator()(FunctionId Name) {
204 // MD5 name cannot be remapped.
205 if (!Name.isStringRef())
206 return Name;
207 StringRef New = RemappingTable.lookup(Name.stringRef());
208 return New.empty() ? Name : FunctionId(New);
/// One input profile together with the weight applied when merging it.
struct WeightedFile {
  std::string Filename; // Path of the input profile.
  uint64_t Weight;      // Weight passed along when its records are merged.
};
217 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
219 /// Keep track of merged data and reported errors.
220 struct WriterContext {
221 std::mutex Lock;
222 InstrProfWriter Writer;
223 std::vector<std::pair<Error, std::string>> Errors;
224 std::mutex &ErrLock;
225 SmallSet<instrprof_error, 4> &WriterErrorCodes;
227 WriterContext(bool IsSparse, std::mutex &ErrLock,
228 SmallSet<instrprof_error, 4> &WriterErrorCodes,
229 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
230 : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock),
231 WriterErrorCodes(WriterErrorCodes) {}
234 /// Computer the overlap b/w profile BaseFilename and TestFileName,
235 /// and store the program level result to Overlap.
236 static void overlapInput(const std::string &BaseFilename,
237 const std::string &TestFilename, WriterContext *WC,
238 OverlapStats &Overlap,
239 const OverlapFuncFilters &FuncFilter,
240 raw_fd_ostream &OS, bool IsCS) {
241 auto FS = vfs::getRealFileSystem();
242 auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
243 if (Error E = ReaderOrErr.takeError()) {
244 // Skip the empty profiles by returning sliently.
245 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
246 if (ErrorCode != instrprof_error::empty_raw_profile)
247 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
248 TestFilename);
249 return;
252 auto Reader = std::move(ReaderOrErr.get());
253 for (auto &I : *Reader) {
254 OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
255 FuncOverlap.setFuncInfo(I.Name, I.Hash);
257 WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
258 FuncOverlap.dump(OS);
262 /// Load an input into a writer context.
263 static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
264 const InstrProfCorrelator *Correlator,
265 const StringRef ProfiledBinary, WriterContext *WC) {
266 std::unique_lock<std::mutex> CtxGuard{WC->Lock};
268 // Copy the filename, because llvm::ThreadPool copied the input "const
269 // WeightedFile &" by value, making a reference to the filename within it
270 // invalid outside of this packaged task.
271 std::string Filename = Input.Filename;
273 using ::llvm::memprof::RawMemProfReader;
274 if (RawMemProfReader::hasFormat(Input.Filename)) {
275 auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
276 if (!ReaderOrErr) {
277 exitWithError(ReaderOrErr.takeError(), Input.Filename);
279 std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
280 // Check if the profile types can be merged, e.g. clang frontend profiles
281 // should not be merged with memprof profiles.
282 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
283 consumeError(std::move(E));
284 WC->Errors.emplace_back(
285 make_error<StringError>(
286 "Cannot merge MemProf profile with Clang generated profile.",
287 std::error_code()),
288 Filename);
289 return;
292 auto MemProfError = [&](Error E) {
293 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
294 WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
295 Filename);
298 // Add the frame mappings into the writer context.
299 const auto &IdToFrame = Reader->getFrameMapping();
300 for (const auto &I : IdToFrame) {
301 bool Succeeded = WC->Writer.addMemProfFrame(
302 /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
303 // If we weren't able to add the frame mappings then it doesn't make sense
304 // to try to add the records from this profile.
305 if (!Succeeded)
306 return;
308 const auto &FunctionProfileData = Reader->getProfileData();
309 // Add the memprof records into the writer context.
310 for (const auto &I : FunctionProfileData) {
311 WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
313 return;
316 auto FS = vfs::getRealFileSystem();
317 // TODO: This only saves the first non-fatal error from InstrProfReader, and
318 // then added to WriterContext::Errors. However, this is not extensible, if
319 // we have more non-fatal errors from InstrProfReader in the future. How
320 // should this interact with different -failure-mode?
321 std::optional<std::pair<Error, std::string>> ReaderWarning;
322 auto Warn = [&](Error E) {
323 if (ReaderWarning) {
324 consumeError(std::move(E));
325 return;
327 // Only show the first time an error occurs in this file.
328 auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
329 ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
331 auto ReaderOrErr =
332 InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
333 if (Error E = ReaderOrErr.takeError()) {
334 // Skip the empty profiles by returning silently.
335 auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
336 if (ErrCode != instrprof_error::empty_raw_profile)
337 WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
338 Filename);
339 return;
342 auto Reader = std::move(ReaderOrErr.get());
343 if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
344 consumeError(std::move(E));
345 WC->Errors.emplace_back(
346 make_error<StringError>(
347 "Merge IR generated profile with Clang generated profile.",
348 std::error_code()),
349 Filename);
350 return;
353 for (auto &I : *Reader) {
354 if (Remapper)
355 I.Name = (*Remapper)(I.Name);
356 const StringRef FuncName = I.Name;
357 bool Reported = false;
358 WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
359 if (Reported) {
360 consumeError(std::move(E));
361 return;
363 Reported = true;
364 // Only show hint the first time an error occurs.
365 auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
366 std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
367 bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
368 handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
369 Input.Filename, FuncName, firstTime);
373 if (Reader->hasTemporalProfile()) {
374 auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
375 if (!Traces.empty())
376 WC->Writer.addTemporalProfileTraces(
377 Traces, Reader->getTemporalProfTraceStreamSize());
379 if (Reader->hasError()) {
380 if (Error E = Reader->getError()) {
381 WC->Errors.emplace_back(std::move(E), Filename);
382 return;
386 std::vector<llvm::object::BuildID> BinaryIds;
387 if (Error E = Reader->readBinaryIds(BinaryIds)) {
388 WC->Errors.emplace_back(std::move(E), Filename);
389 return;
391 WC->Writer.addBinaryIds(BinaryIds);
393 if (ReaderWarning) {
394 WC->Errors.emplace_back(std::move(ReaderWarning->first),
395 ReaderWarning->second);
399 /// Merge the \p Src writer context into \p Dst.
400 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
401 for (auto &ErrorPair : Src->Errors)
402 Dst->Errors.push_back(std::move(ErrorPair));
403 Src->Errors.clear();
405 if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
406 exitWithError(std::move(E));
408 Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
409 auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
410 std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
411 bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
412 if (firstTime)
413 warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
417 static void writeInstrProfile(StringRef OutputFilename,
418 ProfileFormat OutputFormat,
419 InstrProfWriter &Writer) {
420 std::error_code EC;
421 raw_fd_ostream Output(OutputFilename.data(), EC,
422 OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
423 : sys::fs::OF_None);
424 if (EC)
425 exitWithErrorCode(EC, OutputFilename);
427 if (OutputFormat == PF_Text) {
428 if (Error E = Writer.writeText(Output))
429 warn(std::move(E));
430 } else {
431 if (Output.is_displayed())
432 exitWithError("cannot write a non-text format profile to the terminal");
433 if (Error E = Writer.write(Output))
434 warn(std::move(E));
438 static void
439 mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename,
440 SymbolRemapper *Remapper, StringRef OutputFilename,
441 ProfileFormat OutputFormat, uint64_t TraceReservoirSize,
442 uint64_t MaxTraceLength, int MaxDbgCorrelationWarnings,
443 bool OutputSparse, unsigned NumThreads, FailureMode FailMode,
444 const StringRef ProfiledBinary) {
445 if (OutputFormat == PF_Compact_Binary)
446 exitWithError("Compact Binary is deprecated");
447 if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
448 OutputFormat != PF_Text)
449 exitWithError("unknown format is specified");
451 std::unique_ptr<InstrProfCorrelator> Correlator;
452 if (!DebugInfoFilename.empty()) {
453 if (auto Err = InstrProfCorrelator::get(DebugInfoFilename,
454 InstrProfCorrelator::DEBUG_INFO)
455 .moveInto(Correlator))
456 exitWithError(std::move(Err), DebugInfoFilename);
457 if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
458 exitWithError(std::move(Err), DebugInfoFilename);
461 std::mutex ErrorLock;
462 SmallSet<instrprof_error, 4> WriterErrorCodes;
464 // If NumThreads is not specified, auto-detect a good default.
465 if (NumThreads == 0)
466 NumThreads = std::min(hardware_concurrency().compute_thread_count(),
467 unsigned((Inputs.size() + 1) / 2));
469 // Initialize the writer contexts.
470 SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
471 for (unsigned I = 0; I < NumThreads; ++I)
472 Contexts.emplace_back(std::make_unique<WriterContext>(
473 OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
474 MaxTraceLength));
476 if (NumThreads == 1) {
477 for (const auto &Input : Inputs)
478 loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
479 Contexts[0].get());
480 } else {
481 ThreadPool Pool(hardware_concurrency(NumThreads));
483 // Load the inputs in parallel (N/NumThreads serial steps).
484 unsigned Ctx = 0;
485 for (const auto &Input : Inputs) {
486 Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
487 Contexts[Ctx].get());
488 Ctx = (Ctx + 1) % NumThreads;
490 Pool.wait();
492 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
493 unsigned Mid = Contexts.size() / 2;
494 unsigned End = Contexts.size();
495 assert(Mid > 0 && "Expected more than one context");
496 do {
497 for (unsigned I = 0; I < Mid; ++I)
498 Pool.async(mergeWriterContexts, Contexts[I].get(),
499 Contexts[I + Mid].get());
500 Pool.wait();
501 if (End & 1) {
502 Pool.async(mergeWriterContexts, Contexts[0].get(),
503 Contexts[End - 1].get());
504 Pool.wait();
506 End = Mid;
507 Mid /= 2;
508 } while (Mid > 0);
511 // Handle deferred errors encountered during merging. If the number of errors
512 // is equal to the number of inputs the merge failed.
513 unsigned NumErrors = 0;
514 for (std::unique_ptr<WriterContext> &WC : Contexts) {
515 for (auto &ErrorPair : WC->Errors) {
516 ++NumErrors;
517 warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
520 if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
521 (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
522 exitWithError("no profile can be merged");
524 writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
527 /// The profile entry for a function in instrumentation profile.
528 struct InstrProfileEntry {
529 uint64_t MaxCount = 0;
530 uint64_t NumEdgeCounters = 0;
531 float ZeroCounterRatio = 0.0;
532 InstrProfRecord *ProfRecord;
533 InstrProfileEntry(InstrProfRecord *Record);
534 InstrProfileEntry() = default;
537 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
538 ProfRecord = Record;
539 uint64_t CntNum = Record->Counts.size();
540 uint64_t ZeroCntNum = 0;
541 for (size_t I = 0; I < CntNum; ++I) {
542 MaxCount = std::max(MaxCount, Record->Counts[I]);
543 ZeroCntNum += !Record->Counts[I];
545 ZeroCounterRatio = (float)ZeroCntNum / CntNum;
546 NumEdgeCounters = CntNum;
549 /// Either set all the counters in the instr profile entry \p IFE to
550 /// -1 / -2 /in order to drop the profile or scale up the
551 /// counters in \p IFP to be above hot / cold threshold. We use
552 /// the ratio of zero counters in the profile of a function to
553 /// decide the profile is helpful or harmful for performance,
554 /// and to choose whether to scale up or drop it.
555 static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
556 uint64_t HotInstrThreshold,
557 uint64_t ColdInstrThreshold,
558 float ZeroCounterThreshold) {
559 InstrProfRecord *ProfRecord = IFE.ProfRecord;
560 if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
561 // If all or most of the counters of the function are zero, the
562 // profile is unaccountable and should be dropped. Reset all the
563 // counters to be -1 / -2 and PGO profile-use will drop the profile.
564 // All counters being -1 also implies that the function is hot so
565 // PGO profile-use will also set the entry count metadata to be
566 // above hot threshold.
567 // All counters being -2 implies that the function is warm so
568 // PGO profile-use will also set the entry count metadata to be
569 // above cold threshold.
570 auto Kind =
571 (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
572 ProfRecord->setPseudoCount(Kind);
573 return;
576 // Scale up the MaxCount to be multiple times above hot / cold threshold.
577 const unsigned MultiplyFactor = 3;
578 uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
579 uint64_t Numerator = Threshold * MultiplyFactor;
581 // Make sure Threshold for warm counters is below the HotInstrThreshold.
582 if (!SetToHot && Threshold >= HotInstrThreshold) {
583 Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
586 uint64_t Denominator = IFE.MaxCount;
587 if (Numerator <= Denominator)
588 return;
589 ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
590 warn(toString(make_error<InstrProfError>(E)));
// Indices into ProfileSummaryBuilder::DefaultCutoffs that select the
// percentile used for the cold and the hot threshold respectively.
const uint64_t ColdPercentileIdx{15};
const uint64_t HotPercentileIdx{11};
597 using sampleprof::FSDiscriminatorPass;
599 // Internal options to set FSDiscriminatorPass. Used in merge and show
600 // commands.
601 static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
602 "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
603 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
604 "pass beyond this value. The enum values are defined in "
605 "Support/Discriminator.h"),
606 cl::values(clEnumVal(Base, "Use base discriminators only"),
607 clEnumVal(Pass1, "Use base and pass 1 discriminators"),
608 clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
609 clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
610 clEnumVal(PassLast, "Use all discriminator bits (default)")));
612 static unsigned getDiscriminatorMask() {
613 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
616 /// Adjust the instr profile in \p WC based on the sample profile in
617 /// \p Reader.
618 static void
619 adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
620 std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
621 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
622 unsigned InstrProfColdThreshold) {
623 // Function to its entry in instr profile.
624 StringMap<InstrProfileEntry> InstrProfileMap;
625 StringMap<StringRef> StaticFuncMap;
626 InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
628 auto checkSampleProfileHasFUnique = [&Reader]() {
629 for (const auto &PD : Reader->getProfiles()) {
630 auto &FContext = PD.second.getContext();
631 if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
632 std::string::npos) {
633 return true;
636 return false;
639 bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
641 auto buildStaticFuncMap = [&StaticFuncMap,
642 SampleProfileHasFUnique](const StringRef Name) {
643 std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"};
644 size_t PrefixPos = StringRef::npos;
645 for (auto &Prefix : Prefixes) {
646 PrefixPos = Name.find_insensitive(Prefix);
647 if (PrefixPos == StringRef::npos)
648 continue;
649 PrefixPos += Prefix.size();
650 break;
653 if (PrefixPos == StringRef::npos) {
654 return;
657 StringRef NewName = Name.drop_front(PrefixPos);
658 StringRef FName = Name.substr(0, PrefixPos - 1);
659 if (NewName.size() == 0) {
660 return;
663 // This name should have a static linkage.
664 size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
665 bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
667 // If sample profile and instrumented profile do not agree on symbol
668 // uniqification.
669 if (SampleProfileHasFUnique != ProfileHasFUnique) {
670 // If instrumented profile uses -funique-internal-linakge-symbols,
671 // we need to trim the name.
672 if (ProfileHasFUnique) {
673 NewName = NewName.substr(0, PostfixPos);
674 } else {
675 // If sample profile uses -funique-internal-linakge-symbols,
676 // we build the map.
677 std::string NStr =
678 NewName.str() + getUniqueInternalLinkagePostfix(FName);
679 NewName = StringRef(NStr);
680 StaticFuncMap[NewName] = Name;
681 return;
685 if (!StaticFuncMap.contains(NewName)) {
686 StaticFuncMap[NewName] = Name;
687 } else {
688 StaticFuncMap[NewName] = DuplicateNameStr;
692 // We need to flatten the SampleFDO profile as the InstrFDO
693 // profile does not have inlined callsite profiles.
694 // One caveat is the pre-inlined function -- their samples
695 // should be collapsed into the caller function.
696 // Here we do a DFS traversal to get the flatten profile
697 // info: the sum of entrycount and the max of maxcount.
698 // Here is the algorithm:
699 // recursive (FS, root_name) {
700 // name = FS->getName();
701 // get samples for FS;
702 // if (InstrProf.find(name) {
703 // root_name = name;
704 // } else {
705 // if (name is in static_func map) {
706 // root_name = static_name;
707 // }
708 // }
709 // update the Map entry for root_name;
710 // for (subfs: FS) {
711 // recursive(subfs, root_name);
712 // }
713 // }
715 // Here is an example.
717 // SampleProfile:
718 // foo:12345:1000
719 // 1: 1000
720 // 2.1: 1000
721 // 15: 5000
722 // 4: bar:1000
723 // 1: 1000
724 // 2: goo:3000
725 // 1: 3000
726 // 8: bar:40000
727 // 1: 10000
728 // 2: goo:30000
729 // 1: 30000
731 // InstrProfile has two entries:
732 // foo
733 // bar.cc:bar
735 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
736 // {"foo", {1000, 5000}}
737 // {"bar.cc:bar", {11000, 30000}}
739 // foo's has an entry count of 1000, and max body count of 5000.
740 // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and
741 // 10000), and max count of 30000 (from the callsite in line 8).
743 // Note that goo's count will remain in bar.cc:bar() as it does not have an
744 // entry in InstrProfile.
745 llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
746 auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
747 &InstrProfileMap](const FunctionSamples &FS,
748 const StringRef &RootName) {
749 auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
750 const StringRef &RootName,
751 auto &BuildImpl) -> void {
752 std::string NameStr = FS.getFunction().str();
753 const StringRef Name = NameStr;
754 const StringRef *NewRootName = &RootName;
755 uint64_t EntrySample = FS.getHeadSamplesEstimate();
756 uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
758 auto It = InstrProfileMap.find(Name);
759 if (It != InstrProfileMap.end()) {
760 NewRootName = &Name;
761 } else {
762 auto NewName = StaticFuncMap.find(Name);
763 if (NewName != StaticFuncMap.end()) {
764 It = InstrProfileMap.find(NewName->second.str());
765 if (NewName->second != DuplicateNameStr) {
766 NewRootName = &NewName->second;
768 } else {
769 // Here the EntrySample is of an inlined function, so we should not
770 // update the EntrySample in the map.
771 EntrySample = 0;
774 EntrySample += FlattenSampleMap[*NewRootName].first;
775 MaxBodySample =
776 std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
777 FlattenSampleMap[*NewRootName] =
778 std::make_pair(EntrySample, MaxBodySample);
780 for (const auto &C : FS.getCallsiteSamples())
781 for (const auto &F : C.second)
782 BuildImpl(F.second, *NewRootName, BuildImpl);
784 BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
787 for (auto &PD : WC->Writer.getProfileData()) {
788 // Populate IPBuilder.
789 for (const auto &PDV : PD.getValue()) {
790 InstrProfRecord Record = PDV.second;
791 IPBuilder.addRecord(Record);
794 // If a function has multiple entries in instr profile, skip it.
795 if (PD.getValue().size() != 1)
796 continue;
798 // Initialize InstrProfileMap.
799 InstrProfRecord *R = &PD.getValue().begin()->second;
800 StringRef FullName = PD.getKey();
801 InstrProfileMap[FullName] = InstrProfileEntry(R);
802 buildStaticFuncMap(FullName);
805 for (auto &PD : Reader->getProfiles()) {
806 sampleprof::FunctionSamples &FS = PD.second;
807 std::string Name = FS.getFunction().str();
808 BuildMaxSampleMap(FS, Name);
811 ProfileSummary InstrPS = *IPBuilder.getSummary();
812 ProfileSummary SamplePS = Reader->getSummary();
814 // Compute cold thresholds for instr profile and sample profile.
815 uint64_t HotSampleThreshold =
816 ProfileSummaryBuilder::getEntryForPercentile(
817 SamplePS.getDetailedSummary(),
818 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
819 .MinCount;
820 uint64_t ColdSampleThreshold =
821 ProfileSummaryBuilder::getEntryForPercentile(
822 SamplePS.getDetailedSummary(),
823 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
824 .MinCount;
825 uint64_t HotInstrThreshold =
826 ProfileSummaryBuilder::getEntryForPercentile(
827 InstrPS.getDetailedSummary(),
828 ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
829 .MinCount;
830 uint64_t ColdInstrThreshold =
831 InstrProfColdThreshold
832 ? InstrProfColdThreshold
833 : ProfileSummaryBuilder::getEntryForPercentile(
834 InstrPS.getDetailedSummary(),
835 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
836 .MinCount;
838 // Find hot/warm functions in sample profile which is cold in instr profile
839 // and adjust the profiles of those functions in the instr profile.
840 for (const auto &E : FlattenSampleMap) {
841 uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
842 if (SampleMaxCount < ColdSampleThreshold)
843 continue;
844 StringRef Name = E.first();
845 auto It = InstrProfileMap.find(Name);
846 if (It == InstrProfileMap.end()) {
847 auto NewName = StaticFuncMap.find(Name);
848 if (NewName != StaticFuncMap.end()) {
849 It = InstrProfileMap.find(NewName->second.str());
850 if (NewName->second == DuplicateNameStr) {
851 WithColor::warning()
852 << "Static function " << Name
853 << " has multiple promoted names, cannot adjust profile.\n";
857 if (It == InstrProfileMap.end() ||
858 It->second.MaxCount > ColdInstrThreshold ||
859 It->second.NumEdgeCounters < SupplMinSizeThreshold)
860 continue;
861 bool SetToHot = SampleMaxCount >= HotSampleThreshold;
862 updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
863 ColdInstrThreshold, ZeroCounterThreshold);
867 /// The main function to supplement instr profile with sample profile.
868 /// \Inputs contains the instr profile. \p SampleFilename specifies the
869 /// sample profile. \p OutputFilename specifies the output profile name.
870 /// \p OutputFormat specifies the output profile format. \p OutputSparse
871 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
872 /// specifies the minimal size for the functions whose profile will be
873 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
874 /// a function contains too many zero counters and whether its profile
875 /// should be dropped. \p InstrProfColdThreshold is the user specified
876 /// cold threshold which will override the cold threshold got from the
877 /// instr profile summary.
878 static void supplementInstrProfile(
879 const WeightedFileVector &Inputs, StringRef SampleFilename,
880 StringRef OutputFilename, ProfileFormat OutputFormat, bool OutputSparse,
881 unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
882 unsigned InstrProfColdThreshold) {
883 if (OutputFilename.compare("-") == 0)
884 exitWithError("cannot write indexed profdata format to stdout");
885 if (Inputs.size() != 1)
886 exitWithError("expect one input to be an instr profile");
887 if (Inputs[0].Weight != 1)
888 exitWithError("expect instr profile doesn't have weight");
890 StringRef InstrFilename = Inputs[0].Filename;
892 // Read sample profile.
893 LLVMContext Context;
894 auto FS = vfs::getRealFileSystem();
895 auto ReaderOrErr = sampleprof::SampleProfileReader::create(
896 SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
897 if (std::error_code EC = ReaderOrErr.getError())
898 exitWithErrorCode(EC, SampleFilename);
899 auto Reader = std::move(ReaderOrErr.get());
900 if (std::error_code EC = Reader->read())
901 exitWithErrorCode(EC, SampleFilename);
903 // Read instr profile.
904 std::mutex ErrorLock;
905 SmallSet<instrprof_error, 4> WriterErrorCodes;
906 auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
907 WriterErrorCodes);
908 loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
909 if (WC->Errors.size() > 0)
910 exitWithError(std::move(WC->Errors[0].first), InstrFilename);
912 adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
913 InstrProfColdThreshold);
914 writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
917 /// Make a copy of the given function samples with all symbol names remapped
918 /// by the provided symbol remapper.
919 static sampleprof::FunctionSamples
920 remapSamples(const sampleprof::FunctionSamples &Samples,
921 SymbolRemapper &Remapper, sampleprof_error &Error) {
922 sampleprof::FunctionSamples Result;
923 Result.setFunction(Remapper(Samples.getFunction()));
924 Result.addTotalSamples(Samples.getTotalSamples());
925 Result.addHeadSamples(Samples.getHeadSamples());
926 for (const auto &BodySample : Samples.getBodySamples()) {
927 uint32_t MaskedDiscriminator =
928 BodySample.first.Discriminator & getDiscriminatorMask();
929 Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
930 BodySample.second.getSamples());
931 for (const auto &Target : BodySample.second.getCallTargets()) {
932 Result.addCalledTargetSamples(BodySample.first.LineOffset,
933 MaskedDiscriminator,
934 Remapper(Target.first), Target.second);
937 for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
938 sampleprof::FunctionSamplesMap &Target =
939 Result.functionSamplesAt(CallsiteSamples.first);
940 for (const auto &Callsite : CallsiteSamples.second) {
941 sampleprof::FunctionSamples Remapped =
942 remapSamples(Callsite.second, Remapper, Error);
943 MergeResult(Error, Target[Remapped.getFunction()].merge(Remapped));
946 return Result;
949 static sampleprof::SampleProfileFormat FormatMap[] = {
950 sampleprof::SPF_None,
951 sampleprof::SPF_Text,
952 sampleprof::SPF_None,
953 sampleprof::SPF_Ext_Binary,
954 sampleprof::SPF_GCC,
955 sampleprof::SPF_Binary};
957 static std::unique_ptr<MemoryBuffer>
958 getInputFileBuf(const StringRef &InputFile) {
959 if (InputFile == "")
960 return {};
962 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
963 if (!BufOrError)
964 exitWithErrorCode(BufOrError.getError(), InputFile);
966 return std::move(*BufOrError);
969 static void populateProfileSymbolList(MemoryBuffer *Buffer,
970 sampleprof::ProfileSymbolList &PSL) {
971 if (!Buffer)
972 return;
974 SmallVector<StringRef, 32> SymbolVec;
975 StringRef Data = Buffer->getBuffer();
976 Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
978 for (StringRef SymbolStr : SymbolVec)
979 PSL.add(SymbolStr.trim());
982 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
983 ProfileFormat OutputFormat,
984 MemoryBuffer *Buffer,
985 sampleprof::ProfileSymbolList &WriterList,
986 bool CompressAllSections, bool UseMD5,
987 bool GenPartialProfile) {
988 populateProfileSymbolList(Buffer, WriterList);
989 if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
990 warn("Profile Symbol list is not empty but the output format is not "
991 "ExtBinary format. The list will be lost in the output. ");
993 Writer.setProfileSymbolList(&WriterList);
995 if (CompressAllSections) {
996 if (OutputFormat != PF_Ext_Binary)
997 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
998 else
999 Writer.setToCompressAllSections();
1001 if (UseMD5) {
1002 if (OutputFormat != PF_Ext_Binary)
1003 warn("-use-md5 is ignored. Specify -extbinary to enable it");
1004 else
1005 Writer.setUseMD5();
1007 if (GenPartialProfile) {
1008 if (OutputFormat != PF_Ext_Binary)
1009 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1010 else
1011 Writer.setPartialProfile();
1015 static void
1016 mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
1017 StringRef OutputFilename, ProfileFormat OutputFormat,
1018 StringRef ProfileSymbolListFile, bool CompressAllSections,
1019 bool UseMD5, bool GenPartialProfile,
1020 SampleProfileLayout ProfileLayout,
1021 bool SampleMergeColdContext, bool SampleTrimColdContext,
1022 bool SampleColdContextFrameDepth, FailureMode FailMode,
1023 bool DropProfileSymbolList, size_t OutputSizeLimit) {
1024 using namespace sampleprof;
1025 SampleProfileMap ProfileMap;
1026 SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1027 LLVMContext Context;
1028 sampleprof::ProfileSymbolList WriterList;
1029 std::optional<bool> ProfileIsProbeBased;
1030 std::optional<bool> ProfileIsCS;
1031 for (const auto &Input : Inputs) {
1032 auto FS = vfs::getRealFileSystem();
1033 auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1034 FSDiscriminatorPassOption);
1035 if (std::error_code EC = ReaderOrErr.getError()) {
1036 warnOrExitGivenError(FailMode, EC, Input.Filename);
1037 continue;
1040 // We need to keep the readers around until after all the files are
1041 // read so that we do not lose the function names stored in each
1042 // reader's memory. The function names are needed to write out the
1043 // merged profile map.
1044 Readers.push_back(std::move(ReaderOrErr.get()));
1045 const auto Reader = Readers.back().get();
1046 if (std::error_code EC = Reader->read()) {
1047 warnOrExitGivenError(FailMode, EC, Input.Filename);
1048 Readers.pop_back();
1049 continue;
1052 SampleProfileMap &Profiles = Reader->getProfiles();
1053 if (ProfileIsProbeBased &&
1054 ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1055 exitWithError(
1056 "cannot merge probe-based profile with non-probe-based profile");
1057 ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1058 if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1059 exitWithError("cannot merge CS profile with non-CS profile");
1060 ProfileIsCS = FunctionSamples::ProfileIsCS;
1061 for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1062 I != E; ++I) {
1063 sampleprof_error Result = sampleprof_error::success;
1064 FunctionSamples Remapped =
1065 Remapper ? remapSamples(I->second, *Remapper, Result)
1066 : FunctionSamples();
1067 FunctionSamples &Samples = Remapper ? Remapped : I->second;
1068 SampleContext FContext = Samples.getContext();
1069 MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
1070 if (Result != sampleprof_error::success) {
1071 std::error_code EC = make_error_code(Result);
1072 handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1073 FContext.toString());
1077 if (!DropProfileSymbolList) {
1078 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1079 Reader->getProfileSymbolList();
1080 if (ReaderList)
1081 WriterList.merge(*ReaderList);
1085 if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1086 // Use threshold calculated from profile summary unless specified.
1087 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1088 auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1089 uint64_t SampleProfColdThreshold =
1090 ProfileSummaryBuilder::getColdCountThreshold(
1091 (Summary->getDetailedSummary()));
1093 // Trim and merge cold context profile using cold threshold above;
1094 SampleContextTrimmer(ProfileMap)
1095 .trimAndMergeColdContextProfiles(
1096 SampleProfColdThreshold, SampleTrimColdContext,
1097 SampleMergeColdContext, SampleColdContextFrameDepth, false);
1100 if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1101 ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1102 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1103 } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1104 ProfileConverter CSConverter(ProfileMap);
1105 CSConverter.convertCSProfiles();
1106 ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1109 auto WriterOrErr =
1110 SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1111 if (std::error_code EC = WriterOrErr.getError())
1112 exitWithErrorCode(EC, OutputFilename);
1114 auto Writer = std::move(WriterOrErr.get());
1115 // WriterList will have StringRef refering to string in Buffer.
1116 // Make sure Buffer lives as long as WriterList.
1117 auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1118 handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1119 CompressAllSections, UseMD5, GenPartialProfile);
1121 // If OutputSizeLimit is 0 (default), it is the same as write().
1122 if (std::error_code EC =
1123 Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1124 exitWithErrorCode(std::move(EC));
1127 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1128 StringRef WeightStr, FileName;
1129 std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1131 uint64_t Weight;
1132 if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1133 exitWithError("input weight must be a positive integer");
1135 return {std::string(FileName), Weight};
1138 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1139 StringRef Filename = WF.Filename;
1140 uint64_t Weight = WF.Weight;
1142 // If it's STDIN just pass it on.
1143 if (Filename == "-") {
1144 WNI.push_back({std::string(Filename), Weight});
1145 return;
1148 llvm::sys::fs::file_status Status;
1149 llvm::sys::fs::status(Filename, Status);
1150 if (!llvm::sys::fs::exists(Status))
1151 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1152 Filename);
1153 // If it's a source file, collect it.
1154 if (llvm::sys::fs::is_regular_file(Status)) {
1155 WNI.push_back({std::string(Filename), Weight});
1156 return;
1159 if (llvm::sys::fs::is_directory(Status)) {
1160 std::error_code EC;
1161 for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1162 F != E && !EC; F.increment(EC)) {
1163 if (llvm::sys::fs::is_regular_file(F->path())) {
1164 addWeightedInput(WNI, {F->path(), Weight});
1167 if (EC)
1168 exitWithErrorCode(EC, Filename);
1172 static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1173 WeightedFileVector &WFV) {
1174 if (!Buffer)
1175 return;
1177 SmallVector<StringRef, 8> Entries;
1178 StringRef Data = Buffer->getBuffer();
1179 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1180 for (const StringRef &FileWeightEntry : Entries) {
1181 StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1182 // Skip comments.
1183 if (SanitizedEntry.startswith("#"))
1184 continue;
1185 // If there's no comma, it's an unweighted profile.
1186 else if (!SanitizedEntry.contains(','))
1187 addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1188 else
1189 addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1193 static int merge_main(int argc, const char *argv[]) {
1194 cl::list<std::string> InputFilenames(cl::Positional,
1195 cl::desc("<filename...>"));
1196 cl::list<std::string> WeightedInputFilenames("weighted-input",
1197 cl::desc("<weight>,<filename>"));
1198 cl::opt<std::string> InputFilenamesFile(
1199 "input-files", cl::init(""),
1200 cl::desc("Path to file containing newline-separated "
1201 "[<weight>,]<filename> entries"));
1202 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
1203 cl::aliasopt(InputFilenamesFile));
1204 cl::opt<bool> DumpInputFileList(
1205 "dump-input-file-list", cl::init(false), cl::Hidden,
1206 cl::desc("Dump the list of input files and their weights, then exit"));
1207 cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
1208 cl::desc("Symbol remapping file"));
1209 cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
1210 cl::aliasopt(RemappingFile));
1211 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
1212 cl::init("-"), cl::desc("Output file"));
1213 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
1214 cl::aliasopt(OutputFilename));
1215 cl::opt<ProfileKinds> ProfileKind(
1216 cl::desc("Profile kind:"), cl::init(instr),
1217 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
1218 clEnumVal(sample, "Sample profile")));
1219 cl::opt<ProfileFormat> OutputFormat(
1220 cl::desc("Format of output profile"), cl::init(PF_Ext_Binary),
1221 cl::values(
1222 clEnumValN(PF_Binary, "binary", "Binary encoding"),
1223 clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding "
1224 "(default)"),
1225 clEnumValN(PF_Text, "text", "Text encoding"),
1226 clEnumValN(PF_GCC, "gcc",
1227 "GCC encoding (only meaningful for -sample)")));
1228 cl::opt<FailureMode> FailureMode(
1229 "failure-mode", cl::init(failIfAnyAreInvalid), cl::desc("Failure mode:"),
1230 cl::values(
1231 clEnumValN(warnOnly, "warn", "Do not fail and just print warnings."),
1232 clEnumValN(failIfAnyAreInvalid, "any",
1233 "Fail if any profile is invalid."),
1234 clEnumValN(failIfAllAreInvalid, "all",
1235 "Fail only if all profiles are invalid.")));
1236 cl::opt<bool> OutputSparse("sparse", cl::init(false),
1237 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
1238 cl::opt<unsigned> NumThreads(
1239 "num-threads", cl::init(0),
1240 cl::desc("Number of merge threads to use (default: autodetect)"));
1241 cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
1242 cl::aliasopt(NumThreads));
1243 cl::opt<std::string> ProfileSymbolListFile(
1244 "prof-sym-list", cl::init(""),
1245 cl::desc("Path to file containing the list of function symbols "
1246 "used to populate profile symbol list"));
1247 cl::opt<bool> CompressAllSections(
1248 "compress-all-sections", cl::init(false), cl::Hidden,
1249 cl::desc("Compress all sections when writing the profile (only "
1250 "meaningful for -extbinary)"));
1251 cl::opt<bool> UseMD5(
1252 "use-md5", cl::init(false), cl::Hidden,
1253 cl::desc("Choose to use MD5 to represent string in name table (only "
1254 "meaningful for -extbinary)"));
1255 cl::opt<bool> SampleMergeColdContext(
1256 "sample-merge-cold-context", cl::init(false), cl::Hidden,
1257 cl::desc(
1258 "Merge context sample profiles whose count is below cold threshold"));
1259 cl::opt<bool> SampleTrimColdContext(
1260 "sample-trim-cold-context", cl::init(false), cl::Hidden,
1261 cl::desc(
1262 "Trim context sample profiles whose count is below cold threshold"));
1263 cl::opt<uint32_t> SampleColdContextFrameDepth(
1264 "sample-frame-depth-for-cold-context", cl::init(1),
1265 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
1266 "context-less base profile"));
1267 cl::opt<size_t> OutputSizeLimit(
1268 "output-size-limit", cl::init(0), cl::Hidden,
1269 cl::desc("Trim cold functions until profile size is below specified "
1270 "limit in bytes. This uses a heursitic and functions may be "
1271 "excessively trimmed"));
1272 cl::opt<bool> GenPartialProfile(
1273 "gen-partial-profile", cl::init(false), cl::Hidden,
1274 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
1275 cl::opt<std::string> SupplInstrWithSample(
1276 "supplement-instr-with-sample", cl::init(""), cl::Hidden,
1277 cl::desc("Supplement an instr profile with sample profile, to correct "
1278 "the profile unrepresentativeness issue. The sample "
1279 "profile is the input of the flag. Output will be in instr "
1280 "format (The flag only works with -instr)"));
1281 cl::opt<float> ZeroCounterThreshold(
1282 "zero-counter-threshold", cl::init(0.7), cl::Hidden,
1283 cl::desc("For the function which is cold in instr profile but hot in "
1284 "sample profile, if the ratio of the number of zero counters "
1285 "divided by the total number of counters is above the "
1286 "threshold, the profile of the function will be regarded as "
1287 "being harmful for performance and will be dropped."));
1288 cl::opt<unsigned> SupplMinSizeThreshold(
1289 "suppl-min-size-threshold", cl::init(10), cl::Hidden,
1290 cl::desc("If the size of a function is smaller than the threshold, "
1291 "assume it can be inlined by PGO early inliner and it won't "
1292 "be adjusted based on sample profile."));
1293 cl::opt<unsigned> InstrProfColdThreshold(
1294 "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
1295 cl::desc("User specified cold threshold for instr profile which will "
1296 "override the cold threshold got from profile summary. "));
1297 cl::opt<SampleProfileLayout> ProfileLayout(
1298 "convert-sample-profile-layout",
1299 cl::desc("Convert the generated profile to a profile with a new layout"),
1300 cl::init(SPL_None),
1301 cl::values(
1302 clEnumValN(SPL_Nest, "nest",
1303 "Nested profile, the input should be CS flat profile"),
1304 clEnumValN(SPL_Flat, "flat",
1305 "Profile with nested inlinee flatten out")));
1306 cl::opt<std::string> DebugInfoFilename(
1307 "debug-info", cl::init(""),
1308 cl::desc("Use the provided debug info to correlate the raw profile."));
1309 cl::opt<unsigned> MaxDbgCorrelationWarnings(
1310 "max-debug-info-correlation-warnings",
1311 cl::desc("The maximum number of warnings to emit when correlating "
1312 "profile from debug info (0 = no limit)"),
1313 cl::init(5));
1314 cl::opt<std::string> ProfiledBinary(
1315 "profiled-binary", cl::init(""),
1316 cl::desc("Path to binary from which the profile was collected."));
1317 cl::opt<bool> DropProfileSymbolList(
1318 "drop-profile-symbol-list", cl::init(false), cl::Hidden,
1319 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
1320 "(only meaningful for -sample)"));
1321 // WARNING: This reservoir size value is propagated to any input indexed
1322 // profiles for simplicity. Changing this value between invocations could
1323 // result in sample bias.
1324 cl::opt<uint64_t> TemporalProfTraceReservoirSize(
1325 "temporal-profile-trace-reservoir-size", cl::init(100),
1326 cl::desc("The maximum number of stored temporal profile traces (default: "
1327 "100)"));
1328 cl::opt<uint64_t> TemporalProfMaxTraceLength(
1329 "temporal-profile-max-trace-length", cl::init(10000),
1330 cl::desc("The maximum length of a single temporal profile trace "
1331 "(default: 10000)"));
1333 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
1335 WeightedFileVector WeightedInputs;
1336 for (StringRef Filename : InputFilenames)
1337 addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1338 for (StringRef WeightedFilename : WeightedInputFilenames)
1339 addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1341 // Make sure that the file buffer stays alive for the duration of the
1342 // weighted input vector's lifetime.
1343 auto Buffer = getInputFileBuf(InputFilenamesFile);
1344 parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1346 if (WeightedInputs.empty())
1347 exitWithError("no input files specified. See " +
1348 sys::path::filename(argv[0]) + " -help");
1350 if (DumpInputFileList) {
1351 for (auto &WF : WeightedInputs)
1352 outs() << WF.Weight << "," << WF.Filename << "\n";
1353 return 0;
1356 std::unique_ptr<SymbolRemapper> Remapper;
1357 if (!RemappingFile.empty())
1358 Remapper = SymbolRemapper::create(RemappingFile);
1360 if (!SupplInstrWithSample.empty()) {
1361 if (ProfileKind != instr)
1362 exitWithError(
1363 "-supplement-instr-with-sample can only work with -instr. ");
1365 supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputFilename,
1366 OutputFormat, OutputSparse, SupplMinSizeThreshold,
1367 ZeroCounterThreshold, InstrProfColdThreshold);
1368 return 0;
1371 if (ProfileKind == instr)
1372 mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
1373 OutputFilename, OutputFormat,
1374 TemporalProfTraceReservoirSize,
1375 TemporalProfMaxTraceLength, MaxDbgCorrelationWarnings,
1376 OutputSparse, NumThreads, FailureMode, ProfiledBinary);
1377 else
1378 mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
1379 OutputFormat, ProfileSymbolListFile, CompressAllSections,
1380 UseMD5, GenPartialProfile, ProfileLayout,
1381 SampleMergeColdContext, SampleTrimColdContext,
1382 SampleColdContextFrameDepth, FailureMode,
1383 DropProfileSymbolList, OutputSizeLimit);
1384 return 0;
1387 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1388 static void overlapInstrProfile(const std::string &BaseFilename,
1389 const std::string &TestFilename,
1390 const OverlapFuncFilters &FuncFilter,
1391 raw_fd_ostream &OS, bool IsCS) {
1392 std::mutex ErrorLock;
1393 SmallSet<instrprof_error, 4> WriterErrorCodes;
1394 WriterContext Context(false, ErrorLock, WriterErrorCodes);
1395 WeightedFile WeightedInput{BaseFilename, 1};
1396 OverlapStats Overlap;
1397 Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1398 if (E)
1399 exitWithError(std::move(E), "error in getting profile count sums");
1400 if (Overlap.Base.CountSum < 1.0f) {
1401 OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1402 exit(0);
1404 if (Overlap.Test.CountSum < 1.0f) {
1405 OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1406 exit(0);
1408 loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1409 overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1410 IsCS);
1411 Overlap.dump(OS);
1414 namespace {
1415 struct SampleOverlapStats {
1416 SampleContext BaseName;
1417 SampleContext TestName;
1418 // Number of overlap units
1419 uint64_t OverlapCount = 0;
1420 // Total samples of overlap units
1421 uint64_t OverlapSample = 0;
1422 // Number of and total samples of units that only present in base or test
1423 // profile
1424 uint64_t BaseUniqueCount = 0;
1425 uint64_t BaseUniqueSample = 0;
1426 uint64_t TestUniqueCount = 0;
1427 uint64_t TestUniqueSample = 0;
1428 // Number of units and total samples in base or test profile
1429 uint64_t BaseCount = 0;
1430 uint64_t BaseSample = 0;
1431 uint64_t TestCount = 0;
1432 uint64_t TestSample = 0;
1433 // Number of and total samples of units that present in at least one profile
1434 uint64_t UnionCount = 0;
1435 uint64_t UnionSample = 0;
1436 // Weighted similarity
1437 double Similarity = 0.0;
1438 // For SampleOverlapStats instances representing functions, weights of the
1439 // function in base and test profiles
1440 double BaseWeight = 0.0;
1441 double TestWeight = 0.0;
1443 SampleOverlapStats() = default;
1445 } // end anonymous namespace
namespace {
/// Per-function sample statistics: the sum of samples, the largest single
/// sample count, and the number of hot blocks. A default-constructed object
/// has every field set to zero.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  FuncSampleStats() = default;
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum(SampleSum), MaxSample(MaxSample),
        HotBlockCount(HotBlockCount) {}
};
} // end anonymous namespace
namespace {
/// Outcome of comparing the current keys of two sorted maps.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor for walking two sorted maps in lock step, as in a merge. \p T is
/// the map iterator type; its elements must expose a `first` member that is
/// comparable with operator<.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Move forward according to the status of the previous step (no movement
  /// when that status is MS_None, i.e. nothing has been compared yet), then
  /// recompute the status from the elements the iterators now point at. The
  /// status is left untouched once both containers are exhausted.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Position in, and end of, the first container.
  T FirstIter;
  T FirstEnd;
  // Position in, and end of, the second container.
  T SecondIter;
  T SecondEnd;
  // Result of the most recent comparison.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace

template <class T> void MatchStep<T>::updateOneStep() {
  // Consume whatever the previous step reported.
  if (Status == MS_Match || Status == MS_FirstUnique)
    ++FirstIter;
  if (Status == MS_Match || Status == MS_SecondUnique)
    ++SecondIter;

  // Classify the elements at the new position.
  if (areBothFinished())
    return;

  if (isFirstFinished())
    Status = MS_SecondUnique;
  else if (isSecondFinished())
    Status = MS_FirstUnique;
  else if (FirstIter->first < SecondIter->first)
    Status = MS_FirstUnique;
  else if (SecondIter->first < FirstIter->first)
    Status = MS_SecondUnique;
  else
    Status = MS_Match;
}
1535 // Return the sum of line/block samples, the max line/block sample, and the
1536 // number of line/block samples above the given threshold in a function
1537 // including its inlinees.
1538 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1539 FuncSampleStats &FuncStats,
1540 uint64_t HotThreshold) {
1541 for (const auto &L : Func.getBodySamples()) {
1542 uint64_t Sample = L.second.getSamples();
1543 FuncStats.SampleSum += Sample;
1544 FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1545 if (Sample >= HotThreshold)
1546 ++FuncStats.HotBlockCount;
1549 for (const auto &C : Func.getCallsiteSamples()) {
1550 for (const auto &F : C.second)
1551 getFuncSampleStats(F.second, FuncStats, HotThreshold);
1555 /// Predicate that determines if a function is hot with a given threshold. We
1556 /// keep it separate from its callsites for possible extension in the future.
1557 static bool isFunctionHot(const FuncSampleStats &FuncStats,
1558 uint64_t HotThreshold) {
1559 // We intentionally compare the maximum sample count in a function with the
1560 // HotThreshold to get an approximate determination on hot functions.
1561 return (FuncStats.MaxSample >= HotThreshold);
1564 namespace {
1565 class SampleOverlapAggregator {
1566 public:
1567 SampleOverlapAggregator(const std::string &BaseFilename,
1568 const std::string &TestFilename,
1569 double LowSimilarityThreshold, double Epsilon,
1570 const OverlapFuncFilters &FuncFilter)
1571 : BaseFilename(BaseFilename), TestFilename(TestFilename),
1572 LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1573 FuncFilter(FuncFilter) {}
1575 /// Detect 0-sample input profile and report to output stream. This interface
1576 /// should be called after loadProfiles().
1577 bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1579 /// Write out function-level similarity statistics for functions specified by
1580 /// options --function, --value-cutoff, and --similarity-cutoff.
1581 void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1583 /// Write out program-level similarity and overlap statistics.
1584 void dumpProgramSummary(raw_fd_ostream &OS) const;
1586 /// Write out hot-function and hot-block statistics for base_profile,
1587 /// test_profile, and their overlap. For both cases, the overlap HO is
1588 /// calculated as follows:
1589 /// Given the number of functions (or blocks) that are hot in both profiles
1590 /// HCommon and the number of functions (or blocks) that are hot in at
1591 /// least one profile HUnion, HO = HCommon / HUnion.
1592 void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1594 /// This function tries matching functions in base and test profiles. For each
1595 /// pair of matched functions, it aggregates the function-level
1596 /// similarity into a profile-level similarity. It also dump function-level
1597 /// similarity information of functions specified by --function,
1598 /// --value-cutoff, and --similarity-cutoff options. The program-level
1599 /// similarity PS is computed as follows:
1600 /// Given function-level similarity FS(A) for all function A, the
1601 /// weight of function A in base profile WB(A), and the weight of function
1602 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1603 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1604 /// meaning no-overlap.
1605 void computeSampleProfileOverlap(raw_fd_ostream &OS);
1607 /// Initialize ProfOverlap with the sum of samples in base and test
1608 /// profiles. This function also computes and keeps the sum of samples and
1609 /// max sample counts of each function in BaseStats and TestStats for later
1610 /// use to avoid re-computations.
1611 void initializeSampleProfileOverlap();
1613 /// Load profiles specified by BaseFilename and TestFilename.
1614 std::error_code loadProfiles();
1616 using FuncSampleStatsMap =
1617 std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1619 private:
1620 SampleOverlapStats ProfOverlap;
1621 SampleOverlapStats HotFuncOverlap;
1622 SampleOverlapStats HotBlockOverlap;
1623 std::string BaseFilename;
1624 std::string TestFilename;
1625 std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1626 std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1627 // BaseStats and TestStats hold FuncSampleStats for each function, with
1628 // function name as the key.
1629 FuncSampleStatsMap BaseStats;
1630 FuncSampleStatsMap TestStats;
1631 // Low similarity threshold in floating point number
1632 double LowSimilarityThreshold;
1633 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1634 // for tracking hot blocks.
1635 uint64_t BaseHotThreshold;
1636 uint64_t TestHotThreshold;
1637 // A small threshold used to round the results of floating point accumulations
1638 // to resolve imprecision.
1639 const double Epsilon;
1640 std::multimap<double, SampleOverlapStats, std::greater<double>>
1641 FuncSimilarityDump;
1642 // FuncFilter carries specifications in options --value-cutoff and
1643 // --function.
1644 OverlapFuncFilters FuncFilter;
1645 // Column offsets for printing the function-level details table.
1646 static const unsigned int TestWeightCol = 15;
1647 static const unsigned int SimilarityCol = 30;
1648 static const unsigned int OverlapCol = 43;
1649 static const unsigned int BaseUniqueCol = 53;
1650 static const unsigned int TestUniqueCol = 67;
1651 static const unsigned int BaseSampleCol = 81;
1652 static const unsigned int TestSampleCol = 96;
1653 static const unsigned int FuncNameCol = 111;
1655 /// Return a similarity of two line/block sample counters in the same
1656 /// function in base and test profiles. The line/block-similarity BS(i) is
1657 /// computed as follows:
1658 /// For an offsets i, given the sample count at i in base profile BB(i),
1659 /// the sample count at i in test profile BT(i), the sum of sample counts
1660 /// in this function in base profile SB, and the sum of sample counts in
1661 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1662 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1663 double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1664 const SampleOverlapStats &FuncOverlap) const;
1666 void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1667 uint64_t HotBlockCount);
1669 void getHotFunctions(const FuncSampleStatsMap &ProfStats,
1670 FuncSampleStatsMap &HotFunc,
1671 uint64_t HotThreshold) const;
1673 void computeHotFuncOverlap();
1675 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1676 /// Difference for two sample units in a matched function according to the
1677 /// given match status.
1678 void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
1679 uint64_t HotBlockCount,
1680 SampleOverlapStats &FuncOverlap,
1681 double &Difference, MatchStatus Status);
1683 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1684 /// Difference for unmatched callees that are only present in one profile in a
1685 /// matched caller function.
1686 void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
1687 SampleOverlapStats &FuncOverlap,
1688 double &Difference, MatchStatus Status);
1690 /// This function updates sample overlap statistics of an overlap function in
1691 /// base and test profile. It also calculates a function-internal similarity
1692 /// FIS as follows:
1693 /// For offsets i that have samples in at least one profile in this
1694 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
1695 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
1696 /// 0.0 meaning no overlap.
1697 double computeSampleFunctionInternalOverlap(
1698 const sampleprof::FunctionSamples &BaseFunc,
1699 const sampleprof::FunctionSamples &TestFunc,
1700 SampleOverlapStats &FuncOverlap);
1702 /// Function-level similarity (FS) is a weighted value over function internal
1703 /// similarity (FIS). This function computes a function's FS from its FIS by
1704 /// applying the weight.
1705 double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
1706 uint64_t TestFuncSample) const;
1708 /// The function-level similarity FS(A) for a function A is computed as
1709 /// follows:
1710 /// Compute a function-internal similarity FIS(A) by
1711 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
1712 /// function A in base profile WB(A), and the weight of function A in test
1713 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
1714 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
1715 double
1716 computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
1717 const sampleprof::FunctionSamples *TestFunc,
1718 SampleOverlapStats *FuncOverlap,
1719 uint64_t BaseFuncSample,
1720 uint64_t TestFuncSample);
1722 /// Profile-level similarity (PS) is a weighted aggregate over function-level
1723 /// similarities (FS). This method weights the FS value by the function
1724 /// weights in the base and test profiles for the aggregation.
1725 double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
1726 uint64_t TestFuncSample) const;
1728 } // end anonymous namespace
1730 bool SampleOverlapAggregator::detectZeroSampleProfile(
1731 raw_fd_ostream &OS) const {
1732 bool HaveZeroSample = false;
1733 if (ProfOverlap.BaseSample == 0) {
1734 OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
1735 HaveZeroSample = true;
1737 if (ProfOverlap.TestSample == 0) {
1738 OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
1739 HaveZeroSample = true;
1741 return HaveZeroSample;
1744 double SampleOverlapAggregator::computeBlockSimilarity(
1745 uint64_t BaseSample, uint64_t TestSample,
1746 const SampleOverlapStats &FuncOverlap) const {
1747 double BaseFrac = 0.0;
1748 double TestFrac = 0.0;
1749 if (FuncOverlap.BaseSample > 0)
1750 BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
1751 if (FuncOverlap.TestSample > 0)
1752 TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
1753 return 1.0 - std::fabs(BaseFrac - TestFrac);
1756 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
1757 uint64_t TestSample,
1758 uint64_t HotBlockCount) {
1759 bool IsBaseHot = (BaseSample >= BaseHotThreshold);
1760 bool IsTestHot = (TestSample >= TestHotThreshold);
1761 if (!IsBaseHot && !IsTestHot)
1762 return;
1764 HotBlockOverlap.UnionCount += HotBlockCount;
1765 if (IsBaseHot)
1766 HotBlockOverlap.BaseCount += HotBlockCount;
1767 if (IsTestHot)
1768 HotBlockOverlap.TestCount += HotBlockCount;
1769 if (IsBaseHot && IsTestHot)
1770 HotBlockOverlap.OverlapCount += HotBlockCount;
1773 void SampleOverlapAggregator::getHotFunctions(
1774 const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
1775 uint64_t HotThreshold) const {
1776 for (const auto &F : ProfStats) {
1777 if (isFunctionHot(F.second, HotThreshold))
1778 HotFunc.emplace(F.first, F.second);
1782 void SampleOverlapAggregator::computeHotFuncOverlap() {
1783 FuncSampleStatsMap BaseHotFunc;
1784 getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
1785 HotFuncOverlap.BaseCount = BaseHotFunc.size();
1787 FuncSampleStatsMap TestHotFunc;
1788 getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
1789 HotFuncOverlap.TestCount = TestHotFunc.size();
1790 HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
1792 for (const auto &F : BaseHotFunc) {
1793 if (TestHotFunc.count(F.first))
1794 ++HotFuncOverlap.OverlapCount;
1795 else
1796 ++HotFuncOverlap.UnionCount;
1800 void SampleOverlapAggregator::updateOverlapStatsForFunction(
1801 uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
1802 SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
1803 assert(Status != MS_None &&
1804 "Match status should be updated before updating overlap statistics");
1805 if (Status == MS_FirstUnique) {
1806 TestSample = 0;
1807 FuncOverlap.BaseUniqueSample += BaseSample;
1808 } else if (Status == MS_SecondUnique) {
1809 BaseSample = 0;
1810 FuncOverlap.TestUniqueSample += TestSample;
1811 } else {
1812 ++FuncOverlap.OverlapCount;
1815 FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
1816 FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
1817 Difference +=
1818 1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
1819 updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
1822 void SampleOverlapAggregator::updateForUnmatchedCallee(
1823 const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
1824 double &Difference, MatchStatus Status) {
1825 assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
1826 "Status must be either of the two unmatched cases");
1827 FuncSampleStats FuncStats;
1828 if (Status == MS_FirstUnique) {
1829 getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
1830 updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
1831 FuncStats.HotBlockCount, FuncOverlap,
1832 Difference, Status);
1833 } else {
1834 getFuncSampleStats(Func, FuncStats, TestHotThreshold);
1835 updateOverlapStatsForFunction(0, FuncStats.SampleSum,
1836 FuncStats.HotBlockCount, FuncOverlap,
1837 Difference, Status);
1841 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
1842 const sampleprof::FunctionSamples &BaseFunc,
1843 const sampleprof::FunctionSamples &TestFunc,
1844 SampleOverlapStats &FuncOverlap) {
1846 using namespace sampleprof;
1848 double Difference = 0;
1850 // Accumulate Difference for regular line/block samples in the function.
1851 // We match them through sort-merge join algorithm because
1852 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
1853 // by their offsets.
1854 MatchStep<BodySampleMap::const_iterator> BlockIterStep(
1855 BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
1856 TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
1857 BlockIterStep.updateOneStep();
1858 while (!BlockIterStep.areBothFinished()) {
1859 uint64_t BaseSample =
1860 BlockIterStep.isFirstFinished()
1862 : BlockIterStep.getFirstIter()->second.getSamples();
1863 uint64_t TestSample =
1864 BlockIterStep.isSecondFinished()
1866 : BlockIterStep.getSecondIter()->second.getSamples();
1867 updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
1868 Difference, BlockIterStep.getMatchStatus());
1870 BlockIterStep.updateOneStep();
1873 // Accumulate Difference for callsite lines in the function. We match
1874 // them through sort-merge algorithm because
1875 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
1876 // ordered by their offsets.
1877 MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
1878 BaseFunc.getCallsiteSamples().cbegin(),
1879 BaseFunc.getCallsiteSamples().cend(),
1880 TestFunc.getCallsiteSamples().cbegin(),
1881 TestFunc.getCallsiteSamples().cend());
1882 CallsiteIterStep.updateOneStep();
1883 while (!CallsiteIterStep.areBothFinished()) {
1884 MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
1885 assert(CallsiteStepStatus != MS_None &&
1886 "Match status should be updated before entering loop body");
1888 if (CallsiteStepStatus != MS_Match) {
1889 auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
1890 ? CallsiteIterStep.getFirstIter()
1891 : CallsiteIterStep.getSecondIter();
1892 for (const auto &F : Callsite->second)
1893 updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
1894 CallsiteStepStatus);
1895 } else {
1896 // There may be multiple inlinees at the same offset, so we need to try
1897 // matching all of them. This match is implemented through sort-merge
1898 // algorithm because callsite records at the same offset are ordered by
1899 // function names.
1900 MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
1901 CallsiteIterStep.getFirstIter()->second.cbegin(),
1902 CallsiteIterStep.getFirstIter()->second.cend(),
1903 CallsiteIterStep.getSecondIter()->second.cbegin(),
1904 CallsiteIterStep.getSecondIter()->second.cend());
1905 CalleeIterStep.updateOneStep();
1906 while (!CalleeIterStep.areBothFinished()) {
1907 MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
1908 if (CalleeStepStatus != MS_Match) {
1909 auto Callee = (CalleeStepStatus == MS_FirstUnique)
1910 ? CalleeIterStep.getFirstIter()
1911 : CalleeIterStep.getSecondIter();
1912 updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
1913 CalleeStepStatus);
1914 } else {
1915 // An inlined function can contain other inlinees inside, so compute
1916 // the Difference recursively.
1917 Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
1918 CalleeIterStep.getFirstIter()->second,
1919 CalleeIterStep.getSecondIter()->second,
1920 FuncOverlap);
1922 CalleeIterStep.updateOneStep();
1925 CallsiteIterStep.updateOneStep();
1928 // Difference reflects the total differences of line/block samples in this
1929 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
1930 // reflect the similarity between function profiles in [0.0f to 1.0f].
1931 return (2.0 - Difference) / 2;
1934 double SampleOverlapAggregator::weightForFuncSimilarity(
1935 double FuncInternalSimilarity, uint64_t BaseFuncSample,
1936 uint64_t TestFuncSample) const {
1937 // Compute the weight as the distance between the function weights in two
1938 // profiles.
1939 double BaseFrac = 0.0;
1940 double TestFrac = 0.0;
1941 assert(ProfOverlap.BaseSample > 0 &&
1942 "Total samples in base profile should be greater than 0");
1943 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
1944 assert(ProfOverlap.TestSample > 0 &&
1945 "Total samples in test profile should be greater than 0");
1946 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
1947 double WeightDistance = std::fabs(BaseFrac - TestFrac);
1949 // Take WeightDistance into the similarity.
1950 return FuncInternalSimilarity * (1 - WeightDistance);
1953 double
1954 SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
1955 uint64_t BaseFuncSample,
1956 uint64_t TestFuncSample) const {
1958 double BaseFrac = 0.0;
1959 double TestFrac = 0.0;
1960 assert(ProfOverlap.BaseSample > 0 &&
1961 "Total samples in base profile should be greater than 0");
1962 BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
1963 assert(ProfOverlap.TestSample > 0 &&
1964 "Total samples in test profile should be greater than 0");
1965 TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
1966 return FuncSimilarity * (BaseFrac + TestFrac);
1969 double SampleOverlapAggregator::computeSampleFunctionOverlap(
1970 const sampleprof::FunctionSamples *BaseFunc,
1971 const sampleprof::FunctionSamples *TestFunc,
1972 SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
1973 uint64_t TestFuncSample) {
1974 // Default function internal similarity before weighted, meaning two functions
1975 // has no overlap.
1976 const double DefaultFuncInternalSimilarity = 0;
1977 double FuncSimilarity;
1978 double FuncInternalSimilarity;
1980 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
1981 // In this case, we use DefaultFuncInternalSimilarity as the function internal
1982 // similarity.
1983 if (!BaseFunc || !TestFunc) {
1984 FuncInternalSimilarity = DefaultFuncInternalSimilarity;
1985 } else {
1986 assert(FuncOverlap != nullptr &&
1987 "FuncOverlap should be provided in this case");
1988 FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
1989 *BaseFunc, *TestFunc, *FuncOverlap);
1990 // Now, FuncInternalSimilarity may be a little less than 0 due to
1991 // imprecision of floating point accumulations. Make it zero if the
1992 // difference is below Epsilon.
1993 FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
1995 : FuncInternalSimilarity;
1997 FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
1998 BaseFuncSample, TestFuncSample);
1999 return FuncSimilarity;
2002 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2003 using namespace sampleprof;
2005 std::unordered_map<SampleContext, const FunctionSamples *,
2006 SampleContext::Hash>
2007 BaseFuncProf;
2008 const auto &BaseProfiles = BaseReader->getProfiles();
2009 for (const auto &BaseFunc : BaseProfiles) {
2010 BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
2012 ProfOverlap.UnionCount = BaseFuncProf.size();
2014 const auto &TestProfiles = TestReader->getProfiles();
2015 for (const auto &TestFunc : TestProfiles) {
2016 SampleOverlapStats FuncOverlap;
2017 FuncOverlap.TestName = TestFunc.second.getContext();
2018 assert(TestStats.count(FuncOverlap.TestName) &&
2019 "TestStats should have records for all functions in test profile "
2020 "except inlinees");
2021 FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2023 bool Matched = false;
2024 const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
2025 if (Match == BaseFuncProf.end()) {
2026 const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2027 ++ProfOverlap.TestUniqueCount;
2028 ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2029 FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2031 updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
2033 double FuncSimilarity = computeSampleFunctionOverlap(
2034 nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
2035 ProfOverlap.Similarity +=
2036 weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
2038 ++ProfOverlap.UnionCount;
2039 ProfOverlap.UnionSample += FuncStats.SampleSum;
2040 } else {
2041 ++ProfOverlap.OverlapCount;
2043 // Two functions match with each other. Compute function-level overlap and
2044 // aggregate them into profile-level overlap.
2045 FuncOverlap.BaseName = Match->second->getContext();
2046 assert(BaseStats.count(FuncOverlap.BaseName) &&
2047 "BaseStats should have records for all functions in base profile "
2048 "except inlinees");
2049 FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2051 FuncOverlap.Similarity = computeSampleFunctionOverlap(
2052 Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2053 FuncOverlap.TestSample);
2054 ProfOverlap.Similarity +=
2055 weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2056 FuncOverlap.TestSample);
2057 ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2058 ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2060 // Accumulate the percentage of base unique and test unique samples into
2061 // ProfOverlap.
2062 ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2063 ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2065 // Remove matched base functions for later reporting functions not found
2066 // in test profile.
2067 BaseFuncProf.erase(Match);
2068 Matched = true;
2071 // Print function-level similarity information if specified by options.
2072 assert(TestStats.count(FuncOverlap.TestName) &&
2073 "TestStats should have records for all functions in test profile "
2074 "except inlinees");
2075 if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2076 (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2077 (Matched && !FuncFilter.NameFilter.empty() &&
2078 FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2079 std::string::npos)) {
2080 assert(ProfOverlap.BaseSample > 0 &&
2081 "Total samples in base profile should be greater than 0");
2082 FuncOverlap.BaseWeight =
2083 static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2084 assert(ProfOverlap.TestSample > 0 &&
2085 "Total samples in test profile should be greater than 0");
2086 FuncOverlap.TestWeight =
2087 static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2088 FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2092 // Traverse through functions in base profile but not in test profile.
2093 for (const auto &F : BaseFuncProf) {
2094 assert(BaseStats.count(F.second->getContext()) &&
2095 "BaseStats should have records for all functions in base profile "
2096 "except inlinees");
2097 const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2098 ++ProfOverlap.BaseUniqueCount;
2099 ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2101 updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2103 double FuncSimilarity = computeSampleFunctionOverlap(
2104 nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2105 ProfOverlap.Similarity +=
2106 weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2108 ProfOverlap.UnionSample += FuncStats.SampleSum;
2111 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2112 // of floating point accumulations. Make it 1.0 if the difference is below
2113 // Epsilon.
2114 ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2116 : ProfOverlap.Similarity;
2118 computeHotFuncOverlap();
2121 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2122 const auto &BaseProf = BaseReader->getProfiles();
2123 for (const auto &I : BaseProf) {
2124 ++ProfOverlap.BaseCount;
2125 FuncSampleStats FuncStats;
2126 getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2127 ProfOverlap.BaseSample += FuncStats.SampleSum;
2128 BaseStats.emplace(I.second.getContext(), FuncStats);
2131 const auto &TestProf = TestReader->getProfiles();
2132 for (const auto &I : TestProf) {
2133 ++ProfOverlap.TestCount;
2134 FuncSampleStats FuncStats;
2135 getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2136 ProfOverlap.TestSample += FuncStats.SampleSum;
2137 TestStats.emplace(I.second.getContext(), FuncStats);
2140 ProfOverlap.BaseName = StringRef(BaseFilename);
2141 ProfOverlap.TestName = StringRef(TestFilename);
2144 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2145 using namespace sampleprof;
2147 if (FuncSimilarityDump.empty())
2148 return;
2150 formatted_raw_ostream FOS(OS);
2151 FOS << "Function-level details:\n";
2152 FOS << "Base weight";
2153 FOS.PadToColumn(TestWeightCol);
2154 FOS << "Test weight";
2155 FOS.PadToColumn(SimilarityCol);
2156 FOS << "Similarity";
2157 FOS.PadToColumn(OverlapCol);
2158 FOS << "Overlap";
2159 FOS.PadToColumn(BaseUniqueCol);
2160 FOS << "Base unique";
2161 FOS.PadToColumn(TestUniqueCol);
2162 FOS << "Test unique";
2163 FOS.PadToColumn(BaseSampleCol);
2164 FOS << "Base samples";
2165 FOS.PadToColumn(TestSampleCol);
2166 FOS << "Test samples";
2167 FOS.PadToColumn(FuncNameCol);
2168 FOS << "Function name\n";
2169 for (const auto &F : FuncSimilarityDump) {
2170 double OverlapPercent =
2171 F.second.UnionSample > 0
2172 ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2173 : 0;
2174 double BaseUniquePercent =
2175 F.second.BaseSample > 0
2176 ? static_cast<double>(F.second.BaseUniqueSample) /
2177 F.second.BaseSample
2178 : 0;
2179 double TestUniquePercent =
2180 F.second.TestSample > 0
2181 ? static_cast<double>(F.second.TestUniqueSample) /
2182 F.second.TestSample
2183 : 0;
2185 FOS << format("%.2f%%", F.second.BaseWeight * 100);
2186 FOS.PadToColumn(TestWeightCol);
2187 FOS << format("%.2f%%", F.second.TestWeight * 100);
2188 FOS.PadToColumn(SimilarityCol);
2189 FOS << format("%.2f%%", F.second.Similarity * 100);
2190 FOS.PadToColumn(OverlapCol);
2191 FOS << format("%.2f%%", OverlapPercent * 100);
2192 FOS.PadToColumn(BaseUniqueCol);
2193 FOS << format("%.2f%%", BaseUniquePercent * 100);
2194 FOS.PadToColumn(TestUniqueCol);
2195 FOS << format("%.2f%%", TestUniquePercent * 100);
2196 FOS.PadToColumn(BaseSampleCol);
2197 FOS << F.second.BaseSample;
2198 FOS.PadToColumn(TestSampleCol);
2199 FOS << F.second.TestSample;
2200 FOS.PadToColumn(FuncNameCol);
2201 FOS << F.second.TestName.toString() << "\n";
2205 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2206 OS << "Profile overlap infomation for base_profile: "
2207 << ProfOverlap.BaseName.toString()
2208 << " and test_profile: " << ProfOverlap.TestName.toString()
2209 << "\nProgram level:\n";
2211 OS << " Whole program profile similarity: "
2212 << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2214 assert(ProfOverlap.UnionSample > 0 &&
2215 "Total samples in two profile should be greater than 0");
2216 double OverlapPercent =
2217 static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2218 assert(ProfOverlap.BaseSample > 0 &&
2219 "Total samples in base profile should be greater than 0");
2220 double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2221 ProfOverlap.BaseSample;
2222 assert(ProfOverlap.TestSample > 0 &&
2223 "Total samples in test profile should be greater than 0");
2224 double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2225 ProfOverlap.TestSample;
2227 OS << " Whole program sample overlap: "
2228 << format("%.3f%%", OverlapPercent * 100) << "\n";
2229 OS << " percentage of samples unique in base profile: "
2230 << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2231 OS << " percentage of samples unique in test profile: "
2232 << format("%.3f%%", TestUniquePercent * 100) << "\n";
2233 OS << " total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2234 << " total samples in test profile: " << ProfOverlap.TestSample << "\n";
2236 assert(ProfOverlap.UnionCount > 0 &&
2237 "There should be at least one function in two input profiles");
2238 double FuncOverlapPercent =
2239 static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2240 OS << " Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2241 << "\n";
2242 OS << " overlap functions: " << ProfOverlap.OverlapCount << "\n";
2243 OS << " functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2244 << "\n";
2245 OS << " functions unique in test profile: " << ProfOverlap.TestUniqueCount
2246 << "\n";
2249 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2250 raw_fd_ostream &OS) const {
2251 assert(HotFuncOverlap.UnionCount > 0 &&
2252 "There should be at least one hot function in two input profiles");
2253 OS << " Hot-function overlap: "
2254 << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2255 HotFuncOverlap.UnionCount * 100)
2256 << "\n";
2257 OS << " overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2258 OS << " hot functions unique in base profile: "
2259 << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2260 OS << " hot functions unique in test profile: "
2261 << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2263 assert(HotBlockOverlap.UnionCount > 0 &&
2264 "There should be at least one hot block in two input profiles");
2265 OS << " Hot-block overlap: "
2266 << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2267 HotBlockOverlap.UnionCount * 100)
2268 << "\n";
2269 OS << " overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2270 OS << " hot blocks unique in base profile: "
2271 << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2272 OS << " hot blocks unique in test profile: "
2273 << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2276 std::error_code SampleOverlapAggregator::loadProfiles() {
2277 using namespace sampleprof;
2279 LLVMContext Context;
2280 auto FS = vfs::getRealFileSystem();
2281 auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2282 FSDiscriminatorPassOption);
2283 if (std::error_code EC = BaseReaderOrErr.getError())
2284 exitWithErrorCode(EC, BaseFilename);
2286 auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2287 FSDiscriminatorPassOption);
2288 if (std::error_code EC = TestReaderOrErr.getError())
2289 exitWithErrorCode(EC, TestFilename);
2291 BaseReader = std::move(BaseReaderOrErr.get());
2292 TestReader = std::move(TestReaderOrErr.get());
2294 if (std::error_code EC = BaseReader->read())
2295 exitWithErrorCode(EC, BaseFilename);
2296 if (std::error_code EC = TestReader->read())
2297 exitWithErrorCode(EC, TestFilename);
2298 if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2299 exitWithError(
2300 "cannot compare probe-based profile with non-probe-based profile");
2301 if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2302 exitWithError("cannot compare CS profile with non-CS profile");
2304 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2305 // profile summary.
2306 ProfileSummary &BasePS = BaseReader->getSummary();
2307 ProfileSummary &TestPS = TestReader->getSummary();
2308 BaseHotThreshold =
2309 ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2310 TestHotThreshold =
2311 ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2313 return std::error_code();
2316 void overlapSampleProfile(const std::string &BaseFilename,
2317 const std::string &TestFilename,
2318 const OverlapFuncFilters &FuncFilter,
2319 uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2320 using namespace sampleprof;
2322 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2323 // report 2--3 places after decimal point in percentage numbers.
2324 SampleOverlapAggregator OverlapAggr(
2325 BaseFilename, TestFilename,
2326 static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2327 if (std::error_code EC = OverlapAggr.loadProfiles())
2328 exitWithErrorCode(EC);
2330 OverlapAggr.initializeSampleProfileOverlap();
2331 if (OverlapAggr.detectZeroSampleProfile(OS))
2332 return;
2334 OverlapAggr.computeSampleProfileOverlap(OS);
2336 OverlapAggr.dumpProgramSummary(OS);
2337 OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2338 OverlapAggr.dumpFuncSimilarity(OS);
2341 static int overlap_main(int argc, const char *argv[]) {
2342 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
2343 cl::desc("<base profile file>"));
2344 cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
2345 cl::desc("<test profile file>"));
2346 cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
2347 cl::desc("Output file"));
2348 cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
2349 cl::opt<bool> IsCS(
2350 "cs", cl::init(false),
2351 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."));
2352 cl::opt<unsigned long long> ValueCutoff(
2353 "value-cutoff", cl::init(-1),
2354 cl::desc(
2355 "Function level overlap information for every function (with calling "
2356 "context for csspgo) in test "
2357 "profile with max count value greater then the parameter value"));
2358 cl::opt<std::string> FuncNameFilter(
2359 "function",
2360 cl::desc("Function level overlap information for matching functions. For "
2361 "CSSPGO this takes a a function name with calling context"));
2362 cl::opt<unsigned long long> SimilarityCutoff(
2363 "similarity-cutoff", cl::init(0),
2364 cl::desc("For sample profiles, list function names (with calling context "
2365 "for csspgo) for overlapped functions "
2366 "with similarities below the cutoff (percentage times 10000)."));
2367 cl::opt<ProfileKinds> ProfileKind(
2368 cl::desc("Profile kind:"), cl::init(instr),
2369 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
2370 clEnumVal(sample, "Sample profile")));
2371 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
2373 std::error_code EC;
2374 raw_fd_ostream OS(Output.data(), EC, sys::fs::OF_TextWithCRLF);
2375 if (EC)
2376 exitWithErrorCode(EC, Output);
2378 if (ProfileKind == instr)
2379 overlapInstrProfile(BaseFilename, TestFilename,
2380 OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
2381 IsCS);
2382 else
2383 overlapSampleProfile(BaseFilename, TestFilename,
2384 OverlapFuncFilters{ValueCutoff, FuncNameFilter},
2385 SimilarityCutoff, OS);
2387 return 0;
namespace {
/// Running totals collected while traversing the value sites of one value
/// kind. All counters start at zero and the histogram starts empty.
struct ValueSitesStats {
  /// Number of value sites seen.
  uint64_t TotalNumValueSites = 0;
  /// Number of value sites that carry at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues = 0;
  /// Entry N-1 counts the sites that have exactly N profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2402 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2403 ValueSitesStats &Stats, raw_fd_ostream &OS,
2404 InstrProfSymtab *Symtab) {
2405 uint32_t NS = Func.getNumValueSites(VK);
2406 Stats.TotalNumValueSites += NS;
2407 for (size_t I = 0; I < NS; ++I) {
2408 uint32_t NV = Func.getNumValueDataForSite(VK, I);
2409 std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I);
2410 Stats.TotalNumValues += NV;
2411 if (NV) {
2412 Stats.TotalNumValueSitesWithValueProfile++;
2413 if (NV > Stats.ValueSitesHistogram.size())
2414 Stats.ValueSitesHistogram.resize(NV, 0);
2415 Stats.ValueSitesHistogram[NV - 1]++;
2418 uint64_t SiteSum = 0;
2419 for (uint32_t V = 0; V < NV; V++)
2420 SiteSum += VD[V].Count;
2421 if (SiteSum == 0)
2422 SiteSum = 1;
2424 for (uint32_t V = 0; V < NV; V++) {
2425 OS << "\t[ " << format("%2u", I) << ", ";
2426 if (Symtab == nullptr)
2427 OS << format("%4" PRIu64, VD[V].Value);
2428 else
2429 OS << Symtab->getFuncOrVarName(VD[V].Value);
2430 OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
2431 << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
2436 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2437 ValueSitesStats &Stats) {
2438 OS << " Total number of sites: " << Stats.TotalNumValueSites << "\n";
2439 OS << " Total number of sites with values: "
2440 << Stats.TotalNumValueSitesWithValueProfile << "\n";
2441 OS << " Total number of profiled values: " << Stats.TotalNumValues << "\n";
2443 OS << " Value sites histogram:\n\tNumTargets, SiteCount\n";
2444 for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2445 if (Stats.ValueSitesHistogram[I] > 0)
2446 OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2450 static int showInstrProfile(
2451 const std::string &Filename, bool ShowCounts, uint32_t TopN,
2452 bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary,
2453 std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions,
2454 bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow,
2455 const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds,
2456 bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces,
2457 ShowFormat SFormat, raw_fd_ostream &OS) {
2458 if (SFormat == ShowFormat::Json)
2459 exitWithError("JSON output is not supported for instr profiles");
2460 if (SFormat == ShowFormat::Yaml)
2461 exitWithError("YAML output is not supported for instr profiles");
2462 auto FS = vfs::getRealFileSystem();
2463 auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
2464 std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2465 if (ShowDetailedSummary && Cutoffs.empty()) {
2466 Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2468 InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2469 if (Error E = ReaderOrErr.takeError())
2470 exitWithError(std::move(E), Filename);
2472 auto Reader = std::move(ReaderOrErr.get());
2473 bool IsIRInstr = Reader->isIRLevelProfile();
2474 size_t ShownFunctions = 0;
2475 size_t BelowCutoffFunctions = 0;
2476 int NumVPKind = IPVK_Last - IPVK_First + 1;
2477 std::vector<ValueSitesStats> VPStats(NumVPKind);
2479 auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2480 const std::pair<std::string, uint64_t> &v2) {
2481 return v1.second > v2.second;
2484 std::priority_queue<std::pair<std::string, uint64_t>,
2485 std::vector<std::pair<std::string, uint64_t>>,
2486 decltype(MinCmp)>
2487 HottestFuncs(MinCmp);
2489 if (!TextFormat && OnlyListBelow) {
2490 OS << "The list of functions with the maximum counter less than "
2491 << ValueCutoff << ":\n";
2494 // Add marker so that IR-level instrumentation round-trips properly.
2495 if (TextFormat && IsIRInstr)
2496 OS << ":ir\n";
2498 for (const auto &Func : *Reader) {
2499 if (Reader->isIRLevelProfile()) {
2500 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
2501 if (FuncIsCS != ShowCS)
2502 continue;
2504 bool Show = ShowAllFunctions ||
2505 (!ShowFunction.empty() && Func.Name.contains(ShowFunction));
2507 bool doTextFormatDump = (Show && TextFormat);
2509 if (doTextFormatDump) {
2510 InstrProfSymtab &Symtab = Reader->getSymtab();
2511 InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
2512 OS);
2513 continue;
2516 assert(Func.Counts.size() > 0 && "function missing entry counter");
2517 Builder.addRecord(Func);
2519 if (ShowCovered) {
2520 if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
2521 OS << Func.Name << "\n";
2522 continue;
2525 uint64_t FuncMax = 0;
2526 uint64_t FuncSum = 0;
2528 auto PseudoKind = Func.getCountPseudoKind();
2529 if (PseudoKind != InstrProfRecord::NotPseudo) {
2530 if (Show) {
2531 if (!ShownFunctions)
2532 OS << "Counters:\n";
2533 ++ShownFunctions;
2534 OS << " " << Func.Name << ":\n"
2535 << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2536 << " Counters: " << Func.Counts.size();
2537 if (PseudoKind == InstrProfRecord::PseudoHot)
2538 OS << " <PseudoHot>\n";
2539 else if (PseudoKind == InstrProfRecord::PseudoWarm)
2540 OS << " <PseudoWarm>\n";
2541 else
2542 llvm_unreachable("Unknown PseudoKind");
2544 continue;
2547 for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2548 FuncMax = std::max(FuncMax, Func.Counts[I]);
2549 FuncSum += Func.Counts[I];
2552 if (FuncMax < ValueCutoff) {
2553 ++BelowCutoffFunctions;
2554 if (OnlyListBelow) {
2555 OS << " " << Func.Name << ": (Max = " << FuncMax
2556 << " Sum = " << FuncSum << ")\n";
2558 continue;
2559 } else if (OnlyListBelow)
2560 continue;
2562 if (TopN) {
2563 if (HottestFuncs.size() == TopN) {
2564 if (HottestFuncs.top().second < FuncMax) {
2565 HottestFuncs.pop();
2566 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2568 } else
2569 HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2572 if (Show) {
2573 if (!ShownFunctions)
2574 OS << "Counters:\n";
2576 ++ShownFunctions;
2578 OS << " " << Func.Name << ":\n"
2579 << " Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2580 << " Counters: " << Func.Counts.size() << "\n";
2581 if (!IsIRInstr)
2582 OS << " Function count: " << Func.Counts[0] << "\n";
2584 if (ShowIndirectCallTargets)
2585 OS << " Indirect Call Site Count: "
2586 << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
2588 uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
2589 if (ShowMemOPSizes && NumMemOPCalls > 0)
2590 OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2591 << "\n";
2593 if (ShowCounts) {
2594 OS << " Block counts: [";
2595 size_t Start = (IsIRInstr ? 0 : 1);
2596 for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2597 OS << (I == Start ? "" : ", ") << Func.Counts[I];
2599 OS << "]\n";
2602 if (ShowIndirectCallTargets) {
2603 OS << " Indirect Target Results:\n";
2604 traverseAllValueSites(Func, IPVK_IndirectCallTarget,
2605 VPStats[IPVK_IndirectCallTarget], OS,
2606 &(Reader->getSymtab()));
2609 if (ShowMemOPSizes && NumMemOPCalls > 0) {
2610 OS << " Memory Intrinsic Size Results:\n";
2611 traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
2612 nullptr);
2616 if (Reader->hasError())
2617 exitWithError(Reader->getError(), Filename);
2619 if (TextFormat || ShowCovered)
2620 return 0;
2621 std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2622 bool IsIR = Reader->isIRLevelProfile();
2623 OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2624 if (IsIR)
2625 OS << " entry_first = " << Reader->instrEntryBBEnabled();
2626 OS << "\n";
2627 if (ShowAllFunctions || !ShowFunction.empty())
2628 OS << "Functions shown: " << ShownFunctions << "\n";
2629 OS << "Total functions: " << PS->getNumFunctions() << "\n";
2630 if (ValueCutoff > 0) {
2631 OS << "Number of functions with maximum count (< " << ValueCutoff
2632 << "): " << BelowCutoffFunctions << "\n";
2633 OS << "Number of functions with maximum count (>= " << ValueCutoff
2634 << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2636 OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2637 OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2639 if (TopN) {
2640 std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2641 while (!HottestFuncs.empty()) {
2642 SortedHottestFuncs.emplace_back(HottestFuncs.top());
2643 HottestFuncs.pop();
2645 OS << "Top " << TopN
2646 << " functions with the largest internal block counts: \n";
2647 for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
2648 OS << " " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2651 if (ShownFunctions && ShowIndirectCallTargets) {
2652 OS << "Statistics for indirect call sites profile:\n";
2653 showValueSitesStats(OS, IPVK_IndirectCallTarget,
2654 VPStats[IPVK_IndirectCallTarget]);
2657 if (ShownFunctions && ShowMemOPSizes) {
2658 OS << "Statistics for memory intrinsic calls sizes profile:\n";
2659 showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
2662 if (ShowDetailedSummary) {
2663 OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
2664 OS << "Total count: " << PS->getTotalCount() << "\n";
2665 PS->printDetailedSummary(OS);
2668 if (ShowBinaryIds)
2669 if (Error E = Reader->printBinaryIds(OS))
2670 exitWithError(std::move(E), Filename);
2672 if (ShowProfileVersion)
2673 OS << "Profile version: " << Reader->getVersion() << "\n";
2675 if (ShowTemporalProfTraces) {
2676 auto &Traces = Reader->getTemporalProfTraces();
2677 OS << "Temporal Profile Traces (samples=" << Traces.size()
2678 << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
2679 for (unsigned i = 0; i < Traces.size(); i++) {
2680 OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
2681 << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
2682 for (auto &NameRef : Traces[i].FunctionNameRefs)
2683 OS << " " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
2687 return 0;
2690 static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2691 raw_fd_ostream &OS) {
2692 if (!Reader->dumpSectionInfo(OS)) {
2693 WithColor::warning() << "-show-sec-info-only is only supported for "
2694 << "sample profile in extbinary format and is "
2695 << "ignored for other formats.\n";
2696 return;
2700 namespace {
2701 struct HotFuncInfo {
2702 std::string FuncName;
2703 uint64_t TotalCount;
2704 double TotalCountPercent;
2705 uint64_t MaxCount;
2706 uint64_t EntryCount;
2708 HotFuncInfo()
2709 : TotalCount(0), TotalCountPercent(0.0f), MaxCount(0), EntryCount(0) {}
2711 HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
2712 : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
2713 MaxCount(MS), EntryCount(ES) {}
2715 } // namespace
2717 // Print out detailed information about hot functions in PrintValues vector.
2718 // Users specify titles and offset of every columns through ColumnTitle and
2719 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
2720 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
2721 // print out or let it be an empty string.
2722 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
2723 const std::vector<int> &ColumnOffset,
2724 const std::vector<HotFuncInfo> &PrintValues,
2725 uint64_t HotFuncCount, uint64_t TotalFuncCount,
2726 uint64_t HotProfCount, uint64_t TotalProfCount,
2727 const std::string &HotFuncMetric,
2728 uint32_t TopNFunctions, raw_fd_ostream &OS) {
2729 assert(ColumnOffset.size() == ColumnTitle.size() &&
2730 "ColumnOffset and ColumnTitle should have the same size");
2731 assert(ColumnTitle.size() >= 4 &&
2732 "ColumnTitle should have at least 4 elements");
2733 assert(TotalFuncCount > 0 &&
2734 "There should be at least one function in the profile");
2735 double TotalProfPercent = 0;
2736 if (TotalProfCount > 0)
2737 TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
2739 formatted_raw_ostream FOS(OS);
2740 FOS << HotFuncCount << " out of " << TotalFuncCount
2741 << " functions with profile ("
2742 << format("%.2f%%",
2743 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
2744 << ") are considered hot functions";
2745 if (!HotFuncMetric.empty())
2746 FOS << " (" << HotFuncMetric << ")";
2747 FOS << ".\n";
2748 FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
2749 << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
2751 for (size_t I = 0; I < ColumnTitle.size(); ++I) {
2752 FOS.PadToColumn(ColumnOffset[I]);
2753 FOS << ColumnTitle[I];
2755 FOS << "\n";
2757 uint32_t Count = 0;
2758 for (const auto &R : PrintValues) {
2759 if (TopNFunctions && (Count++ == TopNFunctions))
2760 break;
2761 FOS.PadToColumn(ColumnOffset[0]);
2762 FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
2763 FOS.PadToColumn(ColumnOffset[1]);
2764 FOS << R.MaxCount;
2765 FOS.PadToColumn(ColumnOffset[2]);
2766 FOS << R.EntryCount;
2767 FOS.PadToColumn(ColumnOffset[3]);
2768 FOS << R.FuncName << "\n";
2772 static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
2773 ProfileSummary &PS, uint32_t TopN,
2774 raw_fd_ostream &OS) {
2775 using namespace sampleprof;
2777 const uint32_t HotFuncCutoff = 990000;
2778 auto &SummaryVector = PS.getDetailedSummary();
2779 uint64_t MinCountThreshold = 0;
2780 for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
2781 if (SummaryEntry.Cutoff == HotFuncCutoff) {
2782 MinCountThreshold = SummaryEntry.MinCount;
2783 break;
2787 // Traverse all functions in the profile and keep only hot functions.
2788 // The following loop also calculates the sum of total samples of all
2789 // functions.
2790 std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
2791 std::greater<uint64_t>>
2792 HotFunc;
2793 uint64_t ProfileTotalSample = 0;
2794 uint64_t HotFuncSample = 0;
2795 uint64_t HotFuncCount = 0;
2797 for (const auto &I : Profiles) {
2798 FuncSampleStats FuncStats;
2799 const FunctionSamples &FuncProf = I.second;
2800 ProfileTotalSample += FuncProf.getTotalSamples();
2801 getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
2803 if (isFunctionHot(FuncStats, MinCountThreshold)) {
2804 HotFunc.emplace(FuncProf.getTotalSamples(),
2805 std::make_pair(&(I.second), FuncStats.MaxSample));
2806 HotFuncSample += FuncProf.getTotalSamples();
2807 ++HotFuncCount;
2811 std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
2812 "Entry sample", "Function name"};
2813 std::vector<int> ColumnOffset{0, 24, 42, 58};
2814 std::string Metric =
2815 std::string("max sample >= ") + std::to_string(MinCountThreshold);
2816 std::vector<HotFuncInfo> PrintValues;
2817 for (const auto &FuncPair : HotFunc) {
2818 const FunctionSamples &Func = *FuncPair.second.first;
2819 double TotalSamplePercent =
2820 (ProfileTotalSample > 0)
2821 ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
2822 : 0;
2823 PrintValues.emplace_back(
2824 HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
2825 TotalSamplePercent, FuncPair.second.second,
2826 Func.getHeadSamplesEstimate()));
2828 dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
2829 Profiles.size(), HotFuncSample, ProfileTotalSample,
2830 Metric, TopN, OS);
2832 return 0;
2835 static int showSampleProfile(const std::string &Filename, bool ShowCounts,
2836 uint32_t TopN, bool ShowAllFunctions,
2837 bool ShowDetailedSummary,
2838 const std::string &ShowFunction,
2839 bool ShowProfileSymbolList,
2840 bool ShowSectionInfoOnly, bool ShowHotFuncList,
2841 ShowFormat SFormat, raw_fd_ostream &OS) {
2842 if (SFormat == ShowFormat::Yaml)
2843 exitWithError("YAML output is not supported for sample profiles");
2844 using namespace sampleprof;
2845 LLVMContext Context;
2846 auto FS = vfs::getRealFileSystem();
2847 auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
2848 FSDiscriminatorPassOption);
2849 if (std::error_code EC = ReaderOrErr.getError())
2850 exitWithErrorCode(EC, Filename);
2852 auto Reader = std::move(ReaderOrErr.get());
2853 if (ShowSectionInfoOnly) {
2854 showSectionInfo(Reader.get(), OS);
2855 return 0;
2858 if (std::error_code EC = Reader->read())
2859 exitWithErrorCode(EC, Filename);
2861 if (ShowAllFunctions || ShowFunction.empty()) {
2862 if (SFormat == ShowFormat::Json)
2863 Reader->dumpJson(OS);
2864 else
2865 Reader->dump(OS);
2866 } else {
2867 if (SFormat == ShowFormat::Json)
2868 exitWithError(
2869 "the JSON format is supported only when all functions are to "
2870 "be printed");
2872 // TODO: parse context string to support filtering by contexts.
2873 FunctionSamples *FS = Reader->getSamplesFor(StringRef(ShowFunction));
2874 Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
2877 if (ShowProfileSymbolList) {
2878 std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
2879 Reader->getProfileSymbolList();
2880 ReaderList->dump(OS);
2883 if (ShowDetailedSummary) {
2884 auto &PS = Reader->getSummary();
2885 PS.printSummary(OS);
2886 PS.printDetailedSummary(OS);
2889 if (ShowHotFuncList || TopN)
2890 showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS);
2892 return 0;
2895 static int showMemProfProfile(const std::string &Filename,
2896 const std::string &ProfiledBinary,
2897 ShowFormat SFormat, raw_fd_ostream &OS) {
2898 if (SFormat == ShowFormat::Json)
2899 exitWithError("JSON output is not supported for MemProf");
2900 auto ReaderOr = llvm::memprof::RawMemProfReader::create(
2901 Filename, ProfiledBinary, /*KeepNames=*/true);
2902 if (Error E = ReaderOr.takeError())
2903 // Since the error can be related to the profile or the binary we do not
2904 // pass whence. Instead additional context is provided where necessary in
2905 // the error message.
2906 exitWithError(std::move(E), /*Whence*/ "");
2908 std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
2909 ReaderOr.get().release());
2911 Reader->printYAML(OS);
2912 return 0;
2915 static int showDebugInfoCorrelation(const std::string &Filename,
2916 bool ShowDetailedSummary,
2917 bool ShowProfileSymbolList,
2918 int MaxDbgCorrelationWarnings,
2919 ShowFormat SFormat, raw_fd_ostream &OS) {
2920 if (SFormat == ShowFormat::Json)
2921 exitWithError("JSON output is not supported for debug info correlation");
2922 std::unique_ptr<InstrProfCorrelator> Correlator;
2923 if (auto Err =
2924 InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO)
2925 .moveInto(Correlator))
2926 exitWithError(std::move(Err), Filename);
2927 if (SFormat == ShowFormat::Yaml) {
2928 if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
2929 exitWithError(std::move(Err), Filename);
2930 return 0;
2933 if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
2934 exitWithError(std::move(Err), Filename);
2936 InstrProfSymtab Symtab;
2937 if (auto Err = Symtab.create(
2938 StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
2939 exitWithError(std::move(Err), Filename);
2941 if (ShowProfileSymbolList)
2942 Symtab.dumpNames(OS);
2943 // TODO: Read "Profile Data Type" from debug info to compute and show how many
2944 // counters the section holds.
2945 if (ShowDetailedSummary)
2946 OS << "Counters section size: 0x"
2947 << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
2948 OS << "Found " << Correlator->getDataSize() << " functions\n";
2950 return 0;
2953 static int show_main(int argc, const char *argv[]) {
2954 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
2956 cl::opt<bool> ShowCounts("counts", cl::init(false),
2957 cl::desc("Show counter values for shown functions"));
2958 cl::opt<ShowFormat> SFormat(
2959 "show-format", cl::init(ShowFormat::Text),
2960 cl::desc("Emit output in the selected format if supported"),
2961 cl::values(clEnumValN(ShowFormat::Text, "text",
2962 "emit normal text output (default)"),
2963 clEnumValN(ShowFormat::Json, "json", "emit JSON"),
2964 clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
2965 // TODO: Consider replacing this with `--show-format=text-encoding`.
2966 cl::opt<bool> TextFormat(
2967 "text", cl::init(false),
2968 cl::desc("Show instr profile data in text dump format"));
2969 cl::opt<bool> JsonFormat(
2970 "json", cl::desc("Show sample profile data in the JSON format "
2971 "(deprecated, please use --show-format=json)"));
2972 cl::opt<bool> ShowIndirectCallTargets(
2973 "ic-targets", cl::init(false),
2974 cl::desc("Show indirect call site target values for shown functions"));
2975 cl::opt<bool> ShowMemOPSizes(
2976 "memop-sizes", cl::init(false),
2977 cl::desc("Show the profiled sizes of the memory intrinsic calls "
2978 "for shown functions"));
2979 cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
2980 cl::desc("Show detailed profile summary"));
2981 cl::list<uint32_t> DetailedSummaryCutoffs(
2982 cl::CommaSeparated, "detailed-summary-cutoffs",
2983 cl::desc(
2984 "Cutoff percentages (times 10000) for generating detailed summary"),
2985 cl::value_desc("800000,901000,999999"));
2986 cl::opt<bool> ShowHotFuncList(
2987 "hot-func-list", cl::init(false),
2988 cl::desc("Show profile summary of a list of hot functions"));
2989 cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
2990 cl::desc("Details for every function"));
2991 cl::opt<bool> ShowCS("showcs", cl::init(false),
2992 cl::desc("Show context sensitive counts"));
2993 cl::opt<std::string> ShowFunction("function",
2994 cl::desc("Details for matching functions"));
2996 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
2997 cl::init("-"), cl::desc("Output file"));
2998 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
2999 cl::aliasopt(OutputFilename));
3000 cl::opt<ProfileKinds> ProfileKind(
3001 cl::desc("Profile kind:"), cl::init(instr),
3002 cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
3003 clEnumVal(sample, "Sample profile"),
3004 clEnumVal(memory, "MemProf memory access profile")));
3005 cl::opt<uint32_t> TopNFunctions(
3006 "topn", cl::init(0),
3007 cl::desc("Show the list of functions with the largest internal counts"));
3008 cl::opt<uint32_t> ValueCutoff(
3009 "value-cutoff", cl::init(0),
3010 cl::desc("Set the count value cutoff. Functions with the maximum count "
3011 "less than this value will not be printed out. (Default is 0)"));
3012 cl::opt<bool> OnlyListBelow(
3013 "list-below-cutoff", cl::init(false),
3014 cl::desc("Only output names of functions whose max count values are "
3015 "below the cutoff value"));
3016 cl::opt<bool> ShowProfileSymbolList(
3017 "show-prof-sym-list", cl::init(false),
3018 cl::desc("Show profile symbol list if it exists in the profile. "));
3019 cl::opt<bool> ShowSectionInfoOnly(
3020 "show-sec-info-only", cl::init(false),
3021 cl::desc("Show the information of each section in the sample profile. "
3022 "The flag is only usable when the sample profile is in "
3023 "extbinary format"));
3024 cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
3025 cl::desc("Show binary ids in the profile. "));
3026 cl::opt<bool> ShowTemporalProfTraces(
3027 "temporal-profile-traces",
3028 cl::desc("Show temporal profile traces in the profile."));
3029 cl::opt<std::string> DebugInfoFilename(
3030 "debug-info", cl::init(""),
3031 cl::desc("Read and extract profile metadata from debug info and show "
3032 "the functions it found."));
3033 cl::opt<unsigned> MaxDbgCorrelationWarnings(
3034 "max-debug-info-correlation-warnings",
3035 cl::desc("The maximum number of warnings to emit when correlating "
3036 "profile from debug info (0 = no limit)"),
3037 cl::init(5));
3038 cl::opt<bool> ShowCovered(
3039 "covered", cl::init(false),
3040 cl::desc("Show only the functions that have been executed."));
3041 cl::opt<std::string> ProfiledBinary(
3042 "profiled-binary", cl::init(""),
3043 cl::desc("Path to binary from which the profile was collected."));
3044 cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
3045 cl::desc("Show profile version. "));
3046 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
3048 if (Filename.empty() && DebugInfoFilename.empty())
3049 exitWithError(
3050 "the positional argument '<profdata-file>' is required unless '--" +
3051 DebugInfoFilename.ArgStr + "' is provided");
3053 if (Filename == OutputFilename) {
3054 errs() << sys::path::filename(argv[0])
3055 << ": Input file name cannot be the same as the output file name!\n";
3056 return 1;
3058 if (JsonFormat)
3059 SFormat = ShowFormat::Json;
3061 std::error_code EC;
3062 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3063 if (EC)
3064 exitWithErrorCode(EC, OutputFilename);
3066 if (ShowAllFunctions && !ShowFunction.empty())
3067 WithColor::warning() << "-function argument ignored: showing all functions\n";
3069 if (!DebugInfoFilename.empty())
3070 return showDebugInfoCorrelation(DebugInfoFilename, ShowDetailedSummary,
3071 ShowProfileSymbolList,
3072 MaxDbgCorrelationWarnings, SFormat, OS);
3074 if (ProfileKind == instr)
3075 return showInstrProfile(
3076 Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
3077 ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
3078 ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
3079 TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion,
3080 ShowTemporalProfTraces, SFormat, OS);
3081 if (ProfileKind == sample)
3082 return showSampleProfile(Filename, ShowCounts, TopNFunctions,
3083 ShowAllFunctions, ShowDetailedSummary,
3084 ShowFunction, ShowProfileSymbolList,
3085 ShowSectionInfoOnly, ShowHotFuncList, SFormat, OS);
3086 return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS);
3089 static int order_main(int argc, const char *argv[]) {
3090 cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
3091 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
3092 cl::init("-"), cl::desc("Output file"));
3093 cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
3094 cl::aliasopt(OutputFilename));
3095 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data order\n");
3097 std::error_code EC;
3098 raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3099 if (EC)
3100 exitWithErrorCode(EC, OutputFilename);
3101 auto FS = vfs::getRealFileSystem();
3102 auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3103 if (Error E = ReaderOrErr.takeError())
3104 exitWithError(std::move(E), Filename);
3106 auto Reader = std::move(ReaderOrErr.get());
3107 for (auto &I : *Reader) {
3108 // Read all entries
3109 (void)I;
3111 auto &Traces = Reader->getTemporalProfTraces();
3112 auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
3113 BalancedPartitioningConfig Config;
3114 BalancedPartitioning BP(Config);
3115 BP.run(Nodes);
3117 WithColor::note() << "# Ordered " << Nodes.size() << " functions\n";
3118 for (auto &N : Nodes) {
3119 auto [Filename, ParsedFuncName] =
3120 getParsedIRPGOFuncName(Reader->getSymtab().getFuncOrVarName(N.Id));
3121 if (!Filename.empty())
3122 OS << "# " << Filename << "\n";
3123 OS << ParsedFuncName << "\n";
3125 return 0;
3128 typedef int (*llvm_profdata_subcommand)(int, const char *[]);
3130 static std::tuple<StringRef, llvm_profdata_subcommand>
3131 llvm_profdata_subcommands[] = {
3132 {"merge", merge_main},
3133 {"show", show_main},
3134 {"order", order_main},
3135 {"overlap", overlap_main},
3138 int llvm_profdata_main(int argc, char **argvNonConst,
3139 const llvm::ToolContext &) {
3140 const char **argv = const_cast<const char **>(argvNonConst);
3141 InitLLVM X(argc, argv);
3143 StringRef ProgName(sys::path::filename(argv[0]));
3144 if (argc > 1) {
3146 llvm_profdata_subcommand func = nullptr;
3147 for (auto [subcmd_name, subcmd_action] : llvm_profdata_subcommands)
3148 if (subcmd_name == argv[1])
3149 func = subcmd_action;
3151 if (func) {
3152 std::string Invocation(ProgName.str() + " " + argv[1]);
3153 argv[1] = Invocation.c_str();
3154 return func(argc - 1, argv + 1);
3157 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
3158 strcmp(argv[1], "--help") == 0) {
3160 errs() << "OVERVIEW: LLVM profile data tools\n\n"
3161 << "USAGE: " << ProgName << " <command> [args...]\n"
3162 << "USAGE: " << ProgName << " <command> -help\n\n"
3163 << "See each individual command --help for more details.\n"
3164 << "Available commands: "
3165 << join(map_range(llvm_profdata_subcommands,
3166 [](auto const &KV) { return std::get<0>(KV); }),
3167 ", ")
3168 << "\n";
3169 return 0;
3172 if (strcmp(argv[1], "--version") == 0) {
3173 outs() << ProgName << '\n';
3174 cl::PrintVersionMessage();
3175 return 0;
3179 if (argc < 2)
3180 errs() << ProgName << ": No command specified!\n";
3181 else
3182 errs() << ProgName << ": Unknown command!\n";
3184 errs() << "USAGE: " << ProgName << " <"
3185 << join(map_range(llvm_profdata_subcommands,
3186 [](auto const &KV) { return std::get<0>(KV); }),
3187 "|")
3188 << "> [args...]\n";
3189 return 1;