1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // llvm-profdata merges .profdata files.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/IR/LLVMContext.h"
17 #include "llvm/Object/Binary.h"
18 #include "llvm/ProfileData/InstrProfCorrelator.h"
19 #include "llvm/ProfileData/InstrProfReader.h"
20 #include "llvm/ProfileData/InstrProfWriter.h"
21 #include "llvm/ProfileData/MemProf.h"
22 #include "llvm/ProfileData/ProfileCommon.h"
23 #include "llvm/ProfileData/RawMemProfReader.h"
24 #include "llvm/ProfileData/SampleProfReader.h"
25 #include "llvm/ProfileData/SampleProfWriter.h"
26 #include "llvm/Support/BalancedPartitioning.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Discriminator.h"
29 #include "llvm/Support/Errc.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/FormattedStream.h"
33 #include "llvm/Support/InitLLVM.h"
34 #include "llvm/Support/LLVMDriver.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Support/Threading.h"
40 #include "llvm/Support/VirtualFileSystem.h"
41 #include "llvm/Support/WithColor.h"
42 #include "llvm/Support/raw_ostream.h"
50 // Sentinel value stored in place of a function name to indicate that
51 // multiple static functions map to the same name.
52 const std::string DuplicateNameStr
= "----";
57 PF_Compact_Binary
, // Deprecated
// Scoped enumeration with three formats: Text, Json and Yaml.
// NOTE(review): presumably the output format used when showing a profile;
// the users of this enum are outside this view -- confirm.
63 enum class ShowFormat
{ Text
, Json
, Yaml
};
65 static void warn(Twine Message
, std::string Whence
= "",
66 std::string Hint
= "") {
69 errs() << Whence
<< ": ";
70 errs() << Message
<< "\n";
72 WithColor::note() << Hint
<< "\n";
75 static void warn(Error E
, StringRef Whence
= "") {
76 if (E
.isA
<InstrProfError
>()) {
77 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
78 warn(IPE
.message(), std::string(Whence
), std::string(""));
83 static void exitWithError(Twine Message
, std::string Whence
= "",
84 std::string Hint
= "") {
87 errs() << Whence
<< ": ";
88 errs() << Message
<< "\n";
90 WithColor::note() << Hint
<< "\n";
94 static void exitWithError(Error E
, StringRef Whence
= "") {
95 if (E
.isA
<InstrProfError
>()) {
96 handleAllErrors(std::move(E
), [&](const InstrProfError
&IPE
) {
97 instrprof_error instrError
= IPE
.get();
99 if (instrError
== instrprof_error::unrecognized_format
) {
100 // Hint in case user missed specifying the profile type.
101 Hint
= "Perhaps you forgot to use the --sample or --memory option?";
103 exitWithError(IPE
.message(), std::string(Whence
), std::string(Hint
));
108 exitWithError(toString(std::move(E
)), std::string(Whence
));
111 static void exitWithErrorCode(std::error_code EC
, StringRef Whence
= "") {
112 exitWithError(EC
.message(), std::string(Whence
));
// The kinds of profile distinguished by this tool: instr, sample and memory.
// NOTE(review): presumably selected through command-line options such as
// --sample / --memory (the hint text in exitWithError mentions them) --
// confirm against the option definitions.
116 enum ProfileKinds
{ instr
, sample
, memory
};
// Policy for reacting to invalid inputs.
//  - warnOnly:            no fatal condition is tied to this value in the
//                         code visible here.
//  - failIfAnyAreInvalid: warnOrExitGivenError exits for this mode, and the
//                         instr-profile merge exits if any error was counted.
//  - failIfAllAreInvalid: the instr-profile merge exits only when the number
//                         of errors equals the number of inputs.
117 enum FailureMode
{ warnOnly
, failIfAnyAreInvalid
, failIfAllAreInvalid
};
120 static void warnOrExitGivenError(FailureMode FailMode
, std::error_code EC
,
121 StringRef Whence
= "") {
122 if (FailMode
== failIfAnyAreInvalid
)
123 exitWithErrorCode(EC
, Whence
);
125 warn(EC
.message(), std::string(Whence
));
128 static void handleMergeWriterError(Error E
, StringRef WhenceFile
= "",
129 StringRef WhenceFunction
= "",
130 bool ShowHint
= true) {
131 if (!WhenceFile
.empty())
132 errs() << WhenceFile
<< ": ";
133 if (!WhenceFunction
.empty())
134 errs() << WhenceFunction
<< ": ";
136 auto IPE
= instrprof_error::success
;
137 E
= handleErrors(std::move(E
),
138 [&IPE
](std::unique_ptr
<InstrProfError
> E
) -> Error
{
140 return Error(std::move(E
));
142 errs() << toString(std::move(E
)) << "\n";
146 if (IPE
!= instrprof_error::success
) {
148 case instrprof_error::hash_mismatch
:
149 case instrprof_error::count_mismatch
:
150 case instrprof_error::value_site_count_mismatch
:
151 Hint
= "Make sure that all profile data to be merged is generated "
152 "from the same binary.";
160 errs() << Hint
<< "\n";
165 /// A remapper from original symbol names to new symbol names based on a file
166 /// containing a list of mappings from old name to new name.
167 class SymbolRemapper
{
168 std::unique_ptr
<MemoryBuffer
> File
;
169 DenseMap
<StringRef
, StringRef
> RemappingTable
;
172 /// Build a SymbolRemapper from a file containing a list of old/new symbols.
173 static std::unique_ptr
<SymbolRemapper
> create(StringRef InputFile
) {
174 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
176 exitWithErrorCode(BufOrError
.getError(), InputFile
);
178 auto Remapper
= std::make_unique
<SymbolRemapper
>();
179 Remapper
->File
= std::move(BufOrError
.get());
181 for (line_iterator
LineIt(*Remapper
->File
, /*SkipBlanks=*/true, '#');
182 !LineIt
.is_at_eof(); ++LineIt
) {
183 std::pair
<StringRef
, StringRef
> Parts
= LineIt
->split(' ');
184 if (Parts
.first
.empty() || Parts
.second
.empty() ||
185 Parts
.second
.count(' ')) {
186 exitWithError("unexpected line in remapping file",
187 (InputFile
+ ":" + Twine(LineIt
.line_number())).str(),
188 "expected 'old_symbol new_symbol'");
190 Remapper
->RemappingTable
.insert(Parts
);
195 /// Attempt to map the given old symbol into a new symbol.
197 /// \return The new symbol, or \p Name if no such symbol was found.
198 StringRef
operator()(StringRef Name
) {
199 StringRef New
= RemappingTable
.lookup(Name
);
200 return New
.empty() ? Name
: New
;
203 FunctionId
operator()(FunctionId Name
) {
204 // MD5 name cannot be remapped.
205 if (!Name
.isStringRef())
207 StringRef New
= RemappingTable
.lookup(Name
.stringRef());
208 return New
.empty() ? Name
: FunctionId(New
);
213 struct WeightedFile
{
214 std::string Filename
;
// List of WeightedFile inputs; a SmallVector declared with an inline size
// of 5 elements.
217 typedef SmallVector
<WeightedFile
, 5> WeightedFileVector
;
219 /// Keep track of merged data and reported errors.
220 struct WriterContext
{
222 InstrProfWriter Writer
;
223 std::vector
<std::pair
<Error
, std::string
>> Errors
;
225 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
;
227 WriterContext(bool IsSparse
, std::mutex
&ErrLock
,
228 SmallSet
<instrprof_error
, 4> &WriterErrorCodes
,
229 uint64_t ReservoirSize
= 0, uint64_t MaxTraceLength
= 0)
230 : Writer(IsSparse
, ReservoirSize
, MaxTraceLength
), ErrLock(ErrLock
),
231 WriterErrorCodes(WriterErrorCodes
) {}
234 /// Compute the overlap between profiles BaseFilename and TestFilename,
235 /// and store the program-level result in Overlap.
236 static void overlapInput(const std::string
&BaseFilename
,
237 const std::string
&TestFilename
, WriterContext
*WC
,
238 OverlapStats
&Overlap
,
239 const OverlapFuncFilters
&FuncFilter
,
240 raw_fd_ostream
&OS
, bool IsCS
) {
241 auto FS
= vfs::getRealFileSystem();
242 auto ReaderOrErr
= InstrProfReader::create(TestFilename
, *FS
);
243 if (Error E
= ReaderOrErr
.takeError()) {
244 // Skip the empty profiles by returning silently.
245 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
246 if (ErrorCode
!= instrprof_error::empty_raw_profile
)
247 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
252 auto Reader
= std::move(ReaderOrErr
.get());
253 for (auto &I
: *Reader
) {
254 OverlapStats
FuncOverlap(OverlapStats::FunctionLevel
);
255 FuncOverlap
.setFuncInfo(I
.Name
, I
.Hash
);
257 WC
->Writer
.overlapRecord(std::move(I
), Overlap
, FuncOverlap
, FuncFilter
);
258 FuncOverlap
.dump(OS
);
262 /// Load an input into a writer context.
263 static void loadInput(const WeightedFile
&Input
, SymbolRemapper
*Remapper
,
264 const InstrProfCorrelator
*Correlator
,
265 const StringRef ProfiledBinary
, WriterContext
*WC
) {
266 std::unique_lock
<std::mutex
> CtxGuard
{WC
->Lock
};
268 // Copy the filename, because llvm::ThreadPool copied the input "const
269 // WeightedFile &" by value, making a reference to the filename within it
270 // invalid outside of this packaged task.
271 std::string Filename
= Input
.Filename
;
273 using ::llvm::memprof::RawMemProfReader
;
274 if (RawMemProfReader::hasFormat(Input
.Filename
)) {
275 auto ReaderOrErr
= RawMemProfReader::create(Input
.Filename
, ProfiledBinary
);
277 exitWithError(ReaderOrErr
.takeError(), Input
.Filename
);
279 std::unique_ptr
<RawMemProfReader
> Reader
= std::move(ReaderOrErr
.get());
280 // Check if the profile types can be merged, e.g. clang frontend profiles
281 // should not be merged with memprof profiles.
282 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
283 consumeError(std::move(E
));
284 WC
->Errors
.emplace_back(
285 make_error
<StringError
>(
286 "Cannot merge MemProf profile with Clang generated profile.",
292 auto MemProfError
= [&](Error E
) {
293 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
294 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrorCode
, Msg
),
298 // Add the frame mappings into the writer context.
299 const auto &IdToFrame
= Reader
->getFrameMapping();
300 for (const auto &I
: IdToFrame
) {
301 bool Succeeded
= WC
->Writer
.addMemProfFrame(
302 /*Id=*/I
.first
, /*Frame=*/I
.getSecond(), MemProfError
);
303 // If we weren't able to add the frame mappings then it doesn't make sense
304 // to try to add the records from this profile.
308 const auto &FunctionProfileData
= Reader
->getProfileData();
309 // Add the memprof records into the writer context.
310 for (const auto &I
: FunctionProfileData
) {
311 WC
->Writer
.addMemProfRecord(/*Id=*/I
.first
, /*Record=*/I
.second
);
316 auto FS
= vfs::getRealFileSystem();
317 // TODO: This only saves the first non-fatal error from InstrProfReader, and
318 // then adds it to WriterContext::Errors. However, this is not extensible if
319 // we have more non-fatal errors from InstrProfReader in the future. How
320 // should this interact with different -failure-mode?
321 std::optional
<std::pair
<Error
, std::string
>> ReaderWarning
;
322 auto Warn
= [&](Error E
) {
324 consumeError(std::move(E
));
327 // Only show the first time an error occurs in this file.
328 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
329 ReaderWarning
= {make_error
<InstrProfError
>(ErrCode
, Msg
), Filename
};
332 InstrProfReader::create(Input
.Filename
, *FS
, Correlator
, Warn
);
333 if (Error E
= ReaderOrErr
.takeError()) {
334 // Skip the empty profiles by returning silently.
335 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
336 if (ErrCode
!= instrprof_error::empty_raw_profile
)
337 WC
->Errors
.emplace_back(make_error
<InstrProfError
>(ErrCode
, Msg
),
342 auto Reader
= std::move(ReaderOrErr
.get());
343 if (Error E
= WC
->Writer
.mergeProfileKind(Reader
->getProfileKind())) {
344 consumeError(std::move(E
));
345 WC
->Errors
.emplace_back(
346 make_error
<StringError
>(
347 "Merge IR generated profile with Clang generated profile.",
353 for (auto &I
: *Reader
) {
355 I
.Name
= (*Remapper
)(I
.Name
);
356 const StringRef FuncName
= I
.Name
;
357 bool Reported
= false;
358 WC
->Writer
.addRecord(std::move(I
), Input
.Weight
, [&](Error E
) {
360 consumeError(std::move(E
));
364 // Only show hint the first time an error occurs.
365 auto [ErrCode
, Msg
] = InstrProfError::take(std::move(E
));
366 std::unique_lock
<std::mutex
> ErrGuard
{WC
->ErrLock
};
367 bool firstTime
= WC
->WriterErrorCodes
.insert(ErrCode
).second
;
368 handleMergeWriterError(make_error
<InstrProfError
>(ErrCode
, Msg
),
369 Input
.Filename
, FuncName
, firstTime
);
373 if (Reader
->hasTemporalProfile()) {
374 auto &Traces
= Reader
->getTemporalProfTraces(Input
.Weight
);
376 WC
->Writer
.addTemporalProfileTraces(
377 Traces
, Reader
->getTemporalProfTraceStreamSize());
379 if (Reader
->hasError()) {
380 if (Error E
= Reader
->getError()) {
381 WC
->Errors
.emplace_back(std::move(E
), Filename
);
386 std::vector
<llvm::object::BuildID
> BinaryIds
;
387 if (Error E
= Reader
->readBinaryIds(BinaryIds
)) {
388 WC
->Errors
.emplace_back(std::move(E
), Filename
);
391 WC
->Writer
.addBinaryIds(BinaryIds
);
394 WC
->Errors
.emplace_back(std::move(ReaderWarning
->first
),
395 ReaderWarning
->second
);
399 /// Merge the \p Src writer context into \p Dst.
400 static void mergeWriterContexts(WriterContext
*Dst
, WriterContext
*Src
) {
401 for (auto &ErrorPair
: Src
->Errors
)
402 Dst
->Errors
.push_back(std::move(ErrorPair
));
405 if (Error E
= Dst
->Writer
.mergeProfileKind(Src
->Writer
.getProfileKind()))
406 exitWithError(std::move(E
));
408 Dst
->Writer
.mergeRecordsFromWriter(std::move(Src
->Writer
), [&](Error E
) {
409 auto [ErrorCode
, Msg
] = InstrProfError::take(std::move(E
));
410 std::unique_lock
<std::mutex
> ErrGuard
{Dst
->ErrLock
};
411 bool firstTime
= Dst
->WriterErrorCodes
.insert(ErrorCode
).second
;
413 warn(toString(make_error
<InstrProfError
>(ErrorCode
, Msg
)));
417 static void writeInstrProfile(StringRef OutputFilename
,
418 ProfileFormat OutputFormat
,
419 InstrProfWriter
&Writer
) {
421 raw_fd_ostream
Output(OutputFilename
.data(), EC
,
422 OutputFormat
== PF_Text
? sys::fs::OF_TextWithCRLF
425 exitWithErrorCode(EC
, OutputFilename
);
427 if (OutputFormat
== PF_Text
) {
428 if (Error E
= Writer
.writeText(Output
))
431 if (Output
.is_displayed())
432 exitWithError("cannot write a non-text format profile to the terminal");
433 if (Error E
= Writer
.write(Output
))
439 mergeInstrProfile(const WeightedFileVector
&Inputs
, StringRef DebugInfoFilename
,
440 SymbolRemapper
*Remapper
, StringRef OutputFilename
,
441 ProfileFormat OutputFormat
, uint64_t TraceReservoirSize
,
442 uint64_t MaxTraceLength
, int MaxDbgCorrelationWarnings
,
443 bool OutputSparse
, unsigned NumThreads
, FailureMode FailMode
,
444 const StringRef ProfiledBinary
) {
445 if (OutputFormat
== PF_Compact_Binary
)
446 exitWithError("Compact Binary is deprecated");
447 if (OutputFormat
!= PF_Binary
&& OutputFormat
!= PF_Ext_Binary
&&
448 OutputFormat
!= PF_Text
)
449 exitWithError("unknown format is specified");
451 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
452 if (!DebugInfoFilename
.empty()) {
453 if (auto Err
= InstrProfCorrelator::get(DebugInfoFilename
,
454 InstrProfCorrelator::DEBUG_INFO
)
455 .moveInto(Correlator
))
456 exitWithError(std::move(Err
), DebugInfoFilename
);
457 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
458 exitWithError(std::move(Err
), DebugInfoFilename
);
461 std::mutex ErrorLock
;
462 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
464 // If NumThreads is not specified, auto-detect a good default.
466 NumThreads
= std::min(hardware_concurrency().compute_thread_count(),
467 unsigned((Inputs
.size() + 1) / 2));
469 // Initialize the writer contexts.
470 SmallVector
<std::unique_ptr
<WriterContext
>, 4> Contexts
;
471 for (unsigned I
= 0; I
< NumThreads
; ++I
)
472 Contexts
.emplace_back(std::make_unique
<WriterContext
>(
473 OutputSparse
, ErrorLock
, WriterErrorCodes
, TraceReservoirSize
,
476 if (NumThreads
== 1) {
477 for (const auto &Input
: Inputs
)
478 loadInput(Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
481 ThreadPool
Pool(hardware_concurrency(NumThreads
));
483 // Load the inputs in parallel (N/NumThreads serial steps).
485 for (const auto &Input
: Inputs
) {
486 Pool
.async(loadInput
, Input
, Remapper
, Correlator
.get(), ProfiledBinary
,
487 Contexts
[Ctx
].get());
488 Ctx
= (Ctx
+ 1) % NumThreads
;
492 // Merge the writer contexts together (~ lg(NumThreads) serial steps).
493 unsigned Mid
= Contexts
.size() / 2;
494 unsigned End
= Contexts
.size();
495 assert(Mid
> 0 && "Expected more than one context");
497 for (unsigned I
= 0; I
< Mid
; ++I
)
498 Pool
.async(mergeWriterContexts
, Contexts
[I
].get(),
499 Contexts
[I
+ Mid
].get());
502 Pool
.async(mergeWriterContexts
, Contexts
[0].get(),
503 Contexts
[End
- 1].get());
511 // Handle deferred errors encountered during merging. If the number of errors
512 // is equal to the number of inputs the merge failed.
513 unsigned NumErrors
= 0;
514 for (std::unique_ptr
<WriterContext
> &WC
: Contexts
) {
515 for (auto &ErrorPair
: WC
->Errors
) {
517 warn(toString(std::move(ErrorPair
.first
)), ErrorPair
.second
);
520 if ((NumErrors
== Inputs
.size() && FailMode
== failIfAllAreInvalid
) ||
521 (NumErrors
> 0 && FailMode
== failIfAnyAreInvalid
))
522 exitWithError("no profile can be merged");
524 writeInstrProfile(OutputFilename
, OutputFormat
, Contexts
[0]->Writer
);
527 /// The profile entry for a function in instrumentation profile.
528 struct InstrProfileEntry
{
529 uint64_t MaxCount
= 0;
530 uint64_t NumEdgeCounters
= 0;
531 float ZeroCounterRatio
= 0.0;
532 InstrProfRecord
*ProfRecord
;
533 InstrProfileEntry(InstrProfRecord
*Record
);
534 InstrProfileEntry() = default;
537 InstrProfileEntry::InstrProfileEntry(InstrProfRecord
*Record
) {
539 uint64_t CntNum
= Record
->Counts
.size();
540 uint64_t ZeroCntNum
= 0;
541 for (size_t I
= 0; I
< CntNum
; ++I
) {
542 MaxCount
= std::max(MaxCount
, Record
->Counts
[I
]);
543 ZeroCntNum
+= !Record
->Counts
[I
];
545 ZeroCounterRatio
= (float)ZeroCntNum
/ CntNum
;
546 NumEdgeCounters
= CntNum
;
549 /// Either set all the counters in the instr profile entry \p IFE to
550 /// -1 / -2 in order to drop the profile, or scale up the
551 /// counters in \p IFE to be above hot / cold threshold. We use
552 /// the ratio of zero counters in the profile of a function to
553 /// decide the profile is helpful or harmful for performance,
554 /// and to choose whether to scale up or drop it.
555 static void updateInstrProfileEntry(InstrProfileEntry
&IFE
, bool SetToHot
,
556 uint64_t HotInstrThreshold
,
557 uint64_t ColdInstrThreshold
,
558 float ZeroCounterThreshold
) {
559 InstrProfRecord
*ProfRecord
= IFE
.ProfRecord
;
560 if (!IFE
.MaxCount
|| IFE
.ZeroCounterRatio
> ZeroCounterThreshold
) {
561 // If all or most of the counters of the function are zero, the
562 // profile is unaccountable and should be dropped. Reset all the
563 // counters to be -1 / -2 and PGO profile-use will drop the profile.
564 // All counters being -1 also implies that the function is hot so
565 // PGO profile-use will also set the entry count metadata to be
566 // above hot threshold.
567 // All counters being -2 implies that the function is warm so
568 // PGO profile-use will also set the entry count metadata to be
569 // above cold threshold.
571 (SetToHot
? InstrProfRecord::PseudoHot
: InstrProfRecord::PseudoWarm
);
572 ProfRecord
->setPseudoCount(Kind
);
576 // Scale up the MaxCount to be multiple times above hot / cold threshold.
577 const unsigned MultiplyFactor
= 3;
578 uint64_t Threshold
= (SetToHot
? HotInstrThreshold
: ColdInstrThreshold
);
579 uint64_t Numerator
= Threshold
* MultiplyFactor
;
581 // Make sure Threshold for warm counters is below the HotInstrThreshold.
582 if (!SetToHot
&& Threshold
>= HotInstrThreshold
) {
583 Threshold
= (HotInstrThreshold
+ ColdInstrThreshold
) / 2;
586 uint64_t Denominator
= IFE
.MaxCount
;
587 if (Numerator
<= Denominator
)
589 ProfRecord
->scale(Numerator
, Denominator
, [&](instrprof_error E
) {
590 warn(toString(make_error
<InstrProfError
>(E
)));
// Indices into ProfileSummaryBuilder::DefaultCutoffs. The selected cutoffs
// are passed to ProfileSummaryBuilder::getEntryForPercentile when computing
// the cold and hot thresholds of the sample and instr profiles.
594 const uint64_t ColdPercentileIdx
= 15;
595 const uint64_t HotPercentileIdx
= 11;
597 using sampleprof::FSDiscriminatorPass
;
599 // Internal options to set FSDiscriminatorPass. Used in merge and show
// Hidden command-line option "fs-discriminator-pass". It defaults to
// PassLast and accepts Base, Pass1, Pass2, Pass3 and PassLast.
// getDiscriminatorMask() reads its value to build the discriminator mask.
601 static cl::opt
<FSDiscriminatorPass
> FSDiscriminatorPassOption(
602 "fs-discriminator-pass", cl::init(PassLast
), cl::Hidden
,
603 cl::desc("Zero out the discriminator bits for the FS discrimiantor "
604 "pass beyond this value. The enum values are defined in "
605 "Support/Discriminator.h"),
606 cl::values(clEnumVal(Base
, "Use base discriminators only"),
607 clEnumVal(Pass1
, "Use base and pass 1 discriminators"),
608 clEnumVal(Pass2
, "Use base and pass 1-2 discriminators"),
609 clEnumVal(Pass3
, "Use base and pass 1-3 discriminators"),
610 clEnumVal(PassLast
, "Use all discriminator bits (default)")));
612 static unsigned getDiscriminatorMask() {
613 return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption
.getValue()));
616 /// Adjust the instr profile in \p WC based on the sample profile in
619 adjustInstrProfile(std::unique_ptr
<WriterContext
> &WC
,
620 std::unique_ptr
<sampleprof::SampleProfileReader
> &Reader
,
621 unsigned SupplMinSizeThreshold
, float ZeroCounterThreshold
,
622 unsigned InstrProfColdThreshold
) {
623 // Function to its entry in instr profile.
624 StringMap
<InstrProfileEntry
> InstrProfileMap
;
625 StringMap
<StringRef
> StaticFuncMap
;
626 InstrProfSummaryBuilder
IPBuilder(ProfileSummaryBuilder::DefaultCutoffs
);
628 auto checkSampleProfileHasFUnique
= [&Reader
]() {
629 for (const auto &PD
: Reader
->getProfiles()) {
630 auto &FContext
= PD
.second
.getContext();
631 if (FContext
.toString().find(FunctionSamples::UniqSuffix
) !=
639 bool SampleProfileHasFUnique
= checkSampleProfileHasFUnique();
641 auto buildStaticFuncMap
= [&StaticFuncMap
,
642 SampleProfileHasFUnique
](const StringRef Name
) {
643 std::string Prefixes
[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"};
644 size_t PrefixPos
= StringRef::npos
;
645 for (auto &Prefix
: Prefixes
) {
646 PrefixPos
= Name
.find_insensitive(Prefix
);
647 if (PrefixPos
== StringRef::npos
)
649 PrefixPos
+= Prefix
.size();
653 if (PrefixPos
== StringRef::npos
) {
657 StringRef NewName
= Name
.drop_front(PrefixPos
);
658 StringRef FName
= Name
.substr(0, PrefixPos
- 1);
659 if (NewName
.size() == 0) {
663 // This name should have a static linkage.
664 size_t PostfixPos
= NewName
.find(FunctionSamples::UniqSuffix
);
665 bool ProfileHasFUnique
= (PostfixPos
!= StringRef::npos
);
667 // If sample profile and instrumented profile do not agree on symbol
669 if (SampleProfileHasFUnique
!= ProfileHasFUnique
) {
670 // If instrumented profile uses -funique-internal-linkage-symbols,
671 // we need to trim the name.
672 if (ProfileHasFUnique
) {
673 NewName
= NewName
.substr(0, PostfixPos
);
675 // If sample profile uses -funique-internal-linkage-symbols,
678 NewName
.str() + getUniqueInternalLinkagePostfix(FName
);
679 NewName
= StringRef(NStr
);
680 StaticFuncMap
[NewName
] = Name
;
685 if (!StaticFuncMap
.contains(NewName
)) {
686 StaticFuncMap
[NewName
] = Name
;
688 StaticFuncMap
[NewName
] = DuplicateNameStr
;
692 // We need to flatten the SampleFDO profile as the InstrFDO
693 // profile does not have inlined callsite profiles.
694 // One caveat is the pre-inlined function -- their samples
695 // should be collapsed into the caller function.
696 // Here we do a DFS traversal to get the flattened profile
697 // info: the sum of entrycount and the max of maxcount.
698 // Here is the algorithm:
699 // recursive (FS, root_name) {
700 // name = FS->getName();
701 // get samples for FS;
702 // if (InstrProf.find(name)) {
705 // if (name is in static_func map) {
706 // root_name = static_name;
709 // update the Map entry for root_name;
711 // recursive(subfs, root_name);
715 // Here is an example.
731 // InstrProfile has two entries:
735 // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
736 // {"foo", {1000, 5000}}
737 // {"bar.cc:bar", {11000, 30000}}
739 // foo has an entry count of 1000, and a max body count of 5000.
740 // bar.cc:bar has an entry count of 11000 (sum two callsites of 1000 and
741 // 10000), and max count of 30000 (from the callsite in line 8).
743 // Note that goo's count will remain in bar.cc:bar() as it does not have an
744 // entry in InstrProfile.
745 llvm::StringMap
<std::pair
<uint64_t, uint64_t>> FlattenSampleMap
;
746 auto BuildMaxSampleMap
= [&FlattenSampleMap
, &StaticFuncMap
,
747 &InstrProfileMap
](const FunctionSamples
&FS
,
748 const StringRef
&RootName
) {
749 auto BuildMaxSampleMapImpl
= [&](const FunctionSamples
&FS
,
750 const StringRef
&RootName
,
751 auto &BuildImpl
) -> void {
752 std::string NameStr
= FS
.getFunction().str();
753 const StringRef Name
= NameStr
;
754 const StringRef
*NewRootName
= &RootName
;
755 uint64_t EntrySample
= FS
.getHeadSamplesEstimate();
756 uint64_t MaxBodySample
= FS
.getMaxCountInside(/* SkipCallSite*/ true);
758 auto It
= InstrProfileMap
.find(Name
);
759 if (It
!= InstrProfileMap
.end()) {
762 auto NewName
= StaticFuncMap
.find(Name
);
763 if (NewName
!= StaticFuncMap
.end()) {
764 It
= InstrProfileMap
.find(NewName
->second
.str());
765 if (NewName
->second
!= DuplicateNameStr
) {
766 NewRootName
= &NewName
->second
;
769 // Here the EntrySample is of an inlined function, so we should not
770 // update the EntrySample in the map.
774 EntrySample
+= FlattenSampleMap
[*NewRootName
].first
;
776 std::max(FlattenSampleMap
[*NewRootName
].second
, MaxBodySample
);
777 FlattenSampleMap
[*NewRootName
] =
778 std::make_pair(EntrySample
, MaxBodySample
);
780 for (const auto &C
: FS
.getCallsiteSamples())
781 for (const auto &F
: C
.second
)
782 BuildImpl(F
.second
, *NewRootName
, BuildImpl
);
784 BuildMaxSampleMapImpl(FS
, RootName
, BuildMaxSampleMapImpl
);
787 for (auto &PD
: WC
->Writer
.getProfileData()) {
788 // Populate IPBuilder.
789 for (const auto &PDV
: PD
.getValue()) {
790 InstrProfRecord Record
= PDV
.second
;
791 IPBuilder
.addRecord(Record
);
794 // If a function has multiple entries in instr profile, skip it.
795 if (PD
.getValue().size() != 1)
798 // Initialize InstrProfileMap.
799 InstrProfRecord
*R
= &PD
.getValue().begin()->second
;
800 StringRef FullName
= PD
.getKey();
801 InstrProfileMap
[FullName
] = InstrProfileEntry(R
);
802 buildStaticFuncMap(FullName
);
805 for (auto &PD
: Reader
->getProfiles()) {
806 sampleprof::FunctionSamples
&FS
= PD
.second
;
807 std::string Name
= FS
.getFunction().str();
808 BuildMaxSampleMap(FS
, Name
);
811 ProfileSummary InstrPS
= *IPBuilder
.getSummary();
812 ProfileSummary SamplePS
= Reader
->getSummary();
814 // Compute hot and cold thresholds for instr profile and sample profile.
815 uint64_t HotSampleThreshold
=
816 ProfileSummaryBuilder::getEntryForPercentile(
817 SamplePS
.getDetailedSummary(),
818 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
820 uint64_t ColdSampleThreshold
=
821 ProfileSummaryBuilder::getEntryForPercentile(
822 SamplePS
.getDetailedSummary(),
823 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
825 uint64_t HotInstrThreshold
=
826 ProfileSummaryBuilder::getEntryForPercentile(
827 InstrPS
.getDetailedSummary(),
828 ProfileSummaryBuilder::DefaultCutoffs
[HotPercentileIdx
])
830 uint64_t ColdInstrThreshold
=
831 InstrProfColdThreshold
832 ? InstrProfColdThreshold
833 : ProfileSummaryBuilder::getEntryForPercentile(
834 InstrPS
.getDetailedSummary(),
835 ProfileSummaryBuilder::DefaultCutoffs
[ColdPercentileIdx
])
838 // Find hot/warm functions in sample profile which are cold in instr profile
839 // and adjust the profiles of those functions in the instr profile.
840 for (const auto &E
: FlattenSampleMap
) {
841 uint64_t SampleMaxCount
= std::max(E
.second
.first
, E
.second
.second
);
842 if (SampleMaxCount
< ColdSampleThreshold
)
844 StringRef Name
= E
.first();
845 auto It
= InstrProfileMap
.find(Name
);
846 if (It
== InstrProfileMap
.end()) {
847 auto NewName
= StaticFuncMap
.find(Name
);
848 if (NewName
!= StaticFuncMap
.end()) {
849 It
= InstrProfileMap
.find(NewName
->second
.str());
850 if (NewName
->second
== DuplicateNameStr
) {
852 << "Static function " << Name
853 << " has multiple promoted names, cannot adjust profile.\n";
857 if (It
== InstrProfileMap
.end() ||
858 It
->second
.MaxCount
> ColdInstrThreshold
||
859 It
->second
.NumEdgeCounters
< SupplMinSizeThreshold
)
861 bool SetToHot
= SampleMaxCount
>= HotSampleThreshold
;
862 updateInstrProfileEntry(It
->second
, SetToHot
, HotInstrThreshold
,
863 ColdInstrThreshold
, ZeroCounterThreshold
);
867 /// The main function to supplement instr profile with sample profile.
868 /// \p Inputs contains the instr profile. \p SampleFilename specifies the
869 /// sample profile. \p OutputFilename specifies the output profile name.
870 /// \p OutputFormat specifies the output profile format. \p OutputSparse
871 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
872 /// specifies the minimal size for the functions whose profile will be
873 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
874 /// a function contains too many zero counters and whether its profile
875 /// should be dropped. \p InstrProfColdThreshold is the user specified
876 /// cold threshold which will override the cold threshold got from the
877 /// instr profile summary.
878 static void supplementInstrProfile(
879 const WeightedFileVector
&Inputs
, StringRef SampleFilename
,
880 StringRef OutputFilename
, ProfileFormat OutputFormat
, bool OutputSparse
,
881 unsigned SupplMinSizeThreshold
, float ZeroCounterThreshold
,
882 unsigned InstrProfColdThreshold
) {
883 if (OutputFilename
.compare("-") == 0)
884 exitWithError("cannot write indexed profdata format to stdout");
885 if (Inputs
.size() != 1)
886 exitWithError("expect one input to be an instr profile");
887 if (Inputs
[0].Weight
!= 1)
888 exitWithError("expect instr profile doesn't have weight");
890 StringRef InstrFilename
= Inputs
[0].Filename
;
892 // Read sample profile.
894 auto FS
= vfs::getRealFileSystem();
895 auto ReaderOrErr
= sampleprof::SampleProfileReader::create(
896 SampleFilename
.str(), Context
, *FS
, FSDiscriminatorPassOption
);
897 if (std::error_code EC
= ReaderOrErr
.getError())
898 exitWithErrorCode(EC
, SampleFilename
);
899 auto Reader
= std::move(ReaderOrErr
.get());
900 if (std::error_code EC
= Reader
->read())
901 exitWithErrorCode(EC
, SampleFilename
);
903 // Read instr profile.
904 std::mutex ErrorLock
;
905 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
906 auto WC
= std::make_unique
<WriterContext
>(OutputSparse
, ErrorLock
,
908 loadInput(Inputs
[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC
.get());
909 if (WC
->Errors
.size() > 0)
910 exitWithError(std::move(WC
->Errors
[0].first
), InstrFilename
);
912 adjustInstrProfile(WC
, Reader
, SupplMinSizeThreshold
, ZeroCounterThreshold
,
913 InstrProfColdThreshold
);
914 writeInstrProfile(OutputFilename
, OutputFormat
, WC
->Writer
);
917 /// Make a copy of the given function samples with all symbol names remapped
918 /// by the provided symbol remapper.
919 static sampleprof::FunctionSamples
920 remapSamples(const sampleprof::FunctionSamples
&Samples
,
921 SymbolRemapper
&Remapper
, sampleprof_error
&Error
) {
922 sampleprof::FunctionSamples Result
;
923 Result
.setFunction(Remapper(Samples
.getFunction()));
924 Result
.addTotalSamples(Samples
.getTotalSamples());
925 Result
.addHeadSamples(Samples
.getHeadSamples());
926 for (const auto &BodySample
: Samples
.getBodySamples()) {
927 uint32_t MaskedDiscriminator
=
928 BodySample
.first
.Discriminator
& getDiscriminatorMask();
929 Result
.addBodySamples(BodySample
.first
.LineOffset
, MaskedDiscriminator
,
930 BodySample
.second
.getSamples());
931 for (const auto &Target
: BodySample
.second
.getCallTargets()) {
932 Result
.addCalledTargetSamples(BodySample
.first
.LineOffset
,
934 Remapper(Target
.first
), Target
.second
);
937 for (const auto &CallsiteSamples
: Samples
.getCallsiteSamples()) {
938 sampleprof::FunctionSamplesMap
&Target
=
939 Result
.functionSamplesAt(CallsiteSamples
.first
);
940 for (const auto &Callsite
: CallsiteSamples
.second
) {
941 sampleprof::FunctionSamples Remapped
=
942 remapSamples(Callsite
.second
, Remapper
, Error
);
943 MergeResult(Error
, Target
[Remapped
.getFunction()].merge(Remapped
));
949 static sampleprof::SampleProfileFormat FormatMap
[] = {
950 sampleprof::SPF_None
,
951 sampleprof::SPF_Text
,
952 sampleprof::SPF_None
,
953 sampleprof::SPF_Ext_Binary
,
955 sampleprof::SPF_Binary
};
957 static std::unique_ptr
<MemoryBuffer
>
958 getInputFileBuf(const StringRef
&InputFile
) {
962 auto BufOrError
= MemoryBuffer::getFileOrSTDIN(InputFile
);
964 exitWithErrorCode(BufOrError
.getError(), InputFile
);
966 return std::move(*BufOrError
);
969 static void populateProfileSymbolList(MemoryBuffer
*Buffer
,
970 sampleprof::ProfileSymbolList
&PSL
) {
974 SmallVector
<StringRef
, 32> SymbolVec
;
975 StringRef Data
= Buffer
->getBuffer();
976 Data
.split(SymbolVec
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
978 for (StringRef SymbolStr
: SymbolVec
)
979 PSL
.add(SymbolStr
.trim());
982 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter
&Writer
,
983 ProfileFormat OutputFormat
,
984 MemoryBuffer
*Buffer
,
985 sampleprof::ProfileSymbolList
&WriterList
,
986 bool CompressAllSections
, bool UseMD5
,
987 bool GenPartialProfile
) {
988 populateProfileSymbolList(Buffer
, WriterList
);
989 if (WriterList
.size() > 0 && OutputFormat
!= PF_Ext_Binary
)
990 warn("Profile Symbol list is not empty but the output format is not "
991 "ExtBinary format. The list will be lost in the output. ");
993 Writer
.setProfileSymbolList(&WriterList
);
995 if (CompressAllSections
) {
996 if (OutputFormat
!= PF_Ext_Binary
)
997 warn("-compress-all-section is ignored. Specify -extbinary to enable it");
999 Writer
.setToCompressAllSections();
1002 if (OutputFormat
!= PF_Ext_Binary
)
1003 warn("-use-md5 is ignored. Specify -extbinary to enable it");
1007 if (GenPartialProfile
) {
1008 if (OutputFormat
!= PF_Ext_Binary
)
1009 warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1011 Writer
.setPartialProfile();
1016 mergeSampleProfile(const WeightedFileVector
&Inputs
, SymbolRemapper
*Remapper
,
1017 StringRef OutputFilename
, ProfileFormat OutputFormat
,
1018 StringRef ProfileSymbolListFile
, bool CompressAllSections
,
1019 bool UseMD5
, bool GenPartialProfile
,
1020 SampleProfileLayout ProfileLayout
,
1021 bool SampleMergeColdContext
, bool SampleTrimColdContext
,
1022 bool SampleColdContextFrameDepth
, FailureMode FailMode
,
1023 bool DropProfileSymbolList
, size_t OutputSizeLimit
) {
1024 using namespace sampleprof
;
1025 SampleProfileMap ProfileMap
;
1026 SmallVector
<std::unique_ptr
<sampleprof::SampleProfileReader
>, 5> Readers
;
1027 LLVMContext Context
;
1028 sampleprof::ProfileSymbolList WriterList
;
1029 std::optional
<bool> ProfileIsProbeBased
;
1030 std::optional
<bool> ProfileIsCS
;
1031 for (const auto &Input
: Inputs
) {
1032 auto FS
= vfs::getRealFileSystem();
1033 auto ReaderOrErr
= SampleProfileReader::create(Input
.Filename
, Context
, *FS
,
1034 FSDiscriminatorPassOption
);
1035 if (std::error_code EC
= ReaderOrErr
.getError()) {
1036 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1040 // We need to keep the readers around until after all the files are
1041 // read so that we do not lose the function names stored in each
1042 // reader's memory. The function names are needed to write out the
1043 // merged profile map.
1044 Readers
.push_back(std::move(ReaderOrErr
.get()));
1045 const auto Reader
= Readers
.back().get();
1046 if (std::error_code EC
= Reader
->read()) {
1047 warnOrExitGivenError(FailMode
, EC
, Input
.Filename
);
1052 SampleProfileMap
&Profiles
= Reader
->getProfiles();
1053 if (ProfileIsProbeBased
&&
1054 ProfileIsProbeBased
!= FunctionSamples::ProfileIsProbeBased
)
1056 "cannot merge probe-based profile with non-probe-based profile");
1057 ProfileIsProbeBased
= FunctionSamples::ProfileIsProbeBased
;
1058 if (ProfileIsCS
&& ProfileIsCS
!= FunctionSamples::ProfileIsCS
)
1059 exitWithError("cannot merge CS profile with non-CS profile");
1060 ProfileIsCS
= FunctionSamples::ProfileIsCS
;
1061 for (SampleProfileMap::iterator I
= Profiles
.begin(), E
= Profiles
.end();
1063 sampleprof_error Result
= sampleprof_error::success
;
1064 FunctionSamples Remapped
=
1065 Remapper
? remapSamples(I
->second
, *Remapper
, Result
)
1066 : FunctionSamples();
1067 FunctionSamples
&Samples
= Remapper
? Remapped
: I
->second
;
1068 SampleContext FContext
= Samples
.getContext();
1069 MergeResult(Result
, ProfileMap
[FContext
].merge(Samples
, Input
.Weight
));
1070 if (Result
!= sampleprof_error::success
) {
1071 std::error_code EC
= make_error_code(Result
);
1072 handleMergeWriterError(errorCodeToError(EC
), Input
.Filename
,
1073 FContext
.toString());
1077 if (!DropProfileSymbolList
) {
1078 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
1079 Reader
->getProfileSymbolList();
1081 WriterList
.merge(*ReaderList
);
1085 if (ProfileIsCS
&& (SampleMergeColdContext
|| SampleTrimColdContext
)) {
1086 // Use threshold calculated from profile summary unless specified.
1087 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1088 auto Summary
= Builder
.computeSummaryForProfiles(ProfileMap
);
1089 uint64_t SampleProfColdThreshold
=
1090 ProfileSummaryBuilder::getColdCountThreshold(
1091 (Summary
->getDetailedSummary()));
1093 // Trim and merge cold context profile using cold threshold above;
1094 SampleContextTrimmer(ProfileMap
)
1095 .trimAndMergeColdContextProfiles(
1096 SampleProfColdThreshold
, SampleTrimColdContext
,
1097 SampleMergeColdContext
, SampleColdContextFrameDepth
, false);
1100 if (ProfileLayout
== llvm::sampleprof::SPL_Flat
) {
1101 ProfileConverter::flattenProfile(ProfileMap
, FunctionSamples::ProfileIsCS
);
1102 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1103 } else if (ProfileIsCS
&& ProfileLayout
== llvm::sampleprof::SPL_Nest
) {
1104 ProfileConverter
CSConverter(ProfileMap
);
1105 CSConverter
.convertCSProfiles();
1106 ProfileIsCS
= FunctionSamples::ProfileIsCS
= false;
1110 SampleProfileWriter::create(OutputFilename
, FormatMap
[OutputFormat
]);
1111 if (std::error_code EC
= WriterOrErr
.getError())
1112 exitWithErrorCode(EC
, OutputFilename
);
1114 auto Writer
= std::move(WriterOrErr
.get());
1115 // WriterList will have StringRef refering to string in Buffer.
1116 // Make sure Buffer lives as long as WriterList.
1117 auto Buffer
= getInputFileBuf(ProfileSymbolListFile
);
1118 handleExtBinaryWriter(*Writer
, OutputFormat
, Buffer
.get(), WriterList
,
1119 CompressAllSections
, UseMD5
, GenPartialProfile
);
1121 // If OutputSizeLimit is 0 (default), it is the same as write().
1122 if (std::error_code EC
=
1123 Writer
->writeWithSizeLimit(ProfileMap
, OutputSizeLimit
))
1124 exitWithErrorCode(std::move(EC
));
1127 static WeightedFile
parseWeightedFile(const StringRef
&WeightedFilename
) {
1128 StringRef WeightStr
, FileName
;
1129 std::tie(WeightStr
, FileName
) = WeightedFilename
.split(',');
1132 if (WeightStr
.getAsInteger(10, Weight
) || Weight
< 1)
1133 exitWithError("input weight must be a positive integer");
1135 return {std::string(FileName
), Weight
};
1138 static void addWeightedInput(WeightedFileVector
&WNI
, const WeightedFile
&WF
) {
1139 StringRef Filename
= WF
.Filename
;
1140 uint64_t Weight
= WF
.Weight
;
1142 // If it's STDIN just pass it on.
1143 if (Filename
== "-") {
1144 WNI
.push_back({std::string(Filename
), Weight
});
1148 llvm::sys::fs::file_status Status
;
1149 llvm::sys::fs::status(Filename
, Status
);
1150 if (!llvm::sys::fs::exists(Status
))
1151 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory
),
1153 // If it's a source file, collect it.
1154 if (llvm::sys::fs::is_regular_file(Status
)) {
1155 WNI
.push_back({std::string(Filename
), Weight
});
1159 if (llvm::sys::fs::is_directory(Status
)) {
1161 for (llvm::sys::fs::recursive_directory_iterator
F(Filename
, EC
), E
;
1162 F
!= E
&& !EC
; F
.increment(EC
)) {
1163 if (llvm::sys::fs::is_regular_file(F
->path())) {
1164 addWeightedInput(WNI
, {F
->path(), Weight
});
1168 exitWithErrorCode(EC
, Filename
);
1172 static void parseInputFilenamesFile(MemoryBuffer
*Buffer
,
1173 WeightedFileVector
&WFV
) {
1177 SmallVector
<StringRef
, 8> Entries
;
1178 StringRef Data
= Buffer
->getBuffer();
1179 Data
.split(Entries
, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1180 for (const StringRef
&FileWeightEntry
: Entries
) {
1181 StringRef SanitizedEntry
= FileWeightEntry
.trim(" \t\v\f\r");
1183 if (SanitizedEntry
.startswith("#"))
1185 // If there's no comma, it's an unweighted profile.
1186 else if (!SanitizedEntry
.contains(','))
1187 addWeightedInput(WFV
, {std::string(SanitizedEntry
), 1});
1189 addWeightedInput(WFV
, parseWeightedFile(SanitizedEntry
));
1193 static int merge_main(int argc
, const char *argv
[]) {
1194 cl::list
<std::string
> InputFilenames(cl::Positional
,
1195 cl::desc("<filename...>"));
1196 cl::list
<std::string
> WeightedInputFilenames("weighted-input",
1197 cl::desc("<weight>,<filename>"));
1198 cl::opt
<std::string
> InputFilenamesFile(
1199 "input-files", cl::init(""),
1200 cl::desc("Path to file containing newline-separated "
1201 "[<weight>,]<filename> entries"));
1202 cl::alias
InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
1203 cl::aliasopt(InputFilenamesFile
));
1204 cl::opt
<bool> DumpInputFileList(
1205 "dump-input-file-list", cl::init(false), cl::Hidden
,
1206 cl::desc("Dump the list of input files and their weights, then exit"));
1207 cl::opt
<std::string
> RemappingFile("remapping-file", cl::value_desc("file"),
1208 cl::desc("Symbol remapping file"));
1209 cl::alias
RemappingFileA("r", cl::desc("Alias for --remapping-file"),
1210 cl::aliasopt(RemappingFile
));
1211 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
1212 cl::init("-"), cl::desc("Output file"));
1213 cl::alias
OutputFilenameA("o", cl::desc("Alias for --output"),
1214 cl::aliasopt(OutputFilename
));
1215 cl::opt
<ProfileKinds
> ProfileKind(
1216 cl::desc("Profile kind:"), cl::init(instr
),
1217 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
1218 clEnumVal(sample
, "Sample profile")));
1219 cl::opt
<ProfileFormat
> OutputFormat(
1220 cl::desc("Format of output profile"), cl::init(PF_Ext_Binary
),
1222 clEnumValN(PF_Binary
, "binary", "Binary encoding"),
1223 clEnumValN(PF_Ext_Binary
, "extbinary", "Extensible binary encoding "
1225 clEnumValN(PF_Text
, "text", "Text encoding"),
1226 clEnumValN(PF_GCC
, "gcc",
1227 "GCC encoding (only meaningful for -sample)")));
1228 cl::opt
<FailureMode
> FailureMode(
1229 "failure-mode", cl::init(failIfAnyAreInvalid
), cl::desc("Failure mode:"),
1231 clEnumValN(warnOnly
, "warn", "Do not fail and just print warnings."),
1232 clEnumValN(failIfAnyAreInvalid
, "any",
1233 "Fail if any profile is invalid."),
1234 clEnumValN(failIfAllAreInvalid
, "all",
1235 "Fail only if all profiles are invalid.")));
1236 cl::opt
<bool> OutputSparse("sparse", cl::init(false),
1237 cl::desc("Generate a sparse profile (only meaningful for -instr)"));
1238 cl::opt
<unsigned> NumThreads(
1239 "num-threads", cl::init(0),
1240 cl::desc("Number of merge threads to use (default: autodetect)"));
1241 cl::alias
NumThreadsA("j", cl::desc("Alias for --num-threads"),
1242 cl::aliasopt(NumThreads
));
1243 cl::opt
<std::string
> ProfileSymbolListFile(
1244 "prof-sym-list", cl::init(""),
1245 cl::desc("Path to file containing the list of function symbols "
1246 "used to populate profile symbol list"));
1247 cl::opt
<bool> CompressAllSections(
1248 "compress-all-sections", cl::init(false), cl::Hidden
,
1249 cl::desc("Compress all sections when writing the profile (only "
1250 "meaningful for -extbinary)"));
1251 cl::opt
<bool> UseMD5(
1252 "use-md5", cl::init(false), cl::Hidden
,
1253 cl::desc("Choose to use MD5 to represent string in name table (only "
1254 "meaningful for -extbinary)"));
1255 cl::opt
<bool> SampleMergeColdContext(
1256 "sample-merge-cold-context", cl::init(false), cl::Hidden
,
1258 "Merge context sample profiles whose count is below cold threshold"));
1259 cl::opt
<bool> SampleTrimColdContext(
1260 "sample-trim-cold-context", cl::init(false), cl::Hidden
,
1262 "Trim context sample profiles whose count is below cold threshold"));
1263 cl::opt
<uint32_t> SampleColdContextFrameDepth(
1264 "sample-frame-depth-for-cold-context", cl::init(1),
1265 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
1266 "context-less base profile"));
1267 cl::opt
<size_t> OutputSizeLimit(
1268 "output-size-limit", cl::init(0), cl::Hidden
,
1269 cl::desc("Trim cold functions until profile size is below specified "
1270 "limit in bytes. This uses a heursitic and functions may be "
1271 "excessively trimmed"));
1272 cl::opt
<bool> GenPartialProfile(
1273 "gen-partial-profile", cl::init(false), cl::Hidden
,
1274 cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
1275 cl::opt
<std::string
> SupplInstrWithSample(
1276 "supplement-instr-with-sample", cl::init(""), cl::Hidden
,
1277 cl::desc("Supplement an instr profile with sample profile, to correct "
1278 "the profile unrepresentativeness issue. The sample "
1279 "profile is the input of the flag. Output will be in instr "
1280 "format (The flag only works with -instr)"));
1281 cl::opt
<float> ZeroCounterThreshold(
1282 "zero-counter-threshold", cl::init(0.7), cl::Hidden
,
1283 cl::desc("For the function which is cold in instr profile but hot in "
1284 "sample profile, if the ratio of the number of zero counters "
1285 "divided by the total number of counters is above the "
1286 "threshold, the profile of the function will be regarded as "
1287 "being harmful for performance and will be dropped."));
1288 cl::opt
<unsigned> SupplMinSizeThreshold(
1289 "suppl-min-size-threshold", cl::init(10), cl::Hidden
,
1290 cl::desc("If the size of a function is smaller than the threshold, "
1291 "assume it can be inlined by PGO early inliner and it won't "
1292 "be adjusted based on sample profile."));
1293 cl::opt
<unsigned> InstrProfColdThreshold(
1294 "instr-prof-cold-threshold", cl::init(0), cl::Hidden
,
1295 cl::desc("User specified cold threshold for instr profile which will "
1296 "override the cold threshold got from profile summary. "));
1297 cl::opt
<SampleProfileLayout
> ProfileLayout(
1298 "convert-sample-profile-layout",
1299 cl::desc("Convert the generated profile to a profile with a new layout"),
1302 clEnumValN(SPL_Nest
, "nest",
1303 "Nested profile, the input should be CS flat profile"),
1304 clEnumValN(SPL_Flat
, "flat",
1305 "Profile with nested inlinee flatten out")));
1306 cl::opt
<std::string
> DebugInfoFilename(
1307 "debug-info", cl::init(""),
1308 cl::desc("Use the provided debug info to correlate the raw profile."));
1309 cl::opt
<unsigned> MaxDbgCorrelationWarnings(
1310 "max-debug-info-correlation-warnings",
1311 cl::desc("The maximum number of warnings to emit when correlating "
1312 "profile from debug info (0 = no limit)"),
1314 cl::opt
<std::string
> ProfiledBinary(
1315 "profiled-binary", cl::init(""),
1316 cl::desc("Path to binary from which the profile was collected."));
1317 cl::opt
<bool> DropProfileSymbolList(
1318 "drop-profile-symbol-list", cl::init(false), cl::Hidden
,
1319 cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
1320 "(only meaningful for -sample)"));
1321 // WARNING: This reservoir size value is propagated to any input indexed
1322 // profiles for simplicity. Changing this value between invocations could
1323 // result in sample bias.
1324 cl::opt
<uint64_t> TemporalProfTraceReservoirSize(
1325 "temporal-profile-trace-reservoir-size", cl::init(100),
1326 cl::desc("The maximum number of stored temporal profile traces (default: "
1328 cl::opt
<uint64_t> TemporalProfMaxTraceLength(
1329 "temporal-profile-max-trace-length", cl::init(10000),
1330 cl::desc("The maximum length of a single temporal profile trace "
1331 "(default: 10000)"));
1333 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data merger\n");
1335 WeightedFileVector WeightedInputs
;
1336 for (StringRef Filename
: InputFilenames
)
1337 addWeightedInput(WeightedInputs
, {std::string(Filename
), 1});
1338 for (StringRef WeightedFilename
: WeightedInputFilenames
)
1339 addWeightedInput(WeightedInputs
, parseWeightedFile(WeightedFilename
));
1341 // Make sure that the file buffer stays alive for the duration of the
1342 // weighted input vector's lifetime.
1343 auto Buffer
= getInputFileBuf(InputFilenamesFile
);
1344 parseInputFilenamesFile(Buffer
.get(), WeightedInputs
);
1346 if (WeightedInputs
.empty())
1347 exitWithError("no input files specified. See " +
1348 sys::path::filename(argv
[0]) + " -help");
1350 if (DumpInputFileList
) {
1351 for (auto &WF
: WeightedInputs
)
1352 outs() << WF
.Weight
<< "," << WF
.Filename
<< "\n";
1356 std::unique_ptr
<SymbolRemapper
> Remapper
;
1357 if (!RemappingFile
.empty())
1358 Remapper
= SymbolRemapper::create(RemappingFile
);
1360 if (!SupplInstrWithSample
.empty()) {
1361 if (ProfileKind
!= instr
)
1363 "-supplement-instr-with-sample can only work with -instr. ");
1365 supplementInstrProfile(WeightedInputs
, SupplInstrWithSample
, OutputFilename
,
1366 OutputFormat
, OutputSparse
, SupplMinSizeThreshold
,
1367 ZeroCounterThreshold
, InstrProfColdThreshold
);
1371 if (ProfileKind
== instr
)
1372 mergeInstrProfile(WeightedInputs
, DebugInfoFilename
, Remapper
.get(),
1373 OutputFilename
, OutputFormat
,
1374 TemporalProfTraceReservoirSize
,
1375 TemporalProfMaxTraceLength
, MaxDbgCorrelationWarnings
,
1376 OutputSparse
, NumThreads
, FailureMode
, ProfiledBinary
);
1378 mergeSampleProfile(WeightedInputs
, Remapper
.get(), OutputFilename
,
1379 OutputFormat
, ProfileSymbolListFile
, CompressAllSections
,
1380 UseMD5
, GenPartialProfile
, ProfileLayout
,
1381 SampleMergeColdContext
, SampleTrimColdContext
,
1382 SampleColdContextFrameDepth
, FailureMode
,
1383 DropProfileSymbolList
, OutputSizeLimit
);
1387 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1388 static void overlapInstrProfile(const std::string
&BaseFilename
,
1389 const std::string
&TestFilename
,
1390 const OverlapFuncFilters
&FuncFilter
,
1391 raw_fd_ostream
&OS
, bool IsCS
) {
1392 std::mutex ErrorLock
;
1393 SmallSet
<instrprof_error
, 4> WriterErrorCodes
;
1394 WriterContext
Context(false, ErrorLock
, WriterErrorCodes
);
1395 WeightedFile WeightedInput
{BaseFilename
, 1};
1396 OverlapStats Overlap
;
1397 Error E
= Overlap
.accumulateCounts(BaseFilename
, TestFilename
, IsCS
);
1399 exitWithError(std::move(E
), "error in getting profile count sums");
1400 if (Overlap
.Base
.CountSum
< 1.0f
) {
1401 OS
<< "Sum of edge counts for profile " << BaseFilename
<< " is 0.\n";
1404 if (Overlap
.Test
.CountSum
< 1.0f
) {
1405 OS
<< "Sum of edge counts for profile " << TestFilename
<< " is 0.\n";
1408 loadInput(WeightedInput
, nullptr, nullptr, /*ProfiledBinary=*/"", &Context
);
1409 overlapInput(BaseFilename
, TestFilename
, &Context
, Overlap
, FuncFilter
, OS
,
1415 struct SampleOverlapStats
{
1416 SampleContext BaseName
;
1417 SampleContext TestName
;
1418 // Number of overlap units
1419 uint64_t OverlapCount
= 0;
1420 // Total samples of overlap units
1421 uint64_t OverlapSample
= 0;
1422 // Number of and total samples of units that only present in base or test
1424 uint64_t BaseUniqueCount
= 0;
1425 uint64_t BaseUniqueSample
= 0;
1426 uint64_t TestUniqueCount
= 0;
1427 uint64_t TestUniqueSample
= 0;
1428 // Number of units and total samples in base or test profile
1429 uint64_t BaseCount
= 0;
1430 uint64_t BaseSample
= 0;
1431 uint64_t TestCount
= 0;
1432 uint64_t TestSample
= 0;
1433 // Number of and total samples of units that present in at least one profile
1434 uint64_t UnionCount
= 0;
1435 uint64_t UnionSample
= 0;
1436 // Weighted similarity
1437 double Similarity
= 0.0;
1438 // For SampleOverlapStats instances representing functions, weights of the
1439 // function in base and test profiles
1440 double BaseWeight
= 0.0;
1441 double TestWeight
= 0.0;
1443 SampleOverlapStats() = default;
1445 } // end anonymous namespace
/// Per-function sample statistics: the sum of the samples, the largest single
/// sample, and the number of hot blocks.
struct FuncSampleStats {
  uint64_t SampleSum = 0;
  uint64_t MaxSample = 0;
  uint64_t HotBlockCount = 0;

  /// All statistics start at zero.
  FuncSampleStats() = default;

  /// Start from the given statistics.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum{SampleSum}, MaxSample{MaxSample},
        HotBlockCount{HotBlockCount} {}
};
1458 } // end anonymous namespace
/// Outcome of comparing the current elements of two sorted sequences.
enum MatchStatus {
  MS_Match,        // Both sequences hold the same key at the current step.
  MS_FirstUnique,  // The current key exists only in the first sequence.
  MS_SecondUnique, // The current key exists only in the second sequence.
  MS_None          // No comparison has been made yet.
};
1463 // Class for updating merging steps for two sorted maps. The class should be
1464 // instantiated with a map iterator type.
1465 template <class T
> class MatchStep
{
1467 MatchStep() = delete;
1469 MatchStep(T FirstIter
, T FirstEnd
, T SecondIter
, T SecondEnd
)
1470 : FirstIter(FirstIter
), FirstEnd(FirstEnd
), SecondIter(SecondIter
),
1471 SecondEnd(SecondEnd
), Status(MS_None
) {}
1473 bool areBothFinished() const {
1474 return (FirstIter
== FirstEnd
&& SecondIter
== SecondEnd
);
1477 bool isFirstFinished() const { return FirstIter
== FirstEnd
; }
1479 bool isSecondFinished() const { return SecondIter
== SecondEnd
; }
1481 /// Advance one step based on the previous match status unless the previous
1482 /// status is MS_None. Then update Status based on the comparison between two
1483 /// container iterators at the current step. If the previous status is
1484 /// MS_None, it means two iterators are at the beginning and no comparison has
1485 /// been made, so we simply update Status without advancing the iterators.
1486 void updateOneStep();
1488 T
getFirstIter() const { return FirstIter
; }
1490 T
getSecondIter() const { return SecondIter
; }
1492 MatchStatus
getMatchStatus() const { return Status
; }
1495 // Current iterator and end iterator of the first container.
1498 // Current iterator and end iterator of the second container.
1501 // Match status of the current step.
1504 } // end anonymous namespace
1506 template <class T
> void MatchStep
<T
>::updateOneStep() {
1512 case MS_FirstUnique
:
1515 case MS_SecondUnique
:
1522 // Update Status according to iterators at the current step.
1523 if (areBothFinished())
1525 if (FirstIter
!= FirstEnd
&&
1526 (SecondIter
== SecondEnd
|| FirstIter
->first
< SecondIter
->first
))
1527 Status
= MS_FirstUnique
;
1528 else if (SecondIter
!= SecondEnd
&&
1529 (FirstIter
== FirstEnd
|| SecondIter
->first
< FirstIter
->first
))
1530 Status
= MS_SecondUnique
;
1535 // Return the sum of line/block samples, the max line/block sample, and the
1536 // number of line/block samples above the given threshold in a function
1537 // including its inlinees.
1538 static void getFuncSampleStats(const sampleprof::FunctionSamples
&Func
,
1539 FuncSampleStats
&FuncStats
,
1540 uint64_t HotThreshold
) {
1541 for (const auto &L
: Func
.getBodySamples()) {
1542 uint64_t Sample
= L
.second
.getSamples();
1543 FuncStats
.SampleSum
+= Sample
;
1544 FuncStats
.MaxSample
= std::max(FuncStats
.MaxSample
, Sample
);
1545 if (Sample
>= HotThreshold
)
1546 ++FuncStats
.HotBlockCount
;
1549 for (const auto &C
: Func
.getCallsiteSamples()) {
1550 for (const auto &F
: C
.second
)
1551 getFuncSampleStats(F
.second
, FuncStats
, HotThreshold
);
1555 /// Predicate that determines if a function is hot with a given threshold. We
1556 /// keep it separate from its callsites for possible extension in the future.
1557 static bool isFunctionHot(const FuncSampleStats
&FuncStats
,
1558 uint64_t HotThreshold
) {
1559 // We intentionally compare the maximum sample count in a function with the
1560 // HotThreshold to get an approximate determination on hot functions.
1561 return (FuncStats
.MaxSample
>= HotThreshold
);
1565 class SampleOverlapAggregator
{
1567 SampleOverlapAggregator(const std::string
&BaseFilename
,
1568 const std::string
&TestFilename
,
1569 double LowSimilarityThreshold
, double Epsilon
,
1570 const OverlapFuncFilters
&FuncFilter
)
1571 : BaseFilename(BaseFilename
), TestFilename(TestFilename
),
1572 LowSimilarityThreshold(LowSimilarityThreshold
), Epsilon(Epsilon
),
1573 FuncFilter(FuncFilter
) {}
1575 /// Detect 0-sample input profile and report to output stream. This interface
1576 /// should be called after loadProfiles().
1577 bool detectZeroSampleProfile(raw_fd_ostream
&OS
) const;
1579 /// Write out function-level similarity statistics for functions specified by
1580 /// options --function, --value-cutoff, and --similarity-cutoff.
1581 void dumpFuncSimilarity(raw_fd_ostream
&OS
) const;
1583 /// Write out program-level similarity and overlap statistics.
1584 void dumpProgramSummary(raw_fd_ostream
&OS
) const;
1586 /// Write out hot-function and hot-block statistics for base_profile,
1587 /// test_profile, and their overlap. For both cases, the overlap HO is
1588 /// calculated as follows:
1589 /// Given the number of functions (or blocks) that are hot in both profiles
1590 /// HCommon and the number of functions (or blocks) that are hot in at
1591 /// least one profile HUnion, HO = HCommon / HUnion.
1592 void dumpHotFuncAndBlockOverlap(raw_fd_ostream
&OS
) const;
1594 /// This function tries matching functions in base and test profiles. For each
1595 /// pair of matched functions, it aggregates the function-level
1596 /// similarity into a profile-level similarity. It also dump function-level
1597 /// similarity information of functions specified by --function,
1598 /// --value-cutoff, and --similarity-cutoff options. The program-level
1599 /// similarity PS is computed as follows:
1600 /// Given function-level similarity FS(A) for all function A, the
1601 /// weight of function A in base profile WB(A), and the weight of function
1602 /// A in test profile WT(A), compute PS(base_profile, test_profile) =
1603 /// sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1604 /// meaning no-overlap.
1605 void computeSampleProfileOverlap(raw_fd_ostream
&OS
);
1607 /// Initialize ProfOverlap with the sum of samples in base and test
1608 /// profiles. This function also computes and keeps the sum of samples and
1609 /// max sample counts of each function in BaseStats and TestStats for later
1610 /// use to avoid re-computations.
1611 void initializeSampleProfileOverlap();
1613 /// Load profiles specified by BaseFilename and TestFilename.
1614 std::error_code
loadProfiles();
1616 using FuncSampleStatsMap
=
1617 std::unordered_map
<SampleContext
, FuncSampleStats
, SampleContext::Hash
>;
1620 SampleOverlapStats ProfOverlap
;
1621 SampleOverlapStats HotFuncOverlap
;
1622 SampleOverlapStats HotBlockOverlap
;
1623 std::string BaseFilename
;
1624 std::string TestFilename
;
1625 std::unique_ptr
<sampleprof::SampleProfileReader
> BaseReader
;
1626 std::unique_ptr
<sampleprof::SampleProfileReader
> TestReader
;
1627 // BaseStats and TestStats hold FuncSampleStats for each function, with
1628 // function name as the key.
1629 FuncSampleStatsMap BaseStats
;
1630 FuncSampleStatsMap TestStats
;
1631 // Low similarity threshold in floating point number
1632 double LowSimilarityThreshold
;
1633 // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1634 // for tracking hot blocks.
1635 uint64_t BaseHotThreshold
;
1636 uint64_t TestHotThreshold
;
1637 // A small threshold used to round the results of floating point accumulations
1638 // to resolve imprecision.
1639 const double Epsilon
;
1640 std::multimap
<double, SampleOverlapStats
, std::greater
<double>>
1642 // FuncFilter carries specifications in options --value-cutoff and
1644 OverlapFuncFilters FuncFilter
;
1645 // Column offsets for printing the function-level details table.
1646 static const unsigned int TestWeightCol
= 15;
1647 static const unsigned int SimilarityCol
= 30;
1648 static const unsigned int OverlapCol
= 43;
1649 static const unsigned int BaseUniqueCol
= 53;
1650 static const unsigned int TestUniqueCol
= 67;
1651 static const unsigned int BaseSampleCol
= 81;
1652 static const unsigned int TestSampleCol
= 96;
1653 static const unsigned int FuncNameCol
= 111;
1655 /// Return a similarity of two line/block sample counters in the same
1656 /// function in base and test profiles. The line/block-similarity BS(i) is
1657 /// computed as follows:
1658 /// For an offsets i, given the sample count at i in base profile BB(i),
1659 /// the sample count at i in test profile BT(i), the sum of sample counts
1660 /// in this function in base profile SB, and the sum of sample counts in
1661 /// this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1662 /// BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1663 double computeBlockSimilarity(uint64_t BaseSample
, uint64_t TestSample
,
1664 const SampleOverlapStats
&FuncOverlap
) const;
1666 void updateHotBlockOverlap(uint64_t BaseSample
, uint64_t TestSample
,
1667 uint64_t HotBlockCount
);
1669 void getHotFunctions(const FuncSampleStatsMap
&ProfStats
,
1670 FuncSampleStatsMap
&HotFunc
,
1671 uint64_t HotThreshold
) const;
1673 void computeHotFuncOverlap();
1675 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1676 /// Difference for two sample units in a matched function according to the
1677 /// given match status.
1678 void updateOverlapStatsForFunction(uint64_t BaseSample
, uint64_t TestSample
,
1679 uint64_t HotBlockCount
,
1680 SampleOverlapStats
&FuncOverlap
,
1681 double &Difference
, MatchStatus Status
);
1683 /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1684 /// Difference for unmatched callees that only present in one profile in a
1685 /// matched caller function.
1686 void updateForUnmatchedCallee(const sampleprof::FunctionSamples
&Func
,
1687 SampleOverlapStats
&FuncOverlap
,
1688 double &Difference
, MatchStatus Status
);
1690 /// This function updates sample overlap statistics of an overlap function in
1691 /// base and test profile. It also calculates a function-internal similarity
1693 /// For offsets i that have samples in at least one profile in this
1694 /// function A, given BS(i) returned by computeBlockSimilarity(), compute
1695 /// FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
1696 /// 0.0 meaning no overlap.
1697 double computeSampleFunctionInternalOverlap(
1698 const sampleprof::FunctionSamples
&BaseFunc
,
1699 const sampleprof::FunctionSamples
&TestFunc
,
1700 SampleOverlapStats
&FuncOverlap
);
1702 /// Function-level similarity (FS) is a weighted value over function internal
1703 /// similarity (FIS). This function computes a function's FS from its FIS by
1704 /// applying the weight.
1705 double weightForFuncSimilarity(double FuncSimilarity
, uint64_t BaseFuncSample
,
1706 uint64_t TestFuncSample
) const;
1708 /// The function-level similarity FS(A) for a function A is computed as
1710 /// Compute a function-internal similarity FIS(A) by
1711 /// computeSampleFunctionInternalOverlap(). Then, with the weight of
1712 /// function A in base profile WB(A), and the weight of function A in test
1713 /// profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
1714 /// ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
1716 computeSampleFunctionOverlap(const sampleprof::FunctionSamples
*BaseFunc
,
1717 const sampleprof::FunctionSamples
*TestFunc
,
1718 SampleOverlapStats
*FuncOverlap
,
1719 uint64_t BaseFuncSample
,
1720 uint64_t TestFuncSample
);
1722 /// Profile-level similarity (PS) is a weighted aggregate over function-level
1723 /// similarities (FS). This method weights the FS value by the function
1724 /// weights in the base and test profiles for the aggregation.
1725 double weightByImportance(double FuncSimilarity
, uint64_t BaseFuncSample
,
1726 uint64_t TestFuncSample
) const;
1728 } // end anonymous namespace
1730 bool SampleOverlapAggregator::detectZeroSampleProfile(
1731 raw_fd_ostream
&OS
) const {
1732 bool HaveZeroSample
= false;
1733 if (ProfOverlap
.BaseSample
== 0) {
1734 OS
<< "Sum of sample counts for profile " << BaseFilename
<< " is 0.\n";
1735 HaveZeroSample
= true;
1737 if (ProfOverlap
.TestSample
== 0) {
1738 OS
<< "Sum of sample counts for profile " << TestFilename
<< " is 0.\n";
1739 HaveZeroSample
= true;
1741 return HaveZeroSample
;
1744 double SampleOverlapAggregator::computeBlockSimilarity(
1745 uint64_t BaseSample
, uint64_t TestSample
,
1746 const SampleOverlapStats
&FuncOverlap
) const {
1747 double BaseFrac
= 0.0;
1748 double TestFrac
= 0.0;
1749 if (FuncOverlap
.BaseSample
> 0)
1750 BaseFrac
= static_cast<double>(BaseSample
) / FuncOverlap
.BaseSample
;
1751 if (FuncOverlap
.TestSample
> 0)
1752 TestFrac
= static_cast<double>(TestSample
) / FuncOverlap
.TestSample
;
1753 return 1.0 - std::fabs(BaseFrac
- TestFrac
);
1756 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample
,
1757 uint64_t TestSample
,
1758 uint64_t HotBlockCount
) {
1759 bool IsBaseHot
= (BaseSample
>= BaseHotThreshold
);
1760 bool IsTestHot
= (TestSample
>= TestHotThreshold
);
1761 if (!IsBaseHot
&& !IsTestHot
)
1764 HotBlockOverlap
.UnionCount
+= HotBlockCount
;
1766 HotBlockOverlap
.BaseCount
+= HotBlockCount
;
1768 HotBlockOverlap
.TestCount
+= HotBlockCount
;
1769 if (IsBaseHot
&& IsTestHot
)
1770 HotBlockOverlap
.OverlapCount
+= HotBlockCount
;
1773 void SampleOverlapAggregator::getHotFunctions(
1774 const FuncSampleStatsMap
&ProfStats
, FuncSampleStatsMap
&HotFunc
,
1775 uint64_t HotThreshold
) const {
1776 for (const auto &F
: ProfStats
) {
1777 if (isFunctionHot(F
.second
, HotThreshold
))
1778 HotFunc
.emplace(F
.first
, F
.second
);
1782 void SampleOverlapAggregator::computeHotFuncOverlap() {
1783 FuncSampleStatsMap BaseHotFunc
;
1784 getHotFunctions(BaseStats
, BaseHotFunc
, BaseHotThreshold
);
1785 HotFuncOverlap
.BaseCount
= BaseHotFunc
.size();
1787 FuncSampleStatsMap TestHotFunc
;
1788 getHotFunctions(TestStats
, TestHotFunc
, TestHotThreshold
);
1789 HotFuncOverlap
.TestCount
= TestHotFunc
.size();
1790 HotFuncOverlap
.UnionCount
= HotFuncOverlap
.TestCount
;
1792 for (const auto &F
: BaseHotFunc
) {
1793 if (TestHotFunc
.count(F
.first
))
1794 ++HotFuncOverlap
.OverlapCount
;
1796 ++HotFuncOverlap
.UnionCount
;
1800 void SampleOverlapAggregator::updateOverlapStatsForFunction(
1801 uint64_t BaseSample
, uint64_t TestSample
, uint64_t HotBlockCount
,
1802 SampleOverlapStats
&FuncOverlap
, double &Difference
, MatchStatus Status
) {
1803 assert(Status
!= MS_None
&&
1804 "Match status should be updated before updating overlap statistics");
1805 if (Status
== MS_FirstUnique
) {
1807 FuncOverlap
.BaseUniqueSample
+= BaseSample
;
1808 } else if (Status
== MS_SecondUnique
) {
1810 FuncOverlap
.TestUniqueSample
+= TestSample
;
1812 ++FuncOverlap
.OverlapCount
;
1815 FuncOverlap
.UnionSample
+= std::max(BaseSample
, TestSample
);
1816 FuncOverlap
.OverlapSample
+= std::min(BaseSample
, TestSample
);
1818 1.0 - computeBlockSimilarity(BaseSample
, TestSample
, FuncOverlap
);
1819 updateHotBlockOverlap(BaseSample
, TestSample
, HotBlockCount
);
1822 void SampleOverlapAggregator::updateForUnmatchedCallee(
1823 const sampleprof::FunctionSamples
&Func
, SampleOverlapStats
&FuncOverlap
,
1824 double &Difference
, MatchStatus Status
) {
1825 assert((Status
== MS_FirstUnique
|| Status
== MS_SecondUnique
) &&
1826 "Status must be either of the two unmatched cases");
1827 FuncSampleStats FuncStats
;
1828 if (Status
== MS_FirstUnique
) {
1829 getFuncSampleStats(Func
, FuncStats
, BaseHotThreshold
);
1830 updateOverlapStatsForFunction(FuncStats
.SampleSum
, 0,
1831 FuncStats
.HotBlockCount
, FuncOverlap
,
1832 Difference
, Status
);
1834 getFuncSampleStats(Func
, FuncStats
, TestHotThreshold
);
1835 updateOverlapStatsForFunction(0, FuncStats
.SampleSum
,
1836 FuncStats
.HotBlockCount
, FuncOverlap
,
1837 Difference
, Status
);
1841 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
1842 const sampleprof::FunctionSamples
&BaseFunc
,
1843 const sampleprof::FunctionSamples
&TestFunc
,
1844 SampleOverlapStats
&FuncOverlap
) {
1846 using namespace sampleprof
;
1848 double Difference
= 0;
1850 // Accumulate Difference for regular line/block samples in the function.
1851 // We match them through sort-merge join algorithm because
1852 // FunctionSamples::getBodySamples() returns a map of sample counters ordered
1853 // by their offsets.
1854 MatchStep
<BodySampleMap::const_iterator
> BlockIterStep(
1855 BaseFunc
.getBodySamples().cbegin(), BaseFunc
.getBodySamples().cend(),
1856 TestFunc
.getBodySamples().cbegin(), TestFunc
.getBodySamples().cend());
1857 BlockIterStep
.updateOneStep();
1858 while (!BlockIterStep
.areBothFinished()) {
1859 uint64_t BaseSample
=
1860 BlockIterStep
.isFirstFinished()
1862 : BlockIterStep
.getFirstIter()->second
.getSamples();
1863 uint64_t TestSample
=
1864 BlockIterStep
.isSecondFinished()
1866 : BlockIterStep
.getSecondIter()->second
.getSamples();
1867 updateOverlapStatsForFunction(BaseSample
, TestSample
, 1, FuncOverlap
,
1868 Difference
, BlockIterStep
.getMatchStatus());
1870 BlockIterStep
.updateOneStep();
1873 // Accumulate Difference for callsite lines in the function. We match
1874 // them through sort-merge algorithm because
1875 // FunctionSamples::getCallsiteSamples() returns a map of callsite records
1876 // ordered by their offsets.
1877 MatchStep
<CallsiteSampleMap::const_iterator
> CallsiteIterStep(
1878 BaseFunc
.getCallsiteSamples().cbegin(),
1879 BaseFunc
.getCallsiteSamples().cend(),
1880 TestFunc
.getCallsiteSamples().cbegin(),
1881 TestFunc
.getCallsiteSamples().cend());
1882 CallsiteIterStep
.updateOneStep();
1883 while (!CallsiteIterStep
.areBothFinished()) {
1884 MatchStatus CallsiteStepStatus
= CallsiteIterStep
.getMatchStatus();
1885 assert(CallsiteStepStatus
!= MS_None
&&
1886 "Match status should be updated before entering loop body");
1888 if (CallsiteStepStatus
!= MS_Match
) {
1889 auto Callsite
= (CallsiteStepStatus
== MS_FirstUnique
)
1890 ? CallsiteIterStep
.getFirstIter()
1891 : CallsiteIterStep
.getSecondIter();
1892 for (const auto &F
: Callsite
->second
)
1893 updateForUnmatchedCallee(F
.second
, FuncOverlap
, Difference
,
1894 CallsiteStepStatus
);
1896 // There may be multiple inlinees at the same offset, so we need to try
1897 // matching all of them. This match is implemented through sort-merge
1898 // algorithm because callsite records at the same offset are ordered by
1900 MatchStep
<FunctionSamplesMap::const_iterator
> CalleeIterStep(
1901 CallsiteIterStep
.getFirstIter()->second
.cbegin(),
1902 CallsiteIterStep
.getFirstIter()->second
.cend(),
1903 CallsiteIterStep
.getSecondIter()->second
.cbegin(),
1904 CallsiteIterStep
.getSecondIter()->second
.cend());
1905 CalleeIterStep
.updateOneStep();
1906 while (!CalleeIterStep
.areBothFinished()) {
1907 MatchStatus CalleeStepStatus
= CalleeIterStep
.getMatchStatus();
1908 if (CalleeStepStatus
!= MS_Match
) {
1909 auto Callee
= (CalleeStepStatus
== MS_FirstUnique
)
1910 ? CalleeIterStep
.getFirstIter()
1911 : CalleeIterStep
.getSecondIter();
1912 updateForUnmatchedCallee(Callee
->second
, FuncOverlap
, Difference
,
1915 // An inlined function can contain other inlinees inside, so compute
1916 // the Difference recursively.
1917 Difference
+= 2.0 - 2 * computeSampleFunctionInternalOverlap(
1918 CalleeIterStep
.getFirstIter()->second
,
1919 CalleeIterStep
.getSecondIter()->second
,
1922 CalleeIterStep
.updateOneStep();
1925 CallsiteIterStep
.updateOneStep();
1928 // Difference reflects the total differences of line/block samples in this
1929 // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
1930 // reflect the similarity between function profiles in [0.0f to 1.0f].
1931 return (2.0 - Difference
) / 2;
1934 double SampleOverlapAggregator::weightForFuncSimilarity(
1935 double FuncInternalSimilarity
, uint64_t BaseFuncSample
,
1936 uint64_t TestFuncSample
) const {
1937 // Compute the weight as the distance between the function weights in two
1939 double BaseFrac
= 0.0;
1940 double TestFrac
= 0.0;
1941 assert(ProfOverlap
.BaseSample
> 0 &&
1942 "Total samples in base profile should be greater than 0");
1943 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
;
1944 assert(ProfOverlap
.TestSample
> 0 &&
1945 "Total samples in test profile should be greater than 0");
1946 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
;
1947 double WeightDistance
= std::fabs(BaseFrac
- TestFrac
);
1949 // Take WeightDistance into the similarity.
1950 return FuncInternalSimilarity
* (1 - WeightDistance
);
1954 SampleOverlapAggregator::weightByImportance(double FuncSimilarity
,
1955 uint64_t BaseFuncSample
,
1956 uint64_t TestFuncSample
) const {
1958 double BaseFrac
= 0.0;
1959 double TestFrac
= 0.0;
1960 assert(ProfOverlap
.BaseSample
> 0 &&
1961 "Total samples in base profile should be greater than 0");
1962 BaseFrac
= static_cast<double>(BaseFuncSample
) / ProfOverlap
.BaseSample
/ 2.0;
1963 assert(ProfOverlap
.TestSample
> 0 &&
1964 "Total samples in test profile should be greater than 0");
1965 TestFrac
= static_cast<double>(TestFuncSample
) / ProfOverlap
.TestSample
/ 2.0;
1966 return FuncSimilarity
* (BaseFrac
+ TestFrac
);
1969 double SampleOverlapAggregator::computeSampleFunctionOverlap(
1970 const sampleprof::FunctionSamples
*BaseFunc
,
1971 const sampleprof::FunctionSamples
*TestFunc
,
1972 SampleOverlapStats
*FuncOverlap
, uint64_t BaseFuncSample
,
1973 uint64_t TestFuncSample
) {
1974 // Default function internal similarity before weighted, meaning two functions
1976 const double DefaultFuncInternalSimilarity
= 0;
1977 double FuncSimilarity
;
1978 double FuncInternalSimilarity
;
1980 // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
1981 // In this case, we use DefaultFuncInternalSimilarity as the function internal
1983 if (!BaseFunc
|| !TestFunc
) {
1984 FuncInternalSimilarity
= DefaultFuncInternalSimilarity
;
1986 assert(FuncOverlap
!= nullptr &&
1987 "FuncOverlap should be provided in this case");
1988 FuncInternalSimilarity
= computeSampleFunctionInternalOverlap(
1989 *BaseFunc
, *TestFunc
, *FuncOverlap
);
1990 // Now, FuncInternalSimilarity may be a little less than 0 due to
1991 // imprecision of floating point accumulations. Make it zero if the
1992 // difference is below Epsilon.
1993 FuncInternalSimilarity
= (std::fabs(FuncInternalSimilarity
- 0) < Epsilon
)
1995 : FuncInternalSimilarity
;
1997 FuncSimilarity
= weightForFuncSimilarity(FuncInternalSimilarity
,
1998 BaseFuncSample
, TestFuncSample
);
1999 return FuncSimilarity
;
2002 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream
&OS
) {
2003 using namespace sampleprof
;
2005 std::unordered_map
<SampleContext
, const FunctionSamples
*,
2006 SampleContext::Hash
>
2008 const auto &BaseProfiles
= BaseReader
->getProfiles();
2009 for (const auto &BaseFunc
: BaseProfiles
) {
2010 BaseFuncProf
.emplace(BaseFunc
.second
.getContext(), &(BaseFunc
.second
));
2012 ProfOverlap
.UnionCount
= BaseFuncProf
.size();
2014 const auto &TestProfiles
= TestReader
->getProfiles();
2015 for (const auto &TestFunc
: TestProfiles
) {
2016 SampleOverlapStats FuncOverlap
;
2017 FuncOverlap
.TestName
= TestFunc
.second
.getContext();
2018 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2019 "TestStats should have records for all functions in test profile "
2021 FuncOverlap
.TestSample
= TestStats
[FuncOverlap
.TestName
].SampleSum
;
2023 bool Matched
= false;
2024 const auto Match
= BaseFuncProf
.find(FuncOverlap
.TestName
);
2025 if (Match
== BaseFuncProf
.end()) {
2026 const FuncSampleStats
&FuncStats
= TestStats
[FuncOverlap
.TestName
];
2027 ++ProfOverlap
.TestUniqueCount
;
2028 ProfOverlap
.TestUniqueSample
+= FuncStats
.SampleSum
;
2029 FuncOverlap
.TestUniqueSample
= FuncStats
.SampleSum
;
2031 updateHotBlockOverlap(0, FuncStats
.SampleSum
, FuncStats
.HotBlockCount
);
2033 double FuncSimilarity
= computeSampleFunctionOverlap(
2034 nullptr, nullptr, nullptr, 0, FuncStats
.SampleSum
);
2035 ProfOverlap
.Similarity
+=
2036 weightByImportance(FuncSimilarity
, 0, FuncStats
.SampleSum
);
2038 ++ProfOverlap
.UnionCount
;
2039 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2041 ++ProfOverlap
.OverlapCount
;
2043 // Two functions match with each other. Compute function-level overlap and
2044 // aggregate them into profile-level overlap.
2045 FuncOverlap
.BaseName
= Match
->second
->getContext();
2046 assert(BaseStats
.count(FuncOverlap
.BaseName
) &&
2047 "BaseStats should have records for all functions in base profile "
2049 FuncOverlap
.BaseSample
= BaseStats
[FuncOverlap
.BaseName
].SampleSum
;
2051 FuncOverlap
.Similarity
= computeSampleFunctionOverlap(
2052 Match
->second
, &TestFunc
.second
, &FuncOverlap
, FuncOverlap
.BaseSample
,
2053 FuncOverlap
.TestSample
);
2054 ProfOverlap
.Similarity
+=
2055 weightByImportance(FuncOverlap
.Similarity
, FuncOverlap
.BaseSample
,
2056 FuncOverlap
.TestSample
);
2057 ProfOverlap
.OverlapSample
+= FuncOverlap
.OverlapSample
;
2058 ProfOverlap
.UnionSample
+= FuncOverlap
.UnionSample
;
2060 // Accumulate the percentage of base unique and test unique samples into
2062 ProfOverlap
.BaseUniqueSample
+= FuncOverlap
.BaseUniqueSample
;
2063 ProfOverlap
.TestUniqueSample
+= FuncOverlap
.TestUniqueSample
;
2065 // Remove matched base functions for later reporting functions not found
2067 BaseFuncProf
.erase(Match
);
2071 // Print function-level similarity information if specified by options.
2072 assert(TestStats
.count(FuncOverlap
.TestName
) &&
2073 "TestStats should have records for all functions in test profile "
2075 if (TestStats
[FuncOverlap
.TestName
].MaxSample
>= FuncFilter
.ValueCutoff
||
2076 (Matched
&& FuncOverlap
.Similarity
< LowSimilarityThreshold
) ||
2077 (Matched
&& !FuncFilter
.NameFilter
.empty() &&
2078 FuncOverlap
.BaseName
.toString().find(FuncFilter
.NameFilter
) !=
2079 std::string::npos
)) {
2080 assert(ProfOverlap
.BaseSample
> 0 &&
2081 "Total samples in base profile should be greater than 0");
2082 FuncOverlap
.BaseWeight
=
2083 static_cast<double>(FuncOverlap
.BaseSample
) / ProfOverlap
.BaseSample
;
2084 assert(ProfOverlap
.TestSample
> 0 &&
2085 "Total samples in test profile should be greater than 0");
2086 FuncOverlap
.TestWeight
=
2087 static_cast<double>(FuncOverlap
.TestSample
) / ProfOverlap
.TestSample
;
2088 FuncSimilarityDump
.emplace(FuncOverlap
.BaseWeight
, FuncOverlap
);
2092 // Traverse through functions in base profile but not in test profile.
2093 for (const auto &F
: BaseFuncProf
) {
2094 assert(BaseStats
.count(F
.second
->getContext()) &&
2095 "BaseStats should have records for all functions in base profile "
2097 const FuncSampleStats
&FuncStats
= BaseStats
[F
.second
->getContext()];
2098 ++ProfOverlap
.BaseUniqueCount
;
2099 ProfOverlap
.BaseUniqueSample
+= FuncStats
.SampleSum
;
2101 updateHotBlockOverlap(FuncStats
.SampleSum
, 0, FuncStats
.HotBlockCount
);
2103 double FuncSimilarity
= computeSampleFunctionOverlap(
2104 nullptr, nullptr, nullptr, FuncStats
.SampleSum
, 0);
2105 ProfOverlap
.Similarity
+=
2106 weightByImportance(FuncSimilarity
, FuncStats
.SampleSum
, 0);
2108 ProfOverlap
.UnionSample
+= FuncStats
.SampleSum
;
2111 // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2112 // of floating point accumulations. Make it 1.0 if the difference is below
2114 ProfOverlap
.Similarity
= (std::fabs(ProfOverlap
.Similarity
- 1) < Epsilon
)
2116 : ProfOverlap
.Similarity
;
2118 computeHotFuncOverlap();
2121 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2122 const auto &BaseProf
= BaseReader
->getProfiles();
2123 for (const auto &I
: BaseProf
) {
2124 ++ProfOverlap
.BaseCount
;
2125 FuncSampleStats FuncStats
;
2126 getFuncSampleStats(I
.second
, FuncStats
, BaseHotThreshold
);
2127 ProfOverlap
.BaseSample
+= FuncStats
.SampleSum
;
2128 BaseStats
.emplace(I
.second
.getContext(), FuncStats
);
2131 const auto &TestProf
= TestReader
->getProfiles();
2132 for (const auto &I
: TestProf
) {
2133 ++ProfOverlap
.TestCount
;
2134 FuncSampleStats FuncStats
;
2135 getFuncSampleStats(I
.second
, FuncStats
, TestHotThreshold
);
2136 ProfOverlap
.TestSample
+= FuncStats
.SampleSum
;
2137 TestStats
.emplace(I
.second
.getContext(), FuncStats
);
2140 ProfOverlap
.BaseName
= StringRef(BaseFilename
);
2141 ProfOverlap
.TestName
= StringRef(TestFilename
);
2144 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream
&OS
) const {
2145 using namespace sampleprof
;
2147 if (FuncSimilarityDump
.empty())
2150 formatted_raw_ostream
FOS(OS
);
2151 FOS
<< "Function-level details:\n";
2152 FOS
<< "Base weight";
2153 FOS
.PadToColumn(TestWeightCol
);
2154 FOS
<< "Test weight";
2155 FOS
.PadToColumn(SimilarityCol
);
2156 FOS
<< "Similarity";
2157 FOS
.PadToColumn(OverlapCol
);
2159 FOS
.PadToColumn(BaseUniqueCol
);
2160 FOS
<< "Base unique";
2161 FOS
.PadToColumn(TestUniqueCol
);
2162 FOS
<< "Test unique";
2163 FOS
.PadToColumn(BaseSampleCol
);
2164 FOS
<< "Base samples";
2165 FOS
.PadToColumn(TestSampleCol
);
2166 FOS
<< "Test samples";
2167 FOS
.PadToColumn(FuncNameCol
);
2168 FOS
<< "Function name\n";
2169 for (const auto &F
: FuncSimilarityDump
) {
2170 double OverlapPercent
=
2171 F
.second
.UnionSample
> 0
2172 ? static_cast<double>(F
.second
.OverlapSample
) / F
.second
.UnionSample
2174 double BaseUniquePercent
=
2175 F
.second
.BaseSample
> 0
2176 ? static_cast<double>(F
.second
.BaseUniqueSample
) /
2179 double TestUniquePercent
=
2180 F
.second
.TestSample
> 0
2181 ? static_cast<double>(F
.second
.TestUniqueSample
) /
2185 FOS
<< format("%.2f%%", F
.second
.BaseWeight
* 100);
2186 FOS
.PadToColumn(TestWeightCol
);
2187 FOS
<< format("%.2f%%", F
.second
.TestWeight
* 100);
2188 FOS
.PadToColumn(SimilarityCol
);
2189 FOS
<< format("%.2f%%", F
.second
.Similarity
* 100);
2190 FOS
.PadToColumn(OverlapCol
);
2191 FOS
<< format("%.2f%%", OverlapPercent
* 100);
2192 FOS
.PadToColumn(BaseUniqueCol
);
2193 FOS
<< format("%.2f%%", BaseUniquePercent
* 100);
2194 FOS
.PadToColumn(TestUniqueCol
);
2195 FOS
<< format("%.2f%%", TestUniquePercent
* 100);
2196 FOS
.PadToColumn(BaseSampleCol
);
2197 FOS
<< F
.second
.BaseSample
;
2198 FOS
.PadToColumn(TestSampleCol
);
2199 FOS
<< F
.second
.TestSample
;
2200 FOS
.PadToColumn(FuncNameCol
);
2201 FOS
<< F
.second
.TestName
.toString() << "\n";
2205 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream
&OS
) const {
2206 OS
<< "Profile overlap infomation for base_profile: "
2207 << ProfOverlap
.BaseName
.toString()
2208 << " and test_profile: " << ProfOverlap
.TestName
.toString()
2209 << "\nProgram level:\n";
2211 OS
<< " Whole program profile similarity: "
2212 << format("%.3f%%", ProfOverlap
.Similarity
* 100) << "\n";
2214 assert(ProfOverlap
.UnionSample
> 0 &&
2215 "Total samples in two profile should be greater than 0");
2216 double OverlapPercent
=
2217 static_cast<double>(ProfOverlap
.OverlapSample
) / ProfOverlap
.UnionSample
;
2218 assert(ProfOverlap
.BaseSample
> 0 &&
2219 "Total samples in base profile should be greater than 0");
2220 double BaseUniquePercent
= static_cast<double>(ProfOverlap
.BaseUniqueSample
) /
2221 ProfOverlap
.BaseSample
;
2222 assert(ProfOverlap
.TestSample
> 0 &&
2223 "Total samples in test profile should be greater than 0");
2224 double TestUniquePercent
= static_cast<double>(ProfOverlap
.TestUniqueSample
) /
2225 ProfOverlap
.TestSample
;
2227 OS
<< " Whole program sample overlap: "
2228 << format("%.3f%%", OverlapPercent
* 100) << "\n";
2229 OS
<< " percentage of samples unique in base profile: "
2230 << format("%.3f%%", BaseUniquePercent
* 100) << "\n";
2231 OS
<< " percentage of samples unique in test profile: "
2232 << format("%.3f%%", TestUniquePercent
* 100) << "\n";
2233 OS
<< " total samples in base profile: " << ProfOverlap
.BaseSample
<< "\n"
2234 << " total samples in test profile: " << ProfOverlap
.TestSample
<< "\n";
2236 assert(ProfOverlap
.UnionCount
> 0 &&
2237 "There should be at least one function in two input profiles");
2238 double FuncOverlapPercent
=
2239 static_cast<double>(ProfOverlap
.OverlapCount
) / ProfOverlap
.UnionCount
;
2240 OS
<< " Function overlap: " << format("%.3f%%", FuncOverlapPercent
* 100)
2242 OS
<< " overlap functions: " << ProfOverlap
.OverlapCount
<< "\n";
2243 OS
<< " functions unique in base profile: " << ProfOverlap
.BaseUniqueCount
2245 OS
<< " functions unique in test profile: " << ProfOverlap
.TestUniqueCount
2249 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2250 raw_fd_ostream
&OS
) const {
2251 assert(HotFuncOverlap
.UnionCount
> 0 &&
2252 "There should be at least one hot function in two input profiles");
2253 OS
<< " Hot-function overlap: "
2254 << format("%.3f%%", static_cast<double>(HotFuncOverlap
.OverlapCount
) /
2255 HotFuncOverlap
.UnionCount
* 100)
2257 OS
<< " overlap hot functions: " << HotFuncOverlap
.OverlapCount
<< "\n";
2258 OS
<< " hot functions unique in base profile: "
2259 << HotFuncOverlap
.BaseCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2260 OS
<< " hot functions unique in test profile: "
2261 << HotFuncOverlap
.TestCount
- HotFuncOverlap
.OverlapCount
<< "\n";
2263 assert(HotBlockOverlap
.UnionCount
> 0 &&
2264 "There should be at least one hot block in two input profiles");
2265 OS
<< " Hot-block overlap: "
2266 << format("%.3f%%", static_cast<double>(HotBlockOverlap
.OverlapCount
) /
2267 HotBlockOverlap
.UnionCount
* 100)
2269 OS
<< " overlap hot blocks: " << HotBlockOverlap
.OverlapCount
<< "\n";
2270 OS
<< " hot blocks unique in base profile: "
2271 << HotBlockOverlap
.BaseCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2272 OS
<< " hot blocks unique in test profile: "
2273 << HotBlockOverlap
.TestCount
- HotBlockOverlap
.OverlapCount
<< "\n";
2276 std::error_code
SampleOverlapAggregator::loadProfiles() {
2277 using namespace sampleprof
;
2279 LLVMContext Context
;
2280 auto FS
= vfs::getRealFileSystem();
2281 auto BaseReaderOrErr
= SampleProfileReader::create(BaseFilename
, Context
, *FS
,
2282 FSDiscriminatorPassOption
);
2283 if (std::error_code EC
= BaseReaderOrErr
.getError())
2284 exitWithErrorCode(EC
, BaseFilename
);
2286 auto TestReaderOrErr
= SampleProfileReader::create(TestFilename
, Context
, *FS
,
2287 FSDiscriminatorPassOption
);
2288 if (std::error_code EC
= TestReaderOrErr
.getError())
2289 exitWithErrorCode(EC
, TestFilename
);
2291 BaseReader
= std::move(BaseReaderOrErr
.get());
2292 TestReader
= std::move(TestReaderOrErr
.get());
2294 if (std::error_code EC
= BaseReader
->read())
2295 exitWithErrorCode(EC
, BaseFilename
);
2296 if (std::error_code EC
= TestReader
->read())
2297 exitWithErrorCode(EC
, TestFilename
);
2298 if (BaseReader
->profileIsProbeBased() != TestReader
->profileIsProbeBased())
2300 "cannot compare probe-based profile with non-probe-based profile");
2301 if (BaseReader
->profileIsCS() != TestReader
->profileIsCS())
2302 exitWithError("cannot compare CS profile with non-CS profile");
2304 // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2306 ProfileSummary
&BasePS
= BaseReader
->getSummary();
2307 ProfileSummary
&TestPS
= TestReader
->getSummary();
2309 ProfileSummaryBuilder::getHotCountThreshold(BasePS
.getDetailedSummary());
2311 ProfileSummaryBuilder::getHotCountThreshold(TestPS
.getDetailedSummary());
2313 return std::error_code();
2316 void overlapSampleProfile(const std::string
&BaseFilename
,
2317 const std::string
&TestFilename
,
2318 const OverlapFuncFilters
&FuncFilter
,
2319 uint64_t SimilarityCutoff
, raw_fd_ostream
&OS
) {
2320 using namespace sampleprof
;
2322 // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2323 // report 2--3 places after decimal point in percentage numbers.
2324 SampleOverlapAggregator
OverlapAggr(
2325 BaseFilename
, TestFilename
,
2326 static_cast<double>(SimilarityCutoff
) / 1000000, 0.000005, FuncFilter
);
2327 if (std::error_code EC
= OverlapAggr
.loadProfiles())
2328 exitWithErrorCode(EC
);
2330 OverlapAggr
.initializeSampleProfileOverlap();
2331 if (OverlapAggr
.detectZeroSampleProfile(OS
))
2334 OverlapAggr
.computeSampleProfileOverlap(OS
);
2336 OverlapAggr
.dumpProgramSummary(OS
);
2337 OverlapAggr
.dumpHotFuncAndBlockOverlap(OS
);
2338 OverlapAggr
.dumpFuncSimilarity(OS
);
2341 static int overlap_main(int argc
, const char *argv
[]) {
2342 cl::opt
<std::string
> BaseFilename(cl::Positional
, cl::Required
,
2343 cl::desc("<base profile file>"));
2344 cl::opt
<std::string
> TestFilename(cl::Positional
, cl::Required
,
2345 cl::desc("<test profile file>"));
2346 cl::opt
<std::string
> Output("output", cl::value_desc("output"), cl::init("-"),
2347 cl::desc("Output file"));
2348 cl::alias
OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output
));
2350 "cs", cl::init(false),
2351 cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."));
2352 cl::opt
<unsigned long long> ValueCutoff(
2353 "value-cutoff", cl::init(-1),
2355 "Function level overlap information for every function (with calling "
2356 "context for csspgo) in test "
2357 "profile with max count value greater then the parameter value"));
2358 cl::opt
<std::string
> FuncNameFilter(
2360 cl::desc("Function level overlap information for matching functions. For "
2361 "CSSPGO this takes a a function name with calling context"));
2362 cl::opt
<unsigned long long> SimilarityCutoff(
2363 "similarity-cutoff", cl::init(0),
2364 cl::desc("For sample profiles, list function names (with calling context "
2365 "for csspgo) for overlapped functions "
2366 "with similarities below the cutoff (percentage times 10000)."));
2367 cl::opt
<ProfileKinds
> ProfileKind(
2368 cl::desc("Profile kind:"), cl::init(instr
),
2369 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
2370 clEnumVal(sample
, "Sample profile")));
2371 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data overlap tool\n");
2374 raw_fd_ostream
OS(Output
.data(), EC
, sys::fs::OF_TextWithCRLF
);
2376 exitWithErrorCode(EC
, Output
);
2378 if (ProfileKind
== instr
)
2379 overlapInstrProfile(BaseFilename
, TestFilename
,
2380 OverlapFuncFilters
{ValueCutoff
, FuncNameFilter
}, OS
,
2383 overlapSampleProfile(BaseFilename
, TestFilename
,
2384 OverlapFuncFilters
{ValueCutoff
, FuncNameFilter
},
2385 SimilarityCutoff
, OS
);
/// Running totals collected while walking the value sites of one value kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites seen.
  uint64_t TotalNumValueSites = 0;
  /// Number of value sites that carried at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile = 0;
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues = 0;
  /// Entry N - 1 counts the sites that had exactly N profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
2402 static void traverseAllValueSites(const InstrProfRecord
&Func
, uint32_t VK
,
2403 ValueSitesStats
&Stats
, raw_fd_ostream
&OS
,
2404 InstrProfSymtab
*Symtab
) {
2405 uint32_t NS
= Func
.getNumValueSites(VK
);
2406 Stats
.TotalNumValueSites
+= NS
;
2407 for (size_t I
= 0; I
< NS
; ++I
) {
2408 uint32_t NV
= Func
.getNumValueDataForSite(VK
, I
);
2409 std::unique_ptr
<InstrProfValueData
[]> VD
= Func
.getValueForSite(VK
, I
);
2410 Stats
.TotalNumValues
+= NV
;
2412 Stats
.TotalNumValueSitesWithValueProfile
++;
2413 if (NV
> Stats
.ValueSitesHistogram
.size())
2414 Stats
.ValueSitesHistogram
.resize(NV
, 0);
2415 Stats
.ValueSitesHistogram
[NV
- 1]++;
2418 uint64_t SiteSum
= 0;
2419 for (uint32_t V
= 0; V
< NV
; V
++)
2420 SiteSum
+= VD
[V
].Count
;
2424 for (uint32_t V
= 0; V
< NV
; V
++) {
2425 OS
<< "\t[ " << format("%2u", I
) << ", ";
2426 if (Symtab
== nullptr)
2427 OS
<< format("%4" PRIu64
, VD
[V
].Value
);
2429 OS
<< Symtab
->getFuncOrVarName(VD
[V
].Value
);
2430 OS
<< ", " << format("%10" PRId64
, VD
[V
].Count
) << " ] ("
2431 << format("%.2f%%", (VD
[V
].Count
* 100.0 / SiteSum
)) << ")\n";
2436 static void showValueSitesStats(raw_fd_ostream
&OS
, uint32_t VK
,
2437 ValueSitesStats
&Stats
) {
2438 OS
<< " Total number of sites: " << Stats
.TotalNumValueSites
<< "\n";
2439 OS
<< " Total number of sites with values: "
2440 << Stats
.TotalNumValueSitesWithValueProfile
<< "\n";
2441 OS
<< " Total number of profiled values: " << Stats
.TotalNumValues
<< "\n";
2443 OS
<< " Value sites histogram:\n\tNumTargets, SiteCount\n";
2444 for (unsigned I
= 0; I
< Stats
.ValueSitesHistogram
.size(); I
++) {
2445 if (Stats
.ValueSitesHistogram
[I
] > 0)
2446 OS
<< "\t" << I
+ 1 << ", " << Stats
.ValueSitesHistogram
[I
] << "\n";
2450 static int showInstrProfile(
2451 const std::string
&Filename
, bool ShowCounts
, uint32_t TopN
,
2452 bool ShowIndirectCallTargets
, bool ShowMemOPSizes
, bool ShowDetailedSummary
,
2453 std::vector
<uint32_t> DetailedSummaryCutoffs
, bool ShowAllFunctions
,
2454 bool ShowCS
, uint64_t ValueCutoff
, bool OnlyListBelow
,
2455 const std::string
&ShowFunction
, bool TextFormat
, bool ShowBinaryIds
,
2456 bool ShowCovered
, bool ShowProfileVersion
, bool ShowTemporalProfTraces
,
2457 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2458 if (SFormat
== ShowFormat::Json
)
2459 exitWithError("JSON output is not supported for instr profiles");
2460 if (SFormat
== ShowFormat::Yaml
)
2461 exitWithError("YAML output is not supported for instr profiles");
2462 auto FS
= vfs::getRealFileSystem();
2463 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
2464 std::vector
<uint32_t> Cutoffs
= std::move(DetailedSummaryCutoffs
);
2465 if (ShowDetailedSummary
&& Cutoffs
.empty()) {
2466 Cutoffs
= ProfileSummaryBuilder::DefaultCutoffs
;
2468 InstrProfSummaryBuilder
Builder(std::move(Cutoffs
));
2469 if (Error E
= ReaderOrErr
.takeError())
2470 exitWithError(std::move(E
), Filename
);
2472 auto Reader
= std::move(ReaderOrErr
.get());
2473 bool IsIRInstr
= Reader
->isIRLevelProfile();
2474 size_t ShownFunctions
= 0;
2475 size_t BelowCutoffFunctions
= 0;
2476 int NumVPKind
= IPVK_Last
- IPVK_First
+ 1;
2477 std::vector
<ValueSitesStats
> VPStats(NumVPKind
);
2479 auto MinCmp
= [](const std::pair
<std::string
, uint64_t> &v1
,
2480 const std::pair
<std::string
, uint64_t> &v2
) {
2481 return v1
.second
> v2
.second
;
2484 std::priority_queue
<std::pair
<std::string
, uint64_t>,
2485 std::vector
<std::pair
<std::string
, uint64_t>>,
2487 HottestFuncs(MinCmp
);
2489 if (!TextFormat
&& OnlyListBelow
) {
2490 OS
<< "The list of functions with the maximum counter less than "
2491 << ValueCutoff
<< ":\n";
2494 // Add marker so that IR-level instrumentation round-trips properly.
2495 if (TextFormat
&& IsIRInstr
)
2498 for (const auto &Func
: *Reader
) {
2499 if (Reader
->isIRLevelProfile()) {
2500 bool FuncIsCS
= NamedInstrProfRecord::hasCSFlagInHash(Func
.Hash
);
2501 if (FuncIsCS
!= ShowCS
)
2504 bool Show
= ShowAllFunctions
||
2505 (!ShowFunction
.empty() && Func
.Name
.contains(ShowFunction
));
2507 bool doTextFormatDump
= (Show
&& TextFormat
);
2509 if (doTextFormatDump
) {
2510 InstrProfSymtab
&Symtab
= Reader
->getSymtab();
2511 InstrProfWriter::writeRecordInText(Func
.Name
, Func
.Hash
, Func
, Symtab
,
2516 assert(Func
.Counts
.size() > 0 && "function missing entry counter");
2517 Builder
.addRecord(Func
);
2520 if (llvm::any_of(Func
.Counts
, [](uint64_t C
) { return C
; }))
2521 OS
<< Func
.Name
<< "\n";
2525 uint64_t FuncMax
= 0;
2526 uint64_t FuncSum
= 0;
2528 auto PseudoKind
= Func
.getCountPseudoKind();
2529 if (PseudoKind
!= InstrProfRecord::NotPseudo
) {
2531 if (!ShownFunctions
)
2532 OS
<< "Counters:\n";
2534 OS
<< " " << Func
.Name
<< ":\n"
2535 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2536 << " Counters: " << Func
.Counts
.size();
2537 if (PseudoKind
== InstrProfRecord::PseudoHot
)
2538 OS
<< " <PseudoHot>\n";
2539 else if (PseudoKind
== InstrProfRecord::PseudoWarm
)
2540 OS
<< " <PseudoWarm>\n";
2542 llvm_unreachable("Unknown PseudoKind");
2547 for (size_t I
= 0, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2548 FuncMax
= std::max(FuncMax
, Func
.Counts
[I
]);
2549 FuncSum
+= Func
.Counts
[I
];
2552 if (FuncMax
< ValueCutoff
) {
2553 ++BelowCutoffFunctions
;
2554 if (OnlyListBelow
) {
2555 OS
<< " " << Func
.Name
<< ": (Max = " << FuncMax
2556 << " Sum = " << FuncSum
<< ")\n";
2559 } else if (OnlyListBelow
)
2563 if (HottestFuncs
.size() == TopN
) {
2564 if (HottestFuncs
.top().second
< FuncMax
) {
2566 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2569 HottestFuncs
.emplace(std::make_pair(std::string(Func
.Name
), FuncMax
));
2573 if (!ShownFunctions
)
2574 OS
<< "Counters:\n";
2578 OS
<< " " << Func
.Name
<< ":\n"
2579 << " Hash: " << format("0x%016" PRIx64
, Func
.Hash
) << "\n"
2580 << " Counters: " << Func
.Counts
.size() << "\n";
2582 OS
<< " Function count: " << Func
.Counts
[0] << "\n";
2584 if (ShowIndirectCallTargets
)
2585 OS
<< " Indirect Call Site Count: "
2586 << Func
.getNumValueSites(IPVK_IndirectCallTarget
) << "\n";
2588 uint32_t NumMemOPCalls
= Func
.getNumValueSites(IPVK_MemOPSize
);
2589 if (ShowMemOPSizes
&& NumMemOPCalls
> 0)
2590 OS
<< " Number of Memory Intrinsics Calls: " << NumMemOPCalls
2594 OS
<< " Block counts: [";
2595 size_t Start
= (IsIRInstr
? 0 : 1);
2596 for (size_t I
= Start
, E
= Func
.Counts
.size(); I
< E
; ++I
) {
2597 OS
<< (I
== Start
? "" : ", ") << Func
.Counts
[I
];
2602 if (ShowIndirectCallTargets
) {
2603 OS
<< " Indirect Target Results:\n";
2604 traverseAllValueSites(Func
, IPVK_IndirectCallTarget
,
2605 VPStats
[IPVK_IndirectCallTarget
], OS
,
2606 &(Reader
->getSymtab()));
2609 if (ShowMemOPSizes
&& NumMemOPCalls
> 0) {
2610 OS
<< " Memory Intrinsic Size Results:\n";
2611 traverseAllValueSites(Func
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
], OS
,
2616 if (Reader
->hasError())
2617 exitWithError(Reader
->getError(), Filename
);
2619 if (TextFormat
|| ShowCovered
)
2621 std::unique_ptr
<ProfileSummary
> PS(Builder
.getSummary());
2622 bool IsIR
= Reader
->isIRLevelProfile();
2623 OS
<< "Instrumentation level: " << (IsIR
? "IR" : "Front-end");
2625 OS
<< " entry_first = " << Reader
->instrEntryBBEnabled();
2627 if (ShowAllFunctions
|| !ShowFunction
.empty())
2628 OS
<< "Functions shown: " << ShownFunctions
<< "\n";
2629 OS
<< "Total functions: " << PS
->getNumFunctions() << "\n";
2630 if (ValueCutoff
> 0) {
2631 OS
<< "Number of functions with maximum count (< " << ValueCutoff
2632 << "): " << BelowCutoffFunctions
<< "\n";
2633 OS
<< "Number of functions with maximum count (>= " << ValueCutoff
2634 << "): " << PS
->getNumFunctions() - BelowCutoffFunctions
<< "\n";
2636 OS
<< "Maximum function count: " << PS
->getMaxFunctionCount() << "\n";
2637 OS
<< "Maximum internal block count: " << PS
->getMaxInternalCount() << "\n";
2640 std::vector
<std::pair
<std::string
, uint64_t>> SortedHottestFuncs
;
2641 while (!HottestFuncs
.empty()) {
2642 SortedHottestFuncs
.emplace_back(HottestFuncs
.top());
2645 OS
<< "Top " << TopN
2646 << " functions with the largest internal block counts: \n";
2647 for (auto &hotfunc
: llvm::reverse(SortedHottestFuncs
))
2648 OS
<< " " << hotfunc
.first
<< ", max count = " << hotfunc
.second
<< "\n";
2651 if (ShownFunctions
&& ShowIndirectCallTargets
) {
2652 OS
<< "Statistics for indirect call sites profile:\n";
2653 showValueSitesStats(OS
, IPVK_IndirectCallTarget
,
2654 VPStats
[IPVK_IndirectCallTarget
]);
2657 if (ShownFunctions
&& ShowMemOPSizes
) {
2658 OS
<< "Statistics for memory intrinsic calls sizes profile:\n";
2659 showValueSitesStats(OS
, IPVK_MemOPSize
, VPStats
[IPVK_MemOPSize
]);
2662 if (ShowDetailedSummary
) {
2663 OS
<< "Total number of blocks: " << PS
->getNumCounts() << "\n";
2664 OS
<< "Total count: " << PS
->getTotalCount() << "\n";
2665 PS
->printDetailedSummary(OS
);
2669 if (Error E
= Reader
->printBinaryIds(OS
))
2670 exitWithError(std::move(E
), Filename
);
2672 if (ShowProfileVersion
)
2673 OS
<< "Profile version: " << Reader
->getVersion() << "\n";
2675 if (ShowTemporalProfTraces
) {
2676 auto &Traces
= Reader
->getTemporalProfTraces();
2677 OS
<< "Temporal Profile Traces (samples=" << Traces
.size()
2678 << " seen=" << Reader
->getTemporalProfTraceStreamSize() << "):\n";
2679 for (unsigned i
= 0; i
< Traces
.size(); i
++) {
2680 OS
<< " Temporal Profile Trace " << i
<< " (weight=" << Traces
[i
].Weight
2681 << " count=" << Traces
[i
].FunctionNameRefs
.size() << "):\n";
2682 for (auto &NameRef
: Traces
[i
].FunctionNameRefs
)
2683 OS
<< " " << Reader
->getSymtab().getFuncOrVarName(NameRef
) << "\n";
2690 static void showSectionInfo(sampleprof::SampleProfileReader
*Reader
,
2691 raw_fd_ostream
&OS
) {
2692 if (!Reader
->dumpSectionInfo(OS
)) {
2693 WithColor::warning() << "-show-sec-info-only is only supported for "
2694 << "sample profile in extbinary format and is "
2695 << "ignored for other formats.\n";
2701 struct HotFuncInfo
{
2702 std::string FuncName
;
2703 uint64_t TotalCount
;
2704 double TotalCountPercent
;
2706 uint64_t EntryCount
;
2709 : TotalCount(0), TotalCountPercent(0.0f
), MaxCount(0), EntryCount(0) {}
2711 HotFuncInfo(StringRef FN
, uint64_t TS
, double TSP
, uint64_t MS
, uint64_t ES
)
2712 : FuncName(FN
.begin(), FN
.end()), TotalCount(TS
), TotalCountPercent(TSP
),
2713 MaxCount(MS
), EntryCount(ES
) {}
2717 // Print out detailed information about hot functions in PrintValues vector.
2718 // Users specify titles and offset of every columns through ColumnTitle and
2719 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
2720 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
2721 // print out or let it be an empty string.
2722 static void dumpHotFunctionList(const std::vector
<std::string
> &ColumnTitle
,
2723 const std::vector
<int> &ColumnOffset
,
2724 const std::vector
<HotFuncInfo
> &PrintValues
,
2725 uint64_t HotFuncCount
, uint64_t TotalFuncCount
,
2726 uint64_t HotProfCount
, uint64_t TotalProfCount
,
2727 const std::string
&HotFuncMetric
,
2728 uint32_t TopNFunctions
, raw_fd_ostream
&OS
) {
2729 assert(ColumnOffset
.size() == ColumnTitle
.size() &&
2730 "ColumnOffset and ColumnTitle should have the same size");
2731 assert(ColumnTitle
.size() >= 4 &&
2732 "ColumnTitle should have at least 4 elements");
2733 assert(TotalFuncCount
> 0 &&
2734 "There should be at least one function in the profile");
2735 double TotalProfPercent
= 0;
2736 if (TotalProfCount
> 0)
2737 TotalProfPercent
= static_cast<double>(HotProfCount
) / TotalProfCount
* 100;
2739 formatted_raw_ostream
FOS(OS
);
2740 FOS
<< HotFuncCount
<< " out of " << TotalFuncCount
2741 << " functions with profile ("
2743 (static_cast<double>(HotFuncCount
) / TotalFuncCount
* 100))
2744 << ") are considered hot functions";
2745 if (!HotFuncMetric
.empty())
2746 FOS
<< " (" << HotFuncMetric
<< ")";
2748 FOS
<< HotProfCount
<< " out of " << TotalProfCount
<< " profile counts ("
2749 << format("%.2f%%", TotalProfPercent
) << ") are from hot functions.\n";
2751 for (size_t I
= 0; I
< ColumnTitle
.size(); ++I
) {
2752 FOS
.PadToColumn(ColumnOffset
[I
]);
2753 FOS
<< ColumnTitle
[I
];
2758 for (const auto &R
: PrintValues
) {
2759 if (TopNFunctions
&& (Count
++ == TopNFunctions
))
2761 FOS
.PadToColumn(ColumnOffset
[0]);
2762 FOS
<< R
.TotalCount
<< " (" << format("%.2f%%", R
.TotalCountPercent
) << ")";
2763 FOS
.PadToColumn(ColumnOffset
[1]);
2765 FOS
.PadToColumn(ColumnOffset
[2]);
2766 FOS
<< R
.EntryCount
;
2767 FOS
.PadToColumn(ColumnOffset
[3]);
2768 FOS
<< R
.FuncName
<< "\n";
2772 static int showHotFunctionList(const sampleprof::SampleProfileMap
&Profiles
,
2773 ProfileSummary
&PS
, uint32_t TopN
,
2774 raw_fd_ostream
&OS
) {
2775 using namespace sampleprof
;
2777 const uint32_t HotFuncCutoff
= 990000;
2778 auto &SummaryVector
= PS
.getDetailedSummary();
2779 uint64_t MinCountThreshold
= 0;
2780 for (const ProfileSummaryEntry
&SummaryEntry
: SummaryVector
) {
2781 if (SummaryEntry
.Cutoff
== HotFuncCutoff
) {
2782 MinCountThreshold
= SummaryEntry
.MinCount
;
2787 // Traverse all functions in the profile and keep only hot functions.
2788 // The following loop also calculates the sum of total samples of all
2790 std::multimap
<uint64_t, std::pair
<const FunctionSamples
*, const uint64_t>,
2791 std::greater
<uint64_t>>
2793 uint64_t ProfileTotalSample
= 0;
2794 uint64_t HotFuncSample
= 0;
2795 uint64_t HotFuncCount
= 0;
2797 for (const auto &I
: Profiles
) {
2798 FuncSampleStats FuncStats
;
2799 const FunctionSamples
&FuncProf
= I
.second
;
2800 ProfileTotalSample
+= FuncProf
.getTotalSamples();
2801 getFuncSampleStats(FuncProf
, FuncStats
, MinCountThreshold
);
2803 if (isFunctionHot(FuncStats
, MinCountThreshold
)) {
2804 HotFunc
.emplace(FuncProf
.getTotalSamples(),
2805 std::make_pair(&(I
.second
), FuncStats
.MaxSample
));
2806 HotFuncSample
+= FuncProf
.getTotalSamples();
2811 std::vector
<std::string
> ColumnTitle
{"Total sample (%)", "Max sample",
2812 "Entry sample", "Function name"};
2813 std::vector
<int> ColumnOffset
{0, 24, 42, 58};
2814 std::string Metric
=
2815 std::string("max sample >= ") + std::to_string(MinCountThreshold
);
2816 std::vector
<HotFuncInfo
> PrintValues
;
2817 for (const auto &FuncPair
: HotFunc
) {
2818 const FunctionSamples
&Func
= *FuncPair
.second
.first
;
2819 double TotalSamplePercent
=
2820 (ProfileTotalSample
> 0)
2821 ? (Func
.getTotalSamples() * 100.0) / ProfileTotalSample
2823 PrintValues
.emplace_back(
2824 HotFuncInfo(Func
.getContext().toString(), Func
.getTotalSamples(),
2825 TotalSamplePercent
, FuncPair
.second
.second
,
2826 Func
.getHeadSamplesEstimate()));
2828 dumpHotFunctionList(ColumnTitle
, ColumnOffset
, PrintValues
, HotFuncCount
,
2829 Profiles
.size(), HotFuncSample
, ProfileTotalSample
,
2835 static int showSampleProfile(const std::string
&Filename
, bool ShowCounts
,
2836 uint32_t TopN
, bool ShowAllFunctions
,
2837 bool ShowDetailedSummary
,
2838 const std::string
&ShowFunction
,
2839 bool ShowProfileSymbolList
,
2840 bool ShowSectionInfoOnly
, bool ShowHotFuncList
,
2841 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2842 if (SFormat
== ShowFormat::Yaml
)
2843 exitWithError("YAML output is not supported for sample profiles");
2844 using namespace sampleprof
;
2845 LLVMContext Context
;
2846 auto FS
= vfs::getRealFileSystem();
2847 auto ReaderOrErr
= SampleProfileReader::create(Filename
, Context
, *FS
,
2848 FSDiscriminatorPassOption
);
2849 if (std::error_code EC
= ReaderOrErr
.getError())
2850 exitWithErrorCode(EC
, Filename
);
2852 auto Reader
= std::move(ReaderOrErr
.get());
2853 if (ShowSectionInfoOnly
) {
2854 showSectionInfo(Reader
.get(), OS
);
2858 if (std::error_code EC
= Reader
->read())
2859 exitWithErrorCode(EC
, Filename
);
2861 if (ShowAllFunctions
|| ShowFunction
.empty()) {
2862 if (SFormat
== ShowFormat::Json
)
2863 Reader
->dumpJson(OS
);
2867 if (SFormat
== ShowFormat::Json
)
2869 "the JSON format is supported only when all functions are to "
2872 // TODO: parse context string to support filtering by contexts.
2873 FunctionSamples
*FS
= Reader
->getSamplesFor(StringRef(ShowFunction
));
2874 Reader
->dumpFunctionProfile(FS
? *FS
: FunctionSamples(), OS
);
2877 if (ShowProfileSymbolList
) {
2878 std::unique_ptr
<sampleprof::ProfileSymbolList
> ReaderList
=
2879 Reader
->getProfileSymbolList();
2880 ReaderList
->dump(OS
);
2883 if (ShowDetailedSummary
) {
2884 auto &PS
= Reader
->getSummary();
2885 PS
.printSummary(OS
);
2886 PS
.printDetailedSummary(OS
);
2889 if (ShowHotFuncList
|| TopN
)
2890 showHotFunctionList(Reader
->getProfiles(), Reader
->getSummary(), TopN
, OS
);
2895 static int showMemProfProfile(const std::string
&Filename
,
2896 const std::string
&ProfiledBinary
,
2897 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2898 if (SFormat
== ShowFormat::Json
)
2899 exitWithError("JSON output is not supported for MemProf");
2900 auto ReaderOr
= llvm::memprof::RawMemProfReader::create(
2901 Filename
, ProfiledBinary
, /*KeepNames=*/true);
2902 if (Error E
= ReaderOr
.takeError())
2903 // Since the error can be related to the profile or the binary we do not
2904 // pass whence. Instead additional context is provided where necessary in
2905 // the error message.
2906 exitWithError(std::move(E
), /*Whence*/ "");
2908 std::unique_ptr
<llvm::memprof::RawMemProfReader
> Reader(
2909 ReaderOr
.get().release());
2911 Reader
->printYAML(OS
);
2915 static int showDebugInfoCorrelation(const std::string
&Filename
,
2916 bool ShowDetailedSummary
,
2917 bool ShowProfileSymbolList
,
2918 int MaxDbgCorrelationWarnings
,
2919 ShowFormat SFormat
, raw_fd_ostream
&OS
) {
2920 if (SFormat
== ShowFormat::Json
)
2921 exitWithError("JSON output is not supported for debug info correlation");
2922 std::unique_ptr
<InstrProfCorrelator
> Correlator
;
2924 InstrProfCorrelator::get(Filename
, InstrProfCorrelator::DEBUG_INFO
)
2925 .moveInto(Correlator
))
2926 exitWithError(std::move(Err
), Filename
);
2927 if (SFormat
== ShowFormat::Yaml
) {
2928 if (auto Err
= Correlator
->dumpYaml(MaxDbgCorrelationWarnings
, OS
))
2929 exitWithError(std::move(Err
), Filename
);
2933 if (auto Err
= Correlator
->correlateProfileData(MaxDbgCorrelationWarnings
))
2934 exitWithError(std::move(Err
), Filename
);
2936 InstrProfSymtab Symtab
;
2937 if (auto Err
= Symtab
.create(
2938 StringRef(Correlator
->getNamesPointer(), Correlator
->getNamesSize())))
2939 exitWithError(std::move(Err
), Filename
);
2941 if (ShowProfileSymbolList
)
2942 Symtab
.dumpNames(OS
);
2943 // TODO: Read "Profile Data Type" from debug info to compute and show how many
2944 // counters the section holds.
2945 if (ShowDetailedSummary
)
2946 OS
<< "Counters section size: 0x"
2947 << Twine::utohexstr(Correlator
->getCountersSectionSize()) << " bytes\n";
2948 OS
<< "Found " << Correlator
->getDataSize() << " functions\n";
2953 static int show_main(int argc
, const char *argv
[]) {
2954 cl::opt
<std::string
> Filename(cl::Positional
, cl::desc("<profdata-file>"));
2956 cl::opt
<bool> ShowCounts("counts", cl::init(false),
2957 cl::desc("Show counter values for shown functions"));
2958 cl::opt
<ShowFormat
> SFormat(
2959 "show-format", cl::init(ShowFormat::Text
),
2960 cl::desc("Emit output in the selected format if supported"),
2961 cl::values(clEnumValN(ShowFormat::Text
, "text",
2962 "emit normal text output (default)"),
2963 clEnumValN(ShowFormat::Json
, "json", "emit JSON"),
2964 clEnumValN(ShowFormat::Yaml
, "yaml", "emit YAML")));
2965 // TODO: Consider replacing this with `--show-format=text-encoding`.
2966 cl::opt
<bool> TextFormat(
2967 "text", cl::init(false),
2968 cl::desc("Show instr profile data in text dump format"));
2969 cl::opt
<bool> JsonFormat(
2970 "json", cl::desc("Show sample profile data in the JSON format "
2971 "(deprecated, please use --show-format=json)"));
2972 cl::opt
<bool> ShowIndirectCallTargets(
2973 "ic-targets", cl::init(false),
2974 cl::desc("Show indirect call site target values for shown functions"));
2975 cl::opt
<bool> ShowMemOPSizes(
2976 "memop-sizes", cl::init(false),
2977 cl::desc("Show the profiled sizes of the memory intrinsic calls "
2978 "for shown functions"));
2979 cl::opt
<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
2980 cl::desc("Show detailed profile summary"));
2981 cl::list
<uint32_t> DetailedSummaryCutoffs(
2982 cl::CommaSeparated
, "detailed-summary-cutoffs",
2984 "Cutoff percentages (times 10000) for generating detailed summary"),
2985 cl::value_desc("800000,901000,999999"));
2986 cl::opt
<bool> ShowHotFuncList(
2987 "hot-func-list", cl::init(false),
2988 cl::desc("Show profile summary of a list of hot functions"));
2989 cl::opt
<bool> ShowAllFunctions("all-functions", cl::init(false),
2990 cl::desc("Details for every function"));
2991 cl::opt
<bool> ShowCS("showcs", cl::init(false),
2992 cl::desc("Show context sensitive counts"));
2993 cl::opt
<std::string
> ShowFunction("function",
2994 cl::desc("Details for matching functions"));
2996 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
2997 cl::init("-"), cl::desc("Output file"));
2998 cl::alias
OutputFilenameA("o", cl::desc("Alias for --output"),
2999 cl::aliasopt(OutputFilename
));
3000 cl::opt
<ProfileKinds
> ProfileKind(
3001 cl::desc("Profile kind:"), cl::init(instr
),
3002 cl::values(clEnumVal(instr
, "Instrumentation profile (default)"),
3003 clEnumVal(sample
, "Sample profile"),
3004 clEnumVal(memory
, "MemProf memory access profile")));
3005 cl::opt
<uint32_t> TopNFunctions(
3006 "topn", cl::init(0),
3007 cl::desc("Show the list of functions with the largest internal counts"));
3008 cl::opt
<uint32_t> ValueCutoff(
3009 "value-cutoff", cl::init(0),
3010 cl::desc("Set the count value cutoff. Functions with the maximum count "
3011 "less than this value will not be printed out. (Default is 0)"));
3012 cl::opt
<bool> OnlyListBelow(
3013 "list-below-cutoff", cl::init(false),
3014 cl::desc("Only output names of functions whose max count values are "
3015 "below the cutoff value"));
3016 cl::opt
<bool> ShowProfileSymbolList(
3017 "show-prof-sym-list", cl::init(false),
3018 cl::desc("Show profile symbol list if it exists in the profile. "));
3019 cl::opt
<bool> ShowSectionInfoOnly(
3020 "show-sec-info-only", cl::init(false),
3021 cl::desc("Show the information of each section in the sample profile. "
3022 "The flag is only usable when the sample profile is in "
3023 "extbinary format"));
3024 cl::opt
<bool> ShowBinaryIds("binary-ids", cl::init(false),
3025 cl::desc("Show binary ids in the profile. "));
3026 cl::opt
<bool> ShowTemporalProfTraces(
3027 "temporal-profile-traces",
3028 cl::desc("Show temporal profile traces in the profile."));
3029 cl::opt
<std::string
> DebugInfoFilename(
3030 "debug-info", cl::init(""),
3031 cl::desc("Read and extract profile metadata from debug info and show "
3032 "the functions it found."));
3033 cl::opt
<unsigned> MaxDbgCorrelationWarnings(
3034 "max-debug-info-correlation-warnings",
3035 cl::desc("The maximum number of warnings to emit when correlating "
3036 "profile from debug info (0 = no limit)"),
3038 cl::opt
<bool> ShowCovered(
3039 "covered", cl::init(false),
3040 cl::desc("Show only the functions that have been executed."));
3041 cl::opt
<std::string
> ProfiledBinary(
3042 "profiled-binary", cl::init(""),
3043 cl::desc("Path to binary from which the profile was collected."));
3044 cl::opt
<bool> ShowProfileVersion("profile-version", cl::init(false),
3045 cl::desc("Show profile version. "));
3046 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data summary\n");
3048 if (Filename
.empty() && DebugInfoFilename
.empty())
3050 "the positional argument '<profdata-file>' is required unless '--" +
3051 DebugInfoFilename
.ArgStr
+ "' is provided");
3053 if (Filename
== OutputFilename
) {
3054 errs() << sys::path::filename(argv
[0])
3055 << ": Input file name cannot be the same as the output file name!\n";
3059 SFormat
= ShowFormat::Json
;
3062 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3064 exitWithErrorCode(EC
, OutputFilename
);
3066 if (ShowAllFunctions
&& !ShowFunction
.empty())
3067 WithColor::warning() << "-function argument ignored: showing all functions\n";
3069 if (!DebugInfoFilename
.empty())
3070 return showDebugInfoCorrelation(DebugInfoFilename
, ShowDetailedSummary
,
3071 ShowProfileSymbolList
,
3072 MaxDbgCorrelationWarnings
, SFormat
, OS
);
3074 if (ProfileKind
== instr
)
3075 return showInstrProfile(
3076 Filename
, ShowCounts
, TopNFunctions
, ShowIndirectCallTargets
,
3077 ShowMemOPSizes
, ShowDetailedSummary
, DetailedSummaryCutoffs
,
3078 ShowAllFunctions
, ShowCS
, ValueCutoff
, OnlyListBelow
, ShowFunction
,
3079 TextFormat
, ShowBinaryIds
, ShowCovered
, ShowProfileVersion
,
3080 ShowTemporalProfTraces
, SFormat
, OS
);
3081 if (ProfileKind
== sample
)
3082 return showSampleProfile(Filename
, ShowCounts
, TopNFunctions
,
3083 ShowAllFunctions
, ShowDetailedSummary
,
3084 ShowFunction
, ShowProfileSymbolList
,
3085 ShowSectionInfoOnly
, ShowHotFuncList
, SFormat
, OS
);
3086 return showMemProfProfile(Filename
, ProfiledBinary
, SFormat
, OS
);
3089 static int order_main(int argc
, const char *argv
[]) {
3090 cl::opt
<std::string
> Filename(cl::Positional
, cl::desc("<profdata-file>"));
3091 cl::opt
<std::string
> OutputFilename("output", cl::value_desc("output"),
3092 cl::init("-"), cl::desc("Output file"));
3093 cl::alias
OutputFilenameA("o", cl::desc("Alias for --output"),
3094 cl::aliasopt(OutputFilename
));
3095 cl::ParseCommandLineOptions(argc
, argv
, "LLVM profile data order\n");
3098 raw_fd_ostream
OS(OutputFilename
.data(), EC
, sys::fs::OF_TextWithCRLF
);
3100 exitWithErrorCode(EC
, OutputFilename
);
3101 auto FS
= vfs::getRealFileSystem();
3102 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
3103 if (Error E
= ReaderOrErr
.takeError())
3104 exitWithError(std::move(E
), Filename
);
3106 auto Reader
= std::move(ReaderOrErr
.get());
3107 for (auto &I
: *Reader
) {
3111 auto &Traces
= Reader
->getTemporalProfTraces();
3112 auto Nodes
= TemporalProfTraceTy::createBPFunctionNodes(Traces
);
3113 BalancedPartitioningConfig Config
;
3114 BalancedPartitioning
BP(Config
);
3117 WithColor::note() << "# Ordered " << Nodes
.size() << " functions\n";
3118 for (auto &N
: Nodes
) {
3119 auto [Filename
, ParsedFuncName
] =
3120 getParsedIRPGOFuncName(Reader
->getSymtab().getFuncOrVarName(N
.Id
));
3121 if (!Filename
.empty())
3122 OS
<< "# " << Filename
<< "\n";
3123 OS
<< ParsedFuncName
<< "\n";
3128 typedef int (*llvm_profdata_subcommand
)(int, const char *[]);
3130 static std::tuple
<StringRef
, llvm_profdata_subcommand
>
3131 llvm_profdata_subcommands
[] = {
3132 {"merge", merge_main
},
3133 {"show", show_main
},
3134 {"order", order_main
},
3135 {"overlap", overlap_main
},
3138 int llvm_profdata_main(int argc
, char **argvNonConst
,
3139 const llvm::ToolContext
&) {
3140 const char **argv
= const_cast<const char **>(argvNonConst
);
3141 InitLLVM
X(argc
, argv
);
3143 StringRef
ProgName(sys::path::filename(argv
[0]));
3146 llvm_profdata_subcommand func
= nullptr;
3147 for (auto [subcmd_name
, subcmd_action
] : llvm_profdata_subcommands
)
3148 if (subcmd_name
== argv
[1])
3149 func
= subcmd_action
;
3152 std::string
Invocation(ProgName
.str() + " " + argv
[1]);
3153 argv
[1] = Invocation
.c_str();
3154 return func(argc
- 1, argv
+ 1);
3157 if (strcmp(argv
[1], "-h") == 0 || strcmp(argv
[1], "-help") == 0 ||
3158 strcmp(argv
[1], "--help") == 0) {
3160 errs() << "OVERVIEW: LLVM profile data tools\n\n"
3161 << "USAGE: " << ProgName
<< " <command> [args...]\n"
3162 << "USAGE: " << ProgName
<< " <command> -help\n\n"
3163 << "See each individual command --help for more details.\n"
3164 << "Available commands: "
3165 << join(map_range(llvm_profdata_subcommands
,
3166 [](auto const &KV
) { return std::get
<0>(KV
); }),
3172 if (strcmp(argv
[1], "--version") == 0) {
3173 outs() << ProgName
<< '\n';
3174 cl::PrintVersionMessage();
3180 errs() << ProgName
<< ": No command specified!\n";
3182 errs() << ProgName
<< ": Unknown command!\n";
3184 errs() << "USAGE: " << ProgName
<< " <"
3185 << join(map_range(llvm_profdata_subcommands
,
3186 [](auto const &KV
) { return std::get
<0>(KV
); }),