1 //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the SampleProfileProber transformation.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/MDBuilder.h"
24 #include "llvm/IR/PseudoProbe.h"
25 #include "llvm/ProfileData/SampleProf.h"
26 #include "llvm/Support/CRC.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Target/TargetMachine.h"
29 #include "llvm/Transforms/Instrumentation.h"
30 #include "llvm/Transforms/Utils/ModuleUtils.h"
31 #include <unordered_set>
35 #define DEBUG_TYPE "sample-profile-probe"
37 STATISTIC(ArtificialDbgLine
,
38 "Number of probes that have an artificial debug line");
41 VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden
,
42 cl::desc("Do pseudo probe verification"));
44 static cl::list
<std::string
> VerifyPseudoProbeFuncList(
45 "verify-pseudo-probe-funcs", cl::Hidden
,
46 cl::desc("The option to specify the name of the functions to verify."));
49 UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden
,
50 cl::desc("Update pseudo probe distribution factor"));
52 static uint64_t getCallStackHash(const DILocation
*DIL
) {
54 const DILocation
*InlinedAt
= DIL
? DIL
->getInlinedAt() : nullptr;
56 Hash
^= MD5Hash(std::to_string(InlinedAt
->getLine()));
57 Hash
^= MD5Hash(std::to_string(InlinedAt
->getColumn()));
58 const DISubprogram
*SP
= InlinedAt
->getScope()->getSubprogram();
59 // Use linkage name for C++ if possible.
60 auto Name
= SP
->getLinkageName();
63 Hash
^= MD5Hash(Name
);
64 InlinedAt
= InlinedAt
->getInlinedAt();
69 static uint64_t computeCallStackHash(const Instruction
&Inst
) {
70 return getCallStackHash(Inst
.getDebugLoc());
73 bool PseudoProbeVerifier::shouldVerifyFunction(const Function
*F
) {
74 // Skip function declaration.
75 if (F
->isDeclaration())
77 // Skip function that will not be emitted into object file. The prevailing
78 // defintion will be verified instead.
79 if (F
->hasAvailableExternallyLinkage())
81 // Do a name matching.
82 static std::unordered_set
<std::string
> VerifyFuncNames(
83 VerifyPseudoProbeFuncList
.begin(), VerifyPseudoProbeFuncList
.end());
84 return VerifyFuncNames
.empty() || VerifyFuncNames
.count(F
->getName().str());
87 void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks
&PIC
) {
88 if (VerifyPseudoProbe
) {
89 PIC
.registerAfterPassCallback(
90 [this](StringRef P
, Any IR
, const PreservedAnalyses
&) {
91 this->runAfterPass(P
, IR
);
96 // Callback to run after each transformation for the new pass manager.
97 void PseudoProbeVerifier::runAfterPass(StringRef PassID
, Any IR
) {
99 "\n*** Pseudo Probe Verification After " + PassID
.str() + " ***\n";
101 if (any_isa
<const Module
*>(IR
))
102 runAfterPass(any_cast
<const Module
*>(IR
));
103 else if (any_isa
<const Function
*>(IR
))
104 runAfterPass(any_cast
<const Function
*>(IR
));
105 else if (any_isa
<const LazyCallGraph::SCC
*>(IR
))
106 runAfterPass(any_cast
<const LazyCallGraph::SCC
*>(IR
));
107 else if (any_isa
<const Loop
*>(IR
))
108 runAfterPass(any_cast
<const Loop
*>(IR
));
110 llvm_unreachable("Unknown IR unit");
113 void PseudoProbeVerifier::runAfterPass(const Module
*M
) {
114 for (const Function
&F
: *M
)
118 void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC
*C
) {
119 for (const LazyCallGraph::Node
&N
: *C
)
120 runAfterPass(&N
.getFunction());
123 void PseudoProbeVerifier::runAfterPass(const Function
*F
) {
124 if (!shouldVerifyFunction(F
))
126 ProbeFactorMap ProbeFactors
;
127 for (const auto &BB
: *F
)
128 collectProbeFactors(&BB
, ProbeFactors
);
129 verifyProbeFactors(F
, ProbeFactors
);
132 void PseudoProbeVerifier::runAfterPass(const Loop
*L
) {
133 const Function
*F
= L
->getHeader()->getParent();
137 void PseudoProbeVerifier::collectProbeFactors(const BasicBlock
*Block
,
138 ProbeFactorMap
&ProbeFactors
) {
139 for (const auto &I
: *Block
) {
140 if (Optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
141 uint64_t Hash
= computeCallStackHash(I
);
142 ProbeFactors
[{Probe
->Id
, Hash
}] += Probe
->Factor
;
147 void PseudoProbeVerifier::verifyProbeFactors(
148 const Function
*F
, const ProbeFactorMap
&ProbeFactors
) {
149 bool BannerPrinted
= false;
150 auto &PrevProbeFactors
= FunctionProbeFactors
[F
->getName()];
151 for (const auto &I
: ProbeFactors
) {
152 float CurProbeFactor
= I
.second
;
153 if (PrevProbeFactors
.count(I
.first
)) {
154 float PrevProbeFactor
= PrevProbeFactors
[I
.first
];
155 if (std::abs(CurProbeFactor
- PrevProbeFactor
) >
156 DistributionFactorVariance
) {
157 if (!BannerPrinted
) {
158 dbgs() << "Function " << F
->getName() << ":\n";
159 BannerPrinted
= true;
161 dbgs() << "Probe " << I
.first
.first
<< "\tprevious factor "
162 << format("%0.2f", PrevProbeFactor
) << "\tcurrent factor "
163 << format("%0.2f", CurProbeFactor
) << "\n";
168 PrevProbeFactors
[I
.first
] = I
.second
;
172 PseudoProbeManager::PseudoProbeManager(const Module
&M
) {
173 if (NamedMDNode
*FuncInfo
= M
.getNamedMetadata(PseudoProbeDescMetadataName
)) {
174 for (const auto *Operand
: FuncInfo
->operands()) {
175 const auto *MD
= cast
<MDNode
>(Operand
);
177 mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(0))->getZExtValue();
179 mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(1))->getZExtValue();
180 GUIDToProbeDescMap
.try_emplace(GUID
, PseudoProbeDescriptor(GUID
, Hash
));
185 const PseudoProbeDescriptor
*
186 PseudoProbeManager::getDesc(const Function
&F
) const {
187 auto I
= GUIDToProbeDescMap
.find(
188 Function::getGUID(FunctionSamples::getCanonicalFnName(F
)));
189 return I
== GUIDToProbeDescMap
.end() ? nullptr : &I
->second
;
192 bool PseudoProbeManager::moduleIsProbed(const Module
&M
) const {
193 return M
.getNamedMetadata(PseudoProbeDescMetadataName
);
196 bool PseudoProbeManager::profileIsValid(const Function
&F
,
197 const FunctionSamples
&Samples
) const {
198 const auto *Desc
= getDesc(F
);
200 LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F
.getName()
204 if (Desc
->getFunctionHash() != Samples
.getFunctionHash()) {
205 LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F
.getName()
213 SampleProfileProber::SampleProfileProber(Function
&Func
,
214 const std::string
&CurModuleUniqueId
)
215 : F(&Func
), CurModuleUniqueId(CurModuleUniqueId
) {
216 BlockProbeIds
.clear();
217 CallProbeIds
.clear();
218 LastProbeId
= (uint32_t)PseudoProbeReservedId::Last
;
219 computeProbeIdForBlocks();
220 computeProbeIdForCallsites();
224 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
225 // value of each BB in the CFG. The higher 32 bits record the number of edges
226 // preceded by the number of indirect calls.
227 // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
228 void SampleProfileProber::computeCFGHash() {
229 std::vector
<uint8_t> Indexes
;
231 for (auto &BB
: *F
) {
232 auto *TI
= BB
.getTerminator();
233 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
) {
234 auto *Succ
= TI
->getSuccessor(I
);
235 auto Index
= getBlockId(Succ
);
236 for (int J
= 0; J
< 4; J
++)
237 Indexes
.push_back((uint8_t)(Index
>> (J
* 8)));
243 FunctionHash
= (uint64_t)CallProbeIds
.size() << 48 |
244 (uint64_t)Indexes
.size() << 32 | JC
.getCRC();
245 // Reserve bit 60-63 for other information purpose.
246 FunctionHash
&= 0x0FFFFFFFFFFFFFFF;
247 assert(FunctionHash
&& "Function checksum should not be zero");
248 LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F
->getName()
250 << " CRC = " << JC
.getCRC() << ", Edges = "
251 << Indexes
.size() << ", ICSites = " << CallProbeIds
.size()
252 << ", Hash = " << FunctionHash
<< "\n");
255 void SampleProfileProber::computeProbeIdForBlocks() {
256 for (auto &BB
: *F
) {
257 BlockProbeIds
[&BB
] = ++LastProbeId
;
261 void SampleProfileProber::computeProbeIdForCallsites() {
262 for (auto &BB
: *F
) {
264 if (!isa
<CallBase
>(I
))
266 if (isa
<IntrinsicInst
>(&I
))
268 CallProbeIds
[&I
] = ++LastProbeId
;
273 uint32_t SampleProfileProber::getBlockId(const BasicBlock
*BB
) const {
274 auto I
= BlockProbeIds
.find(const_cast<BasicBlock
*>(BB
));
275 return I
== BlockProbeIds
.end() ? 0 : I
->second
;
278 uint32_t SampleProfileProber::getCallsiteId(const Instruction
*Call
) const {
279 auto Iter
= CallProbeIds
.find(const_cast<Instruction
*>(Call
));
280 return Iter
== CallProbeIds
.end() ? 0 : Iter
->second
;
283 void SampleProfileProber::instrumentOneFunc(Function
&F
, TargetMachine
*TM
) {
284 Module
*M
= F
.getParent();
285 MDBuilder
MDB(F
.getContext());
286 // Compute a GUID without considering the function's linkage type. This is
287 // fine since function name is the only key in the profile database.
288 uint64_t Guid
= Function::getGUID(F
.getName());
290 // Assign an artificial debug line to a probe that doesn't come with a real
291 // line. A probe not having a debug line will get an incomplete inline
292 // context. This will cause samples collected on the probe to be counted
293 // into the base profile instead of a context profile. The line number
294 // itself is not important though.
295 auto AssignDebugLoc
= [&](Instruction
*I
) {
296 assert((isa
<PseudoProbeInst
>(I
) || isa
<CallBase
>(I
)) &&
297 "Expecting pseudo probe or call instructions");
298 if (!I
->getDebugLoc()) {
299 if (auto *SP
= F
.getSubprogram()) {
300 auto DIL
= DILocation::get(SP
->getContext(), 0, 0, SP
);
304 dbgs() << "\nIn Function " << F
.getName()
305 << " Probe gets an artificial debug line\n";
312 // Probe basic blocks.
313 for (auto &I
: BlockProbeIds
) {
314 BasicBlock
*BB
= I
.first
;
315 uint32_t Index
= I
.second
;
316 // Insert a probe before an instruction with a valid debug line number which
317 // will be assigned to the probe. The line number will be used later to
318 // model the inline context when the probe is inlined into other functions.
319 // Debug instructions, phi nodes and lifetime markers do not have an valid
320 // line number. Real instructions generated by optimizations may not come
321 // with a line number either.
322 auto HasValidDbgLine
= [](Instruction
*J
) {
323 return !isa
<PHINode
>(J
) && !isa
<DbgInfoIntrinsic
>(J
) &&
324 !J
->isLifetimeStartOrEnd() && J
->getDebugLoc();
327 Instruction
*J
= &*BB
->getFirstInsertionPt();
328 while (J
!= BB
->getTerminator() && !HasValidDbgLine(J
)) {
329 J
= J
->getNextNode();
332 IRBuilder
<> Builder(J
);
333 assert(Builder
.GetInsertPoint() != BB
->end() &&
334 "Cannot get the probing point");
336 llvm::Intrinsic::getDeclaration(M
, Intrinsic::pseudoprobe
);
337 Value
*Args
[] = {Builder
.getInt64(Guid
), Builder
.getInt64(Index
),
339 Builder
.getInt64(PseudoProbeFullDistributionFactor
)};
340 auto *Probe
= Builder
.CreateCall(ProbeFn
, Args
);
341 AssignDebugLoc(Probe
);
344 // Probe both direct calls and indirect calls. Direct calls are probed so that
345 // their probe ID can be used as an call site identifier to represent a
347 for (auto &I
: CallProbeIds
) {
348 auto *Call
= I
.first
;
349 uint32_t Index
= I
.second
;
350 uint32_t Type
= cast
<CallBase
>(Call
)->getCalledFunction()
351 ? (uint32_t)PseudoProbeType::DirectCall
352 : (uint32_t)PseudoProbeType::IndirectCall
;
353 AssignDebugLoc(Call
);
354 // Levarge the 32-bit discriminator field of debug data to store the ID and
355 // type of a callsite probe. This gets rid of the dependency on plumbing a
356 // customized metadata through the codegen pipeline.
357 uint32_t V
= PseudoProbeDwarfDiscriminator::packProbeData(
358 Index
, Type
, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor
);
359 if (auto DIL
= Call
->getDebugLoc()) {
360 DIL
= DIL
->cloneWithDiscriminator(V
);
361 Call
->setDebugLoc(DIL
);
365 // Create module-level metadata that contains function info necessary to
366 // synthesize probe-based sample counts, which are
370 auto Hash
= getFunctionHash();
371 auto *MD
= MDB
.createPseudoProbeDesc(Guid
, Hash
, &F
);
372 auto *NMD
= M
->getNamedMetadata(PseudoProbeDescMetadataName
);
373 assert(NMD
&& "llvm.pseudo_probe_desc should be pre-created");
376 // Preserve a comdat group to hold all probes materialized later. This
377 // allows that when the function is considered dead and removed, the
378 // materialized probes are disposed too.
379 // Imported functions are defined in another module. They do not need
380 // the following handling since same care will be taken for them in their
381 // original module. The pseudo probes inserted into an imported functions
382 // above will naturally not be emitted since the imported function is free
383 // from object emission. However they will be emitted together with the
384 // inliner functions that the imported function is inlined into. We are not
385 // creating a comdat group for an import function since it's useless anyway.
386 if (!F
.isDeclarationForLinker()) {
388 auto Triple
= TM
->getTargetTriple();
389 if (Triple
.supportsCOMDAT() && TM
->getFunctionSections())
390 getOrCreateFunctionComdat(F
, Triple
);
395 PreservedAnalyses
SampleProfileProbePass::run(Module
&M
,
396 ModuleAnalysisManager
&AM
) {
397 auto ModuleId
= getUniqueModuleId(&M
);
398 // Create the pseudo probe desc metadata beforehand.
399 // Note that modules with only data but no functions will require this to
400 // be set up so that they will be known as probed later.
401 M
.getOrInsertNamedMetadata(PseudoProbeDescMetadataName
);
404 if (F
.isDeclaration())
406 SampleProfileProber
ProbeManager(F
, ModuleId
);
407 ProbeManager
.instrumentOneFunc(F
, TM
);
410 return PreservedAnalyses::none();
413 void PseudoProbeUpdatePass::runOnFunction(Function
&F
,
414 FunctionAnalysisManager
&FAM
) {
415 BlockFrequencyInfo
&BFI
= FAM
.getResult
<BlockFrequencyAnalysis
>(F
);
416 auto BBProfileCount
= [&BFI
](BasicBlock
*BB
) {
417 return BFI
.getBlockProfileCount(BB
).value_or(0);
420 // Collect the sum of execution weight for each probe.
421 ProbeFactorMap ProbeFactors
;
422 for (auto &Block
: F
) {
423 for (auto &I
: Block
) {
424 if (Optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
425 uint64_t Hash
= computeCallStackHash(I
);
426 ProbeFactors
[{Probe
->Id
, Hash
}] += BBProfileCount(&Block
);
431 // Fix up over-counted probes.
432 for (auto &Block
: F
) {
433 for (auto &I
: Block
) {
434 if (Optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
435 uint64_t Hash
= computeCallStackHash(I
);
436 float Sum
= ProbeFactors
[{Probe
->Id
, Hash
}];
438 setProbeDistributionFactor(I
, BBProfileCount(&Block
) / Sum
);
444 PreservedAnalyses
PseudoProbeUpdatePass::run(Module
&M
,
445 ModuleAnalysisManager
&AM
) {
446 if (UpdatePseudoProbe
) {
448 if (F
.isDeclaration())
450 FunctionAnalysisManager
&FAM
=
451 AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
452 runOnFunction(F
, FAM
);
455 return PreservedAnalyses::none();