1 //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the SampleProfileProber transformation.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/Analysis/EHUtils.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DebugInfoMetadata.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/MDBuilder.h"
26 #include "llvm/IR/PseudoProbe.h"
27 #include "llvm/ProfileData/SampleProf.h"
28 #include "llvm/Support/CRC.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Target/TargetMachine.h"
31 #include "llvm/Transforms/Instrumentation.h"
32 #include "llvm/Transforms/Utils/ModuleUtils.h"
33 #include <unordered_set>
37 #define DEBUG_TYPE "pseudo-probe"
39 STATISTIC(ArtificialDbgLine
,
40 "Number of probes that have an artificial debug line");
43 VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden
,
44 cl::desc("Do pseudo probe verification"));
46 static cl::list
<std::string
> VerifyPseudoProbeFuncList(
47 "verify-pseudo-probe-funcs", cl::Hidden
,
48 cl::desc("The option to specify the name of the functions to verify."));
51 UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden
,
52 cl::desc("Update pseudo probe distribution factor"));
54 static uint64_t getCallStackHash(const DILocation
*DIL
) {
56 const DILocation
*InlinedAt
= DIL
? DIL
->getInlinedAt() : nullptr;
58 Hash
^= MD5Hash(std::to_string(InlinedAt
->getLine()));
59 Hash
^= MD5Hash(std::to_string(InlinedAt
->getColumn()));
60 auto Name
= InlinedAt
->getSubprogramLinkageName();
61 Hash
^= MD5Hash(Name
);
62 InlinedAt
= InlinedAt
->getInlinedAt();
67 static uint64_t computeCallStackHash(const Instruction
&Inst
) {
68 return getCallStackHash(Inst
.getDebugLoc());
71 bool PseudoProbeVerifier::shouldVerifyFunction(const Function
*F
) {
72 // Skip function declaration.
73 if (F
->isDeclaration())
75 // Skip function that will not be emitted into object file. The prevailing
76 // defintion will be verified instead.
77 if (F
->hasAvailableExternallyLinkage())
79 // Do a name matching.
80 static std::unordered_set
<std::string
> VerifyFuncNames(
81 VerifyPseudoProbeFuncList
.begin(), VerifyPseudoProbeFuncList
.end());
82 return VerifyFuncNames
.empty() || VerifyFuncNames
.count(F
->getName().str());
85 void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks
&PIC
) {
86 if (VerifyPseudoProbe
) {
87 PIC
.registerAfterPassCallback(
88 [this](StringRef P
, Any IR
, const PreservedAnalyses
&) {
89 this->runAfterPass(P
, IR
);
94 // Callback to run after each transformation for the new pass manager.
95 void PseudoProbeVerifier::runAfterPass(StringRef PassID
, Any IR
) {
97 "\n*** Pseudo Probe Verification After " + PassID
.str() + " ***\n";
99 if (const auto **M
= llvm::any_cast
<const Module
*>(&IR
))
101 else if (const auto **F
= llvm::any_cast
<const Function
*>(&IR
))
103 else if (const auto **C
= llvm::any_cast
<const LazyCallGraph::SCC
*>(&IR
))
105 else if (const auto **L
= llvm::any_cast
<const Loop
*>(&IR
))
108 llvm_unreachable("Unknown IR unit");
111 void PseudoProbeVerifier::runAfterPass(const Module
*M
) {
112 for (const Function
&F
: *M
)
116 void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC
*C
) {
117 for (const LazyCallGraph::Node
&N
: *C
)
118 runAfterPass(&N
.getFunction());
121 void PseudoProbeVerifier::runAfterPass(const Function
*F
) {
122 if (!shouldVerifyFunction(F
))
124 ProbeFactorMap ProbeFactors
;
125 for (const auto &BB
: *F
)
126 collectProbeFactors(&BB
, ProbeFactors
);
127 verifyProbeFactors(F
, ProbeFactors
);
130 void PseudoProbeVerifier::runAfterPass(const Loop
*L
) {
131 const Function
*F
= L
->getHeader()->getParent();
135 void PseudoProbeVerifier::collectProbeFactors(const BasicBlock
*Block
,
136 ProbeFactorMap
&ProbeFactors
) {
137 for (const auto &I
: *Block
) {
138 if (std::optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
139 uint64_t Hash
= computeCallStackHash(I
);
140 ProbeFactors
[{Probe
->Id
, Hash
}] += Probe
->Factor
;
145 void PseudoProbeVerifier::verifyProbeFactors(
146 const Function
*F
, const ProbeFactorMap
&ProbeFactors
) {
147 bool BannerPrinted
= false;
148 auto &PrevProbeFactors
= FunctionProbeFactors
[F
->getName()];
149 for (const auto &I
: ProbeFactors
) {
150 float CurProbeFactor
= I
.second
;
151 if (PrevProbeFactors
.count(I
.first
)) {
152 float PrevProbeFactor
= PrevProbeFactors
[I
.first
];
153 if (std::abs(CurProbeFactor
- PrevProbeFactor
) >
154 DistributionFactorVariance
) {
155 if (!BannerPrinted
) {
156 dbgs() << "Function " << F
->getName() << ":\n";
157 BannerPrinted
= true;
159 dbgs() << "Probe " << I
.first
.first
<< "\tprevious factor "
160 << format("%0.2f", PrevProbeFactor
) << "\tcurrent factor "
161 << format("%0.2f", CurProbeFactor
) << "\n";
166 PrevProbeFactors
[I
.first
] = I
.second
;
170 SampleProfileProber::SampleProfileProber(Function
&Func
,
171 const std::string
&CurModuleUniqueId
)
172 : F(&Func
), CurModuleUniqueId(CurModuleUniqueId
) {
173 BlockProbeIds
.clear();
174 CallProbeIds
.clear();
175 LastProbeId
= (uint32_t)PseudoProbeReservedId::Last
;
176 computeProbeIdForBlocks();
177 computeProbeIdForCallsites();
181 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
182 // value of each BB in the CFG. The higher 32 bits record the number of edges
183 // preceded by the number of indirect calls.
184 // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
185 void SampleProfileProber::computeCFGHash() {
186 std::vector
<uint8_t> Indexes
;
188 for (auto &BB
: *F
) {
189 for (BasicBlock
*Succ
: successors(&BB
)) {
190 auto Index
= getBlockId(Succ
);
191 for (int J
= 0; J
< 4; J
++)
192 Indexes
.push_back((uint8_t)(Index
>> (J
* 8)));
198 FunctionHash
= (uint64_t)CallProbeIds
.size() << 48 |
199 (uint64_t)Indexes
.size() << 32 | JC
.getCRC();
200 // Reserve bit 60-63 for other information purpose.
201 FunctionHash
&= 0x0FFFFFFFFFFFFFFF;
202 assert(FunctionHash
&& "Function checksum should not be zero");
203 LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F
->getName()
205 << " CRC = " << JC
.getCRC() << ", Edges = "
206 << Indexes
.size() << ", ICSites = " << CallProbeIds
.size()
207 << ", Hash = " << FunctionHash
<< "\n");
210 void SampleProfileProber::computeProbeIdForBlocks() {
211 DenseSet
<BasicBlock
*> KnownColdBlocks
;
212 computeEHOnlyBlocks(*F
, KnownColdBlocks
);
213 // Insert pseudo probe to non-cold blocks only. This will reduce IR size as
214 // well as the binary size while retaining the profile quality.
215 for (auto &BB
: *F
) {
217 if (!KnownColdBlocks
.contains(&BB
))
218 BlockProbeIds
[&BB
] = LastProbeId
;
222 void SampleProfileProber::computeProbeIdForCallsites() {
223 LLVMContext
&Ctx
= F
->getContext();
224 Module
*M
= F
->getParent();
226 for (auto &BB
: *F
) {
228 if (!isa
<CallBase
>(I
))
230 if (isa
<IntrinsicInst
>(&I
))
233 // The current implementation uses the lower 16 bits of the discriminator
234 // so anything larger than 0xFFFF will be ignored.
235 if (LastProbeId
>= 0xFFFF) {
236 std::string Msg
= "Pseudo instrumentation incomplete for " +
237 std::string(F
->getName()) + " because it's too large";
239 DiagnosticInfoSampleProfile(M
->getName().data(), Msg
, DS_Warning
));
243 CallProbeIds
[&I
] = ++LastProbeId
;
248 uint32_t SampleProfileProber::getBlockId(const BasicBlock
*BB
) const {
249 auto I
= BlockProbeIds
.find(const_cast<BasicBlock
*>(BB
));
250 return I
== BlockProbeIds
.end() ? 0 : I
->second
;
253 uint32_t SampleProfileProber::getCallsiteId(const Instruction
*Call
) const {
254 auto Iter
= CallProbeIds
.find(const_cast<Instruction
*>(Call
));
255 return Iter
== CallProbeIds
.end() ? 0 : Iter
->second
;
258 void SampleProfileProber::instrumentOneFunc(Function
&F
, TargetMachine
*TM
) {
259 Module
*M
= F
.getParent();
260 MDBuilder
MDB(F
.getContext());
261 // Since the GUID from probe desc and inline stack are computed seperately, we
262 // need to make sure their names are consistent, so here also use the name
264 StringRef FName
= F
.getName();
265 if (auto *SP
= F
.getSubprogram()) {
266 FName
= SP
->getLinkageName();
268 FName
= SP
->getName();
270 uint64_t Guid
= Function::getGUID(FName
);
272 // Assign an artificial debug line to a probe that doesn't come with a real
273 // line. A probe not having a debug line will get an incomplete inline
274 // context. This will cause samples collected on the probe to be counted
275 // into the base profile instead of a context profile. The line number
276 // itself is not important though.
277 auto AssignDebugLoc
= [&](Instruction
*I
) {
278 assert((isa
<PseudoProbeInst
>(I
) || isa
<CallBase
>(I
)) &&
279 "Expecting pseudo probe or call instructions");
280 if (!I
->getDebugLoc()) {
281 if (auto *SP
= F
.getSubprogram()) {
282 auto DIL
= DILocation::get(SP
->getContext(), 0, 0, SP
);
286 dbgs() << "\nIn Function " << F
.getName()
287 << " Probe gets an artificial debug line\n";
294 // Probe basic blocks.
295 for (auto &I
: BlockProbeIds
) {
296 BasicBlock
*BB
= I
.first
;
297 uint32_t Index
= I
.second
;
298 // Insert a probe before an instruction with a valid debug line number which
299 // will be assigned to the probe. The line number will be used later to
300 // model the inline context when the probe is inlined into other functions.
301 // Debug instructions, phi nodes and lifetime markers do not have an valid
302 // line number. Real instructions generated by optimizations may not come
303 // with a line number either.
304 auto HasValidDbgLine
= [](Instruction
*J
) {
305 return !isa
<PHINode
>(J
) && !isa
<DbgInfoIntrinsic
>(J
) &&
306 !J
->isLifetimeStartOrEnd() && J
->getDebugLoc();
309 Instruction
*J
= &*BB
->getFirstInsertionPt();
310 while (J
!= BB
->getTerminator() && !HasValidDbgLine(J
)) {
311 J
= J
->getNextNode();
314 IRBuilder
<> Builder(J
);
315 assert(Builder
.GetInsertPoint() != BB
->end() &&
316 "Cannot get the probing point");
318 llvm::Intrinsic::getDeclaration(M
, Intrinsic::pseudoprobe
);
319 Value
*Args
[] = {Builder
.getInt64(Guid
), Builder
.getInt64(Index
),
321 Builder
.getInt64(PseudoProbeFullDistributionFactor
)};
322 auto *Probe
= Builder
.CreateCall(ProbeFn
, Args
);
323 AssignDebugLoc(Probe
);
324 // Reset the dwarf discriminator if the debug location comes with any. The
325 // discriminator field may be used by FS-AFDO later in the pipeline.
326 if (auto DIL
= Probe
->getDebugLoc()) {
327 if (DIL
->getDiscriminator()) {
328 DIL
= DIL
->cloneWithDiscriminator(0);
329 Probe
->setDebugLoc(DIL
);
334 // Probe both direct calls and indirect calls. Direct calls are probed so that
335 // their probe ID can be used as an call site identifier to represent a
337 for (auto &I
: CallProbeIds
) {
338 auto *Call
= I
.first
;
339 uint32_t Index
= I
.second
;
340 uint32_t Type
= cast
<CallBase
>(Call
)->getCalledFunction()
341 ? (uint32_t)PseudoProbeType::DirectCall
342 : (uint32_t)PseudoProbeType::IndirectCall
;
343 AssignDebugLoc(Call
);
344 if (auto DIL
= Call
->getDebugLoc()) {
345 // Levarge the 32-bit discriminator field of debug data to store the ID
346 // and type of a callsite probe. This gets rid of the dependency on
347 // plumbing a customized metadata through the codegen pipeline.
348 uint32_t V
= PseudoProbeDwarfDiscriminator::packProbeData(
350 PseudoProbeDwarfDiscriminator::FullDistributionFactor
);
351 DIL
= DIL
->cloneWithDiscriminator(V
);
352 Call
->setDebugLoc(DIL
);
356 // Create module-level metadata that contains function info necessary to
357 // synthesize probe-based sample counts, which are
361 auto Hash
= getFunctionHash();
362 auto *MD
= MDB
.createPseudoProbeDesc(Guid
, Hash
, FName
);
363 auto *NMD
= M
->getNamedMetadata(PseudoProbeDescMetadataName
);
364 assert(NMD
&& "llvm.pseudo_probe_desc should be pre-created");
368 PreservedAnalyses
SampleProfileProbePass::run(Module
&M
,
369 ModuleAnalysisManager
&AM
) {
370 auto ModuleId
= getUniqueModuleId(&M
);
371 // Create the pseudo probe desc metadata beforehand.
372 // Note that modules with only data but no functions will require this to
373 // be set up so that they will be known as probed later.
374 M
.getOrInsertNamedMetadata(PseudoProbeDescMetadataName
);
377 if (F
.isDeclaration())
379 SampleProfileProber
ProbeManager(F
, ModuleId
);
380 ProbeManager
.instrumentOneFunc(F
, TM
);
383 return PreservedAnalyses::none();
386 void PseudoProbeUpdatePass::runOnFunction(Function
&F
,
387 FunctionAnalysisManager
&FAM
) {
388 BlockFrequencyInfo
&BFI
= FAM
.getResult
<BlockFrequencyAnalysis
>(F
);
389 auto BBProfileCount
= [&BFI
](BasicBlock
*BB
) {
390 return BFI
.getBlockProfileCount(BB
).value_or(0);
393 // Collect the sum of execution weight for each probe.
394 ProbeFactorMap ProbeFactors
;
395 for (auto &Block
: F
) {
396 for (auto &I
: Block
) {
397 if (std::optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
398 uint64_t Hash
= computeCallStackHash(I
);
399 ProbeFactors
[{Probe
->Id
, Hash
}] += BBProfileCount(&Block
);
404 // Fix up over-counted probes.
405 for (auto &Block
: F
) {
406 for (auto &I
: Block
) {
407 if (std::optional
<PseudoProbe
> Probe
= extractProbe(I
)) {
408 uint64_t Hash
= computeCallStackHash(I
);
409 float Sum
= ProbeFactors
[{Probe
->Id
, Hash
}];
411 setProbeDistributionFactor(I
, BBProfileCount(&Block
) / Sum
);
417 PreservedAnalyses
PseudoProbeUpdatePass::run(Module
&M
,
418 ModuleAnalysisManager
&AM
) {
419 if (UpdatePseudoProbe
) {
421 if (F
.isDeclaration())
423 FunctionAnalysisManager
&FAM
=
424 AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
425 runOnFunction(F
, FAM
);
428 return PreservedAnalyses::none();