1 //===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of the contextual profile analysis, which maintains contextual
10 // profiling info through IPO passes.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/CtxProfAnalysis.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/IR/Analysis.h"
17 #include "llvm/IR/IntrinsicInst.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/PassManager.h"
20 #include "llvm/ProfileData/PGOCtxProfReader.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/MemoryBuffer.h"
24 #define DEBUG_TYPE "ctx_prof"
28 UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden
,
29 cl::desc("Use the specified contextual profile file"));
31 static cl::opt
<CtxProfAnalysisPrinterPass::PrintMode
> PrintLevel(
32 "ctx-profile-printer-level",
33 cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML
), cl::Hidden
,
34 cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything
,
35 "everything", "print everything - most verbose"),
36 clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML
, "yaml",
37 "just the yaml representation of the profile")),
38 cl::desc("Verbosity level of the contextual profile printer pass."));
40 const char *AssignGUIDPass::GUIDMetadataName
= "guid";
42 PreservedAnalyses
AssignGUIDPass::run(Module
&M
, ModuleAnalysisManager
&MAM
) {
43 for (auto &F
: M
.functions()) {
44 if (F
.isDeclaration())
46 if (F
.getMetadata(GUIDMetadataName
))
48 const GlobalValue::GUID GUID
= F
.getGUID();
49 F
.setMetadata(GUIDMetadataName
,
50 MDNode::get(M
.getContext(),
51 {ConstantAsMetadata::get(ConstantInt::get(
52 Type::getInt64Ty(M
.getContext()), GUID
))}));
54 return PreservedAnalyses::none();
57 GlobalValue::GUID
AssignGUIDPass::getGUID(const Function
&F
) {
58 if (F
.isDeclaration()) {
59 assert(GlobalValue::isExternalLinkage(F
.getLinkage()));
60 return GlobalValue::getGUID(F
.getGlobalIdentifier());
62 auto *MD
= F
.getMetadata(GUIDMetadataName
);
63 assert(MD
&& "guid not found for defined function");
64 return cast
<ConstantInt
>(cast
<ConstantAsMetadata
>(MD
->getOperand(0))
66 ->stripPointerCasts())
69 AnalysisKey
CtxProfAnalysis::Key
;
71 CtxProfAnalysis::CtxProfAnalysis(std::optional
<StringRef
> Profile
)
72 : Profile([&]() -> std::optional
<StringRef
> {
75 if (UseCtxProfile
.getNumOccurrences())
80 PGOContextualProfile
CtxProfAnalysis::run(Module
&M
,
81 ModuleAnalysisManager
&MAM
) {
84 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
= MemoryBuffer::getFile(*Profile
);
85 if (auto EC
= MB
.getError()) {
86 M
.getContext().emitError("could not open contextual profile file: " +
90 PGOCtxProfileReader
Reader(MB
.get()->getBuffer());
91 auto MaybeCtx
= Reader
.loadContexts();
93 M
.getContext().emitError("contextual profile file is invalid: " +
94 toString(MaybeCtx
.takeError()));
98 DenseSet
<GlobalValue::GUID
> ProfileRootsInModule
;
99 for (const auto &F
: M
)
100 if (!F
.isDeclaration())
101 if (auto GUID
= AssignGUIDPass::getGUID(F
);
102 MaybeCtx
->find(GUID
) != MaybeCtx
->end())
103 ProfileRootsInModule
.insert(GUID
);
105 // Trim first the roots that aren't in this module.
106 for (auto &[RootGuid
, _
] : llvm::make_early_inc_range(*MaybeCtx
))
107 if (!ProfileRootsInModule
.contains(RootGuid
))
108 MaybeCtx
->erase(RootGuid
);
109 // If none of the roots are in the module, we have no profile (for this
111 if (MaybeCtx
->empty())
114 // OK, so we have a valid profile and it's applicable to roots in this module.
115 PGOContextualProfile Result
;
117 for (const auto &F
: M
) {
118 if (F
.isDeclaration())
120 auto GUID
= AssignGUIDPass::getGUID(F
);
121 assert(GUID
&& "guid not found for defined function");
122 const auto &Entry
= F
.begin();
123 uint32_t MaxCounters
= 0; // we expect at least a counter.
124 for (const auto &I
: *Entry
)
125 if (auto *C
= dyn_cast
<InstrProfIncrementInst
>(&I
)) {
127 static_cast<uint32_t>(C
->getNumCounters()->getZExtValue());
132 uint32_t MaxCallsites
= 0;
133 for (const auto &BB
: F
)
134 for (const auto &I
: BB
)
135 if (auto *C
= dyn_cast
<InstrProfCallsite
>(&I
)) {
137 static_cast<uint32_t>(C
->getNumCounters()->getZExtValue());
140 auto [It
, Ins
] = Result
.FuncInfo
.insert(
141 {GUID
, PGOContextualProfile::FunctionInfo(F
.getName())});
144 It
->second
.NextCallsiteIndex
= MaxCallsites
;
145 It
->second
.NextCounterIndex
= MaxCounters
;
147 // If we made it this far, the Result is valid - which we mark by setting
149 Result
.Profiles
= std::move(*MaybeCtx
);
155 PGOContextualProfile::getDefinedFunctionGUID(const Function
&F
) const {
156 if (auto It
= FuncInfo
.find(AssignGUIDPass::getGUID(F
)); It
!= FuncInfo
.end())
161 CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream
&OS
)
162 : OS(OS
), Mode(PrintLevel
) {}
164 PreservedAnalyses
CtxProfAnalysisPrinterPass::run(Module
&M
,
165 ModuleAnalysisManager
&MAM
) {
166 CtxProfAnalysis::Result
&C
= MAM
.getResult
<CtxProfAnalysis
>(M
);
168 OS
<< "No contextual profile was provided.\n";
169 return PreservedAnalyses::all();
172 if (Mode
== PrintMode::Everything
) {
173 OS
<< "Function Info:\n";
174 for (const auto &[Guid
, FuncInfo
] : C
.FuncInfo
)
175 OS
<< Guid
<< " : " << FuncInfo
.Name
176 << ". MaxCounterID: " << FuncInfo
.NextCounterIndex
177 << ". MaxCallsiteID: " << FuncInfo
.NextCallsiteIndex
<< "\n";
180 if (Mode
== PrintMode::Everything
)
181 OS
<< "\nCurrent Profile:\n";
182 convertCtxProfToYaml(OS
, C
.profiles());
184 if (Mode
== PrintMode::YAML
)
185 return PreservedAnalyses::all();
187 OS
<< "\nFlat Profile:\n";
188 auto Flat
= C
.flatten();
189 for (const auto &[Guid
, Counters
] : Flat
) {
191 for (auto V
: Counters
)
195 return PreservedAnalyses::all();
198 InstrProfCallsite
*CtxProfAnalysis::getCallsiteInstrumentation(CallBase
&CB
) {
199 if (!InstrProfCallsite::canInstrumentCallsite(CB
))
201 for (auto *Prev
= CB
.getPrevNode(); Prev
; Prev
= Prev
->getPrevNode()) {
202 if (auto *IPC
= dyn_cast
<InstrProfCallsite
>(Prev
))
204 assert(!isa
<CallBase
>(Prev
) &&
205 "didn't expect to find another call, that's not the callsite "
206 "instrumentation, before an instrumentable callsite");
211 InstrProfIncrementInst
*CtxProfAnalysis::getBBInstrumentation(BasicBlock
&BB
) {
213 if (auto *Incr
= dyn_cast
<InstrProfIncrementInst
>(&I
))
214 if (!isa
<InstrProfIncrementInstStep
>(&I
))
219 InstrProfIncrementInstStep
*
220 CtxProfAnalysis::getSelectInstrumentation(SelectInst
&SI
) {
221 Instruction
*Prev
= &SI
;
222 while ((Prev
= Prev
->getPrevNode()))
223 if (auto *Step
= dyn_cast
<InstrProfIncrementInstStep
>(Prev
))
228 template <class ProfilesTy
, class ProfTy
>
229 static void preorderVisit(ProfilesTy
&Profiles
,
230 function_ref
<void(ProfTy
&)> Visitor
) {
231 std::function
<void(ProfTy
&)> Traverser
= [&](auto &Ctx
) {
233 for (auto &[_
, SubCtxSet
] : Ctx
.callsites())
234 for (auto &[__
, Subctx
] : SubCtxSet
)
237 for (auto &[_
, P
] : Profiles
)
241 void PGOContextualProfile::initIndex() {
242 // Initialize the head of the index list for each function. We don't need it
244 DenseMap
<GlobalValue::GUID
, PGOCtxProfContext
*> InsertionPoints
;
245 for (auto &[Guid
, FI
] : FuncInfo
)
246 InsertionPoints
[Guid
] = &FI
.Index
;
247 preorderVisit
<PGOCtxProfContext::CallTargetMapTy
, PGOCtxProfContext
>(
248 *Profiles
, [&](PGOCtxProfContext
&Ctx
) {
249 auto InsertIt
= InsertionPoints
.find(Ctx
.guid());
250 if (InsertIt
== InsertionPoints
.end())
252 // Insert at the end of the list. Since we traverse in preorder, it
253 // means that when we iterate the list from the beginning, we'd
254 // encounter the contexts in the order we would have, should we have
255 // performed a full preorder traversal.
256 InsertIt
->second
->Next
= &Ctx
;
257 Ctx
.Previous
= InsertIt
->second
;
258 InsertIt
->second
= &Ctx
;
262 void PGOContextualProfile::update(Visitor V
, const Function
&F
) {
263 assert(isFunctionKnown(F
));
264 GlobalValue::GUID G
= getDefinedFunctionGUID(F
);
265 for (auto *Node
= FuncInfo
.find(G
)->second
.Index
.Next
; Node
;
267 V(*reinterpret_cast<PGOCtxProfContext
*>(Node
));
270 void PGOContextualProfile::visit(ConstVisitor V
, const Function
*F
) const {
272 return preorderVisit
<const PGOCtxProfContext::CallTargetMapTy
,
273 const PGOCtxProfContext
>(*Profiles
, V
);
274 assert(isFunctionKnown(*F
));
275 GlobalValue::GUID G
= getDefinedFunctionGUID(*F
);
276 for (const auto *Node
= FuncInfo
.find(G
)->second
.Index
.Next
; Node
;
278 V(*reinterpret_cast<const PGOCtxProfContext
*>(Node
));
281 const CtxProfFlatProfile
PGOContextualProfile::flatten() const {
282 assert(Profiles
.has_value());
283 CtxProfFlatProfile Flat
;
284 preorderVisit
<const PGOCtxProfContext::CallTargetMapTy
,
285 const PGOCtxProfContext
>(
286 *Profiles
, [&](const PGOCtxProfContext
&Ctx
) {
287 auto [It
, Ins
] = Flat
.insert({Ctx
.guid(), {}});
289 llvm::append_range(It
->second
, Ctx
.counters());
292 assert(It
->second
.size() == Ctx
.counters().size() &&
293 "All contexts corresponding to a function should have the exact "
294 "same number of counters.");
295 for (size_t I
= 0, E
= It
->second
.size(); I
< E
; ++I
)
296 It
->second
[I
] += Ctx
.counters()[I
];
301 void CtxProfAnalysis::collectIndirectCallPromotionList(
302 CallBase
&IC
, Result
&Profile
,
303 SetVector
<std::pair
<CallBase
*, Function
*>> &Candidates
) {
304 const auto *Instr
= CtxProfAnalysis::getCallsiteInstrumentation(IC
);
307 Module
&M
= *IC
.getParent()->getModule();
308 const uint32_t CallID
= Instr
->getIndex()->getZExtValue();
310 [&](const PGOCtxProfContext
&Ctx
) {
311 const auto &Targets
= Ctx
.callsites().find(CallID
);
312 if (Targets
== Ctx
.callsites().end())
314 for (const auto &[Guid
, _
] : Targets
->second
)
315 if (auto Name
= Profile
.getFunctionName(Guid
); !Name
.empty())
316 if (auto *Target
= M
.getFunction(Name
))
317 if (Target
->hasFnAttribute(Attribute::AlwaysInline
))
318 Candidates
.insert({&IC
, Target
});