1 //===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of the contextual profile analysis, which maintains contextual
10 // profiling info through IPO passes.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Analysis/CtxProfAnalysis.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/IR/Analysis.h"
17 #include "llvm/IR/IntrinsicInst.h"
18 #include "llvm/IR/Module.h"
19 #include "llvm/IR/PassManager.h"
20 #include "llvm/ProfileData/PGOCtxProfReader.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/JSON.h"
23 #include "llvm/Support/MemoryBuffer.h"
25 #define DEBUG_TYPE "ctx_prof"
29 UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden
,
30 cl::desc("Use the specified contextual profile file"));
32 static cl::opt
<CtxProfAnalysisPrinterPass::PrintMode
> PrintLevel(
33 "ctx-profile-printer-level",
34 cl::init(CtxProfAnalysisPrinterPass::PrintMode::JSON
), cl::Hidden
,
35 cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything
,
36 "everything", "print everything - most verbose"),
37 clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::JSON
, "json",
38 "just the json representation of the profile")),
39 cl::desc("Verbosity level of the contextual profile printer pass."));
43 Value
toJSON(const PGOCtxProfContext
&P
) {
45 Ret
["Guid"] = P
.guid();
46 Ret
["Counters"] = Array(P
.counters());
47 if (P
.callsites().empty())
50 ::llvm::map_range(P
.callsites(), [](const auto &P
) { return P
.first
; });
51 auto MaxIt
= ::llvm::max_element(AllCS
);
52 assert(MaxIt
!= AllCS
.end() && "We should have a max value because the "
53 "callsites collection is not empty.");
55 // Iterate to, and including, the maximum index.
56 for (auto I
= 0U, Max
= *MaxIt
; I
<= Max
; ++I
) {
57 CSites
.push_back(Array());
58 Array
&Targets
= *CSites
.back().getAsArray();
60 for (const auto &[_
, Ctx
] : P
.callsite(I
))
61 Targets
.push_back(toJSON(Ctx
));
63 Ret
["Callsites"] = std::move(CSites
);
68 Value
toJSON(const PGOCtxProfContext::CallTargetMapTy
&P
) {
70 for (const auto &[_
, Ctx
] : P
)
71 Ret
.push_back(toJSON(Ctx
));
77 const char *AssignGUIDPass::GUIDMetadataName
= "guid";
79 PreservedAnalyses
AssignGUIDPass::run(Module
&M
, ModuleAnalysisManager
&MAM
) {
80 for (auto &F
: M
.functions()) {
81 if (F
.isDeclaration())
83 if (F
.getMetadata(GUIDMetadataName
))
85 const GlobalValue::GUID GUID
= F
.getGUID();
86 F
.setMetadata(GUIDMetadataName
,
87 MDNode::get(M
.getContext(),
88 {ConstantAsMetadata::get(ConstantInt::get(
89 Type::getInt64Ty(M
.getContext()), GUID
))}));
91 return PreservedAnalyses::none();
94 GlobalValue::GUID
AssignGUIDPass::getGUID(const Function
&F
) {
95 if (F
.isDeclaration()) {
96 assert(GlobalValue::isExternalLinkage(F
.getLinkage()));
97 return GlobalValue::getGUID(F
.getGlobalIdentifier());
99 auto *MD
= F
.getMetadata(GUIDMetadataName
);
100 assert(MD
&& "guid not found for defined function");
101 return cast
<ConstantInt
>(cast
<ConstantAsMetadata
>(MD
->getOperand(0))
103 ->stripPointerCasts())
106 AnalysisKey
CtxProfAnalysis::Key
;
108 CtxProfAnalysis::CtxProfAnalysis(std::optional
<StringRef
> Profile
)
109 : Profile([&]() -> std::optional
<StringRef
> {
112 if (UseCtxProfile
.getNumOccurrences())
113 return UseCtxProfile
;
117 PGOContextualProfile
CtxProfAnalysis::run(Module
&M
,
118 ModuleAnalysisManager
&MAM
) {
121 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> MB
= MemoryBuffer::getFile(*Profile
);
122 if (auto EC
= MB
.getError()) {
123 M
.getContext().emitError("could not open contextual profile file: " +
127 PGOCtxProfileReader
Reader(MB
.get()->getBuffer());
128 auto MaybeCtx
= Reader
.loadContexts();
130 M
.getContext().emitError("contextual profile file is invalid: " +
131 toString(MaybeCtx
.takeError()));
135 DenseSet
<GlobalValue::GUID
> ProfileRootsInModule
;
136 for (const auto &F
: M
)
137 if (!F
.isDeclaration())
138 if (auto GUID
= AssignGUIDPass::getGUID(F
);
139 MaybeCtx
->find(GUID
) != MaybeCtx
->end())
140 ProfileRootsInModule
.insert(GUID
);
142 // Trim first the roots that aren't in this module.
143 for (auto &[RootGuid
, _
] : llvm::make_early_inc_range(*MaybeCtx
))
144 if (!ProfileRootsInModule
.contains(RootGuid
))
145 MaybeCtx
->erase(RootGuid
);
146 // If none of the roots are in the module, we have no profile (for this
148 if (MaybeCtx
->empty())
151 // OK, so we have a valid profile and it's applicable to roots in this module.
152 PGOContextualProfile Result
;
154 for (const auto &F
: M
) {
155 if (F
.isDeclaration())
157 auto GUID
= AssignGUIDPass::getGUID(F
);
158 assert(GUID
&& "guid not found for defined function");
159 const auto &Entry
= F
.begin();
160 uint32_t MaxCounters
= 0; // we expect at least a counter.
161 for (const auto &I
: *Entry
)
162 if (auto *C
= dyn_cast
<InstrProfIncrementInst
>(&I
)) {
164 static_cast<uint32_t>(C
->getNumCounters()->getZExtValue());
169 uint32_t MaxCallsites
= 0;
170 for (const auto &BB
: F
)
171 for (const auto &I
: BB
)
172 if (auto *C
= dyn_cast
<InstrProfCallsite
>(&I
)) {
174 static_cast<uint32_t>(C
->getNumCounters()->getZExtValue());
177 auto [It
, Ins
] = Result
.FuncInfo
.insert(
178 {GUID
, PGOContextualProfile::FunctionInfo(F
.getName())});
181 It
->second
.NextCallsiteIndex
= MaxCallsites
;
182 It
->second
.NextCounterIndex
= MaxCounters
;
184 // If we made it this far, the Result is valid - which we mark by setting
186 Result
.Profiles
= std::move(*MaybeCtx
);
192 PGOContextualProfile::getDefinedFunctionGUID(const Function
&F
) const {
193 if (auto It
= FuncInfo
.find(AssignGUIDPass::getGUID(F
)); It
!= FuncInfo
.end())
198 CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream
&OS
)
199 : OS(OS
), Mode(PrintLevel
) {}
201 PreservedAnalyses
CtxProfAnalysisPrinterPass::run(Module
&M
,
202 ModuleAnalysisManager
&MAM
) {
203 CtxProfAnalysis::Result
&C
= MAM
.getResult
<CtxProfAnalysis
>(M
);
205 OS
<< "No contextual profile was provided.\n";
206 return PreservedAnalyses::all();
209 if (Mode
== PrintMode::Everything
) {
210 OS
<< "Function Info:\n";
211 for (const auto &[Guid
, FuncInfo
] : C
.FuncInfo
)
212 OS
<< Guid
<< " : " << FuncInfo
.Name
213 << ". MaxCounterID: " << FuncInfo
.NextCounterIndex
214 << ". MaxCallsiteID: " << FuncInfo
.NextCallsiteIndex
<< "\n";
217 const auto JSONed
= ::llvm::json::toJSON(C
.profiles());
219 if (Mode
== PrintMode::Everything
)
220 OS
<< "\nCurrent Profile:\n";
221 OS
<< formatv("{0:2}", JSONed
);
222 if (Mode
== PrintMode::JSON
)
223 return PreservedAnalyses::all();
226 OS
<< "\nFlat Profile:\n";
227 auto Flat
= C
.flatten();
228 for (const auto &[Guid
, Counters
] : Flat
) {
230 for (auto V
: Counters
)
234 return PreservedAnalyses::all();
237 InstrProfCallsite
*CtxProfAnalysis::getCallsiteInstrumentation(CallBase
&CB
) {
238 if (!InstrProfCallsite::canInstrumentCallsite(CB
))
240 for (auto *Prev
= CB
.getPrevNode(); Prev
; Prev
= Prev
->getPrevNode()) {
241 if (auto *IPC
= dyn_cast
<InstrProfCallsite
>(Prev
))
243 assert(!isa
<CallBase
>(Prev
) &&
244 "didn't expect to find another call, that's not the callsite "
245 "instrumentation, before an instrumentable callsite");
250 InstrProfIncrementInst
*CtxProfAnalysis::getBBInstrumentation(BasicBlock
&BB
) {
252 if (auto *Incr
= dyn_cast
<InstrProfIncrementInst
>(&I
))
253 if (!isa
<InstrProfIncrementInstStep
>(&I
))
258 InstrProfIncrementInstStep
*
259 CtxProfAnalysis::getSelectInstrumentation(SelectInst
&SI
) {
260 Instruction
*Prev
= &SI
;
261 while ((Prev
= Prev
->getPrevNode()))
262 if (auto *Step
= dyn_cast
<InstrProfIncrementInstStep
>(Prev
))
267 template <class ProfilesTy
, class ProfTy
>
268 static void preorderVisit(ProfilesTy
&Profiles
,
269 function_ref
<void(ProfTy
&)> Visitor
) {
270 std::function
<void(ProfTy
&)> Traverser
= [&](auto &Ctx
) {
272 for (auto &[_
, SubCtxSet
] : Ctx
.callsites())
273 for (auto &[__
, Subctx
] : SubCtxSet
)
276 for (auto &[_
, P
] : Profiles
)
280 void PGOContextualProfile::initIndex() {
281 // Initialize the head of the index list for each function. We don't need it
283 DenseMap
<GlobalValue::GUID
, PGOCtxProfContext
*> InsertionPoints
;
284 for (auto &[Guid
, FI
] : FuncInfo
)
285 InsertionPoints
[Guid
] = &FI
.Index
;
286 preorderVisit
<PGOCtxProfContext::CallTargetMapTy
, PGOCtxProfContext
>(
287 *Profiles
, [&](PGOCtxProfContext
&Ctx
) {
288 auto InsertIt
= InsertionPoints
.find(Ctx
.guid());
289 if (InsertIt
== InsertionPoints
.end())
291 // Insert at the end of the list. Since we traverse in preorder, it
292 // means that when we iterate the list from the beginning, we'd
293 // encounter the contexts in the order we would have, should we have
294 // performed a full preorder traversal.
295 InsertIt
->second
->Next
= &Ctx
;
296 Ctx
.Previous
= InsertIt
->second
;
297 InsertIt
->second
= &Ctx
;
301 void PGOContextualProfile::update(Visitor V
, const Function
&F
) {
302 assert(isFunctionKnown(F
));
303 GlobalValue::GUID G
= getDefinedFunctionGUID(F
);
304 for (auto *Node
= FuncInfo
.find(G
)->second
.Index
.Next
; Node
;
306 V(*reinterpret_cast<PGOCtxProfContext
*>(Node
));
309 void PGOContextualProfile::visit(ConstVisitor V
, const Function
*F
) const {
311 return preorderVisit
<const PGOCtxProfContext::CallTargetMapTy
,
312 const PGOCtxProfContext
>(*Profiles
, V
);
313 assert(isFunctionKnown(*F
));
314 GlobalValue::GUID G
= getDefinedFunctionGUID(*F
);
315 for (const auto *Node
= FuncInfo
.find(G
)->second
.Index
.Next
; Node
;
317 V(*reinterpret_cast<const PGOCtxProfContext
*>(Node
));
320 const CtxProfFlatProfile
PGOContextualProfile::flatten() const {
321 assert(Profiles
.has_value());
322 CtxProfFlatProfile Flat
;
323 preorderVisit
<const PGOCtxProfContext::CallTargetMapTy
,
324 const PGOCtxProfContext
>(
325 *Profiles
, [&](const PGOCtxProfContext
&Ctx
) {
326 auto [It
, Ins
] = Flat
.insert({Ctx
.guid(), {}});
328 llvm::append_range(It
->second
, Ctx
.counters());
331 assert(It
->second
.size() == Ctx
.counters().size() &&
332 "All contexts corresponding to a function should have the exact "
333 "same number of counters.");
334 for (size_t I
= 0, E
= It
->second
.size(); I
< E
; ++I
)
335 It
->second
[I
] += Ctx
.counters()[I
];
340 void CtxProfAnalysis::collectIndirectCallPromotionList(
341 CallBase
&IC
, Result
&Profile
,
342 SetVector
<std::pair
<CallBase
*, Function
*>> &Candidates
) {
343 const auto *Instr
= CtxProfAnalysis::getCallsiteInstrumentation(IC
);
346 Module
&M
= *IC
.getParent()->getModule();
347 const uint32_t CallID
= Instr
->getIndex()->getZExtValue();
349 [&](const PGOCtxProfContext
&Ctx
) {
350 const auto &Targets
= Ctx
.callsites().find(CallID
);
351 if (Targets
== Ctx
.callsites().end())
353 for (const auto &[Guid
, _
] : Targets
->second
)
354 if (auto Name
= Profile
.getFunctionName(Guid
); !Name
.empty())
355 if (auto *Target
= M
.getFunction(Name
))
356 if (Target
->hasFnAttribute(Attribute::AlwaysInline
))
357 Candidates
.insert({&IC
, Target
});