//===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interface between the inliner and a learned model.
// It delegates model evaluation to either the AOT compiled model (the
// 'release' mode) or a runtime-loaded model (the 'development' case).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<std::string> InteractiveChannelBaseName(
    "inliner-interactive-channel-base", cl::Hidden,
    cl::desc(
        "Base file path for the interactive mode. The incoming filename should "
        "have the name <inliner-interactive-channel-base>.in, while the "
        "outgoing name should be <inliner-interactive-channel-base>.out"));
static const std::string InclDefaultMsg =
    (Twine("In interactive mode, also send the default policy decision: ") +
     DefaultDecisionName + ".")
        .str();
static cl::opt<bool>
    InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden,
                              cl::desc(InclDefaultMsg));

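// Usage sketch (hypothetical paths, not part of this file): with the ML
// advisor selected (e.g. -mllvm -enable-ml-inliner=release), passing
//   -mllvm -inliner-interactive-channel-base=/tmp/inliner
// makes the advisor write feature tensors to /tmp/inliner.out and read the
// external policy's decisions from /tmp/inliner.in; adding
// -mllvm -inliner-interactive-include-default also sends the default policy's
// decision along with the features.
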
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
using CompiledModelType = llvm::InlinerSizeModel;
#else
using CompiledModelType = NoopSavedModelImpl;
#endif

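// Note: when no AOT-compiled policy is linked in, CompiledModelType is the
// no-op NoopSavedModelImpl; isEmbeddedModelEvaluatorValid<CompiledModelType>()
// then reports it as invalid, so getReleaseModeAdvisor below only returns an
// advisor if an interactive channel was requested.
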
std::unique_ptr<InlineAdvisor>
llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
                            std::function<bool(CallBase &)> GetDefaultAdvice) {
  if (!llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() &&
      InteractiveChannelBaseName.empty())
    return nullptr;
  std::unique_ptr<MLModelRunner> AOTRunner;
  if (InteractiveChannelBaseName.empty())
    AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
        M.getContext(), FeatureMap, DecisionName);
  else {
    auto Features = FeatureMap;
    if (InteractiveIncludeDefault)
      Features.push_back(DefaultDecisionSpec);
    AOTRunner = std::make_unique<InteractiveModelRunner>(
        M.getContext(), Features, InlineDecisionSpec,
        InteractiveChannelBaseName + ".out",
        InteractiveChannelBaseName + ".in");
  }
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner),
                                           GetDefaultAdvice);
}

#define DEBUG_TYPE "inline-ml"

static cl::opt<float> SizeIncreaseThreshold(
    "ml-advisor-size-increase-threshold", cl::Hidden,
    cl::desc("Maximum factor by which expected native size may increase before "
             "blocking any further inlining."),
    cl::init(2.0));

static cl::opt<bool> KeepFPICache(
    "ml-advisor-keep-fpi-cache", cl::Hidden,
    cl::desc(
        "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
    cl::init(false));

const std::vector<TensorSpec> llvm::FeatureMap{
#define POPULATE_NAMES(DTYPE, SHAPE, NAME, __) TensorSpec::createSpec<DTYPE>(#NAME, SHAPE),
// InlineCost features - these must come first
    INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
    INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};

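// Note: POPULATE_NAMES expands each (dtype, shape, name) tuple supplied by the
// iterator macros into a TensorSpec entry, so the order of FeatureMap matches
// the FeatureIndex enum used when filling the tensors in getAdviceImpl.
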
const char *const llvm::DecisionName = "inlining_decision";
const TensorSpec llvm::InlineDecisionSpec =
    TensorSpec::createSpec<int64_t>(DecisionName, {1});
const char *const llvm::DefaultDecisionName = "inlining_default";
const TensorSpec llvm::DefaultDecisionSpec =
    TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1});
const char *const llvm::RewardName = "delta_size";

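// Returns the call site wrapped by I if it calls a function defined in this
// module (and therefore a candidate for inlining), nullptr otherwise.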
CallBase *getInlinableCS(Instruction &I) {
  if (auto *CS = dyn_cast<CallBase>(&I))
    if (Function *Callee = CS->getCalledFunction()) {
      if (!Callee->isDeclaration()) {
        return CS;
      }
    }
  return nullptr;
}

MLInlineAdvisor::MLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::unique_ptr<MLModelRunner> Runner,
    std::function<bool(CallBase &)> GetDefaultAdvice)
    : InlineAdvisor(
          M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
      ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice),
      CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
      InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
  ModelRunner->switchContext("");
  // Extract the 'call site height' feature - the position of a call site
  // relative to the farthest statically reachable SCC node. We don't mutate
  // this value while inlining happens. Empirically, this feature proved
  // critical in behavioral cloning - i.e. training a model to mimic the manual
  // heuristic's decisions - and, thus, equally important for training for
  // improvement.
  CallGraph CGraph(M);
  for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
    const std::vector<CallGraphNode *> &CGNodes = *I;
    unsigned Level = 0;
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (!F || F->isDeclaration())
        continue;
      for (auto &I : instructions(F)) {
        if (auto *CS = getInlinableCS(I)) {
          auto *Called = CS->getCalledFunction();
          auto Pos = FunctionLevels.find(&CG.get(*Called));
          // In bottom up traversal, an inlinable callee is either in the same
          // SCC, or in an already visited SCC. So not finding its level means
          // we haven't visited it yet, meaning it's in this SCC.
          if (Pos == FunctionLevels.end())
            continue;
          Level = std::max(Level, Pos->second + 1);
        }
      }
    }
    for (auto *CGNode : CGNodes) {
      Function *F = CGNode->getFunction();
      if (F && !F->isDeclaration())
        FunctionLevels[&CG.get(*F)] = Level;
    }
  }
  for (auto KVP : FunctionLevels) {
    AllNodes.insert(KVP.first);
    EdgeCount += getLocalCalls(KVP.first->getFunction());
  }
  NodeCount = AllNodes.size();
}

unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
  return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
}

void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
  if (!LastSCC || ForceStop)
    return;

  // Function passes executed between InlinerPass runs may have changed the
  // module-wide features.
  // The cgscc pass manager rules are such that:
  // - if a pass leads to merging SCCs, then the pipeline is restarted on the
  //   merged SCC
  // - if a pass leads to splitting the SCC, then we continue with one of the
  //   resulting splits
  // This means that the NodesInLastSCC is a superset (not strict) of the nodes
  // that subsequent passes would have processed
  // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
  //   they'd be adjacent to Nodes in the last SCC. So we just need to check the
  //   boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
  //   care about the nature of the Edge (call or ref). `FunctionLevels`-wise, we
  //   record them at the same level as the original node (this is a choice, may
  //   be revisited).
  NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
  while (!NodesInLastSCC.empty()) {
    const auto *N = *NodesInLastSCC.begin();
    NodesInLastSCC.erase(N);
    // The Function wrapped by N could have been deleted since we last saw it.
    if (N->isDead()) {
      assert(!N->getFunction().isDeclaration());
      continue;
    }
    ++NodeCount;
    EdgeCount += getLocalCalls(N->getFunction());
    const auto NLevel = FunctionLevels.at(N);
    for (const auto &E : *(*N)) {
      const auto *AdjNode = &E.getNode();
      assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
      auto I = AllNodes.insert(AdjNode);
      if (I.second) {
        NodesInLastSCC.insert(AdjNode);
        FunctionLevels[AdjNode] = NLevel;
      }
    }
  }

  EdgeCount -= EdgesOfLastSeenNodes;
  EdgesOfLastSeenNodes = 0;

  // (Re)use NodesInLastSCC to remember the nodes in the SCC right now,
  // in case the SCC is split before onPassExit and some nodes are split out
  assert(NodesInLastSCC.empty());
  for (const auto &N : *LastSCC)
    NodesInLastSCC.insert(&N);
}

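// onPassExit records which of the SCC's nodes survived and how many local call
// edges they have, so the next onPassEntry can delta-update NodeCount and
// EdgeCount instead of recomputing them for the whole module.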
void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
  // No need to keep this around - function passes will invalidate it.
  if (!KeepFPICache)
    FPICache.clear();
  if (!LastSCC || ForceStop)
    return;

  // Keep track of the nodes and edges we last saw. Then, in onPassEntry, we
  // update the node count and edge count from the subset of these nodes that
  // survived.
  EdgesOfLastSeenNodes = 0;

  // Check on nodes that were in SCC onPassEntry
  for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
    if ((*I)->isDead())
      NodesInLastSCC.erase(*I++);
    else
      EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
  }

  // Check on nodes that may have got added to SCC
  for (const auto &N : *LastSCC) {
    auto I = NodesInLastSCC.insert(&N);
    if (I.second)
      EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
  }
  assert(NodeCount >= NodesInLastSCC.size());
  assert(EdgeCount >= EdgesOfLastSeenNodes);
}

int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
  return getCachedFPI(F).DirectCallsToDefinedFunctions;
}

// Update the internal state of the advisor, and force invalidate feature
// analysis. Currently, we maintain minimal (and very simple) global state - the
// number of functions and the number of static calls. We also keep track of the
// total IR size in this module, to stop misbehaving policies at a certain bloat
// factor (SizeIncreaseThreshold).
void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
                                           bool CalleeWasDeleted) {
  Function *Caller = Advice.getCaller();
  Function *Callee = Advice.getCallee();
  // The caller features aren't valid anymore.
  {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<FunctionPropertiesAnalysis>();
    PA.abandon<DominatorTreeAnalysis>();
    PA.abandon<LoopAnalysis>();
    FAM.invalidate(*Caller, PA);
  }
  Advice.updateCachedCallerFPI(FAM);
  int64_t IRSizeAfter =
      getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
  CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
  if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
    ForceStop = true;

  // We can delta-update module-wide features. We know the inlining only changed
  // the caller, and maybe the callee (by deleting the latter).
  // Nodes are simple to update.
  // For edges, we 'forget' the edges that the caller and callee used to have
  // before inlining, and add back what they currently have together.
  int64_t NewCallerAndCalleeEdges =
      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;

  if (CalleeWasDeleted)
    --NodeCount;
  else
    NewCallerAndCalleeEdges +=
        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
  EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
  assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
}

int64_t MLInlineAdvisor::getModuleIRSize() const {
  int64_t Ret = 0;
  for (auto &F : M)
    if (!F.isDeclaration())
      Ret += getIRSize(F);
  return Ret;
}

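// The FPI cache lets the advisor reuse FunctionPropertiesInfo across advice
// queries; recordUnsuccessfulInliningImpl also restores the caller's
// pre-inlining entry through it when an attempted inlining is rolled back.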
FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
  auto InsertPair =
      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
  if (!InsertPair.second)
    return InsertPair.first->second;
  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
  return InsertPair.first->second;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;

  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();

  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };
  auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);

  auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same thing if this is a recursive case.
  if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
      &Caller == &Callee)
    return getMandatoryAdvice(CB, false);

  bool Mandatory =
      MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;

  // If we need to stop, we don't want to track any more state changes, so we
  // just return the base InlineAdvice, which acts as a noop.
  if (ForceStop) {
    ORE.emit([&] {
      return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
             << "Won't attempt inlining because module size grew too much.";
    });
    return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
  }

  int CostEstimate = 0;
  if (!Mandatory) {
    auto IsCallSiteInlinable =
        llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
    if (!IsCallSiteInlinable) {
      // We can't inline this for correctness reasons, so return the base
      // InlineAdvice, as we don't care about tracking any state changes (which
      // won't happen).
      return std::make_unique<InlineAdvice>(this, CB, ORE, false);
    }
    CostEstimate = *IsCallSiteInlinable;
  }

  const auto CostFeatures =
      llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
  if (!CostFeatures) {
    return std::make_unique<InlineAdvice>(this, CB, ORE, false);
  }

  if (Mandatory)
    return getMandatoryAdvice(CB, true);

  auto NrCtantParams = 0;
  for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
    NrCtantParams += (isa<Constant>(*I));
  }

  auto &CallerBefore = getCachedFPI(Caller);
  auto &CalleeBefore = getCachedFPI(Callee);

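  // Populate the model's input tensors from the pre-inlining function
  // properties and the advisor's module-wide counters.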
  *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_basic_block_count) =
      CalleeBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::callsite_height) =
      getInitialFunctionLevel(Caller);
  *ModelRunner->getTensor<int64_t>(FeatureIndex::node_count) = NodeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::nr_ctant_params) =
      NrCtantParams;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::edge_count) = EdgeCount;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_users) =
      CallerBefore.Uses;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::caller_conditionally_executed_blocks) =
      CallerBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_basic_block_count) =
      CallerBefore.BasicBlockCount;
  *ModelRunner->getTensor<int64_t>(
      FeatureIndex::callee_conditionally_executed_blocks) =
      CalleeBefore.BlocksReachedFromConditionalInstruction;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_users) =
      CalleeBefore.Uses;
  *ModelRunner->getTensor<int64_t>(FeatureIndex::cost_estimate) = CostEstimate;

  // Add the cost features
  for (size_t I = 0;
       I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
    *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
        static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
  }
  // The default decision tensor, if requested, was set up to be right at the
  // end.
  if (!InteractiveChannelBaseName.empty() && InteractiveIncludeDefault)
    *ModelRunner->getTensor<int64_t>(InlineCostFeatureIndex::NumberOfFeatures) =
        GetDefaultAdvice(CB);
  return getAdviceFromModel(CB, ORE);
}

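// The model emits a single int64 decision; any nonzero value is interpreted
// as "inline".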
std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
                                    OptimizationRemarkEmitter &ORE) {
  return std::make_unique<MLInlineAdvice>(
      this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
}

std::unique_ptr<InlineAdvice>
MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
  if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
           .isReachableFromEntry(CB.getParent()))
    return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
  return nullptr;
}

std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
                                                                  bool Advice) {
  // Make sure we track inlinings in all cases - mandatory or not.
  if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
    return Skip;
  if (Advice && !ForceStop)
    return getMandatoryAdviceImpl(CB);

  // If this is a "never inline" case, there won't be any changes to internal
  // state we need to track, so we can just return the base InlineAdvice, which
  // will do nothing interesting.
  // Same if we are forced to stop - we don't track anymore.
  return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
}

std::unique_ptr<MLInlineAdvice>
MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
}

void MLInlineAdvisor::print(raw_ostream &OS) const {
  OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
     << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
  OS << "[MLInlineAdvisor] FPI:\n";
  for (auto I : FPICache) {
    OS << I.first->getName() << ":\n";
    I.second.print(OS);
    OS << "\n";
  }
  OS << "[MLInlineAdvisor] FuncLevels:\n";
  for (auto I : FunctionLevels)
    OS << (I.first->isDead() ? "<deleted>" : I.first->getFunction().getName())
       << " : " << I.second << "\n";
}

MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
                               OptimizationRemarkEmitter &ORE,
                               bool Recommendation)
    : InlineAdvice(Advisor, CB, ORE, Recommendation),
      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
      CallerAndCalleeEdges(Advisor->isForcedToStop()
                               ? 0
                               : (Advisor->getLocalCalls(*Caller) +
                                  Advisor->getLocalCalls(*Callee))),
      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
  if (Recommendation)
    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
}

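// Attach the callee name, every model input feature, and the final decision to
// the remark, so emitted remarks double as a log of the model's inputs.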
void MLInlineAdvice::reportContextForRemark(
    DiagnosticInfoOptimizationBase &OR) {
  using namespace ore;
  OR << NV("Callee", Callee->getName());
  for (size_t I = 0; I < NumberOfFeatures; ++I)
    OR << NV(FeatureMap[I].name(),
             *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
  OR << NV("ShouldInline", isInliningRecommended());
}

void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
  FPU->finish(FAM);
}

void MLInlineAdvice::recordInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
}

void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
  ORE.emit([&]() {
    OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                         Block);
    reportContextForRemark(R);
    return R;
  });
  getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
}

void MLInlineAdvice::recordUnsuccessfulInliningImpl(
    const InlineResult &Result) {
  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                               DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}

void MLInlineAdvice::recordUnattemptedInliningImpl() {
  ORE.emit([&]() {
    OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
    reportContextForRemark(R);
    return R;
  });
}