//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has a limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//
27 #include "bolt/Passes/Inliner.h"
28 #include "bolt/Core/MCPlus.h"
29 #include "llvm/Support/CommandLine.h"
32 #define DEBUG_TYPE "bolt-inliner"
38 extern cl::OptionCategory BoltOptCategory
;
41 AdjustProfile("inline-ap",
42 cl::desc("adjust function profile after inlining"),
43 cl::cat(BoltOptCategory
));
45 static cl::list
<std::string
>
46 ForceInlineFunctions("force-inline",
48 cl::desc("list of functions to always consider for inlining"),
49 cl::value_desc("func1,func2,func3,..."),
51 cl::cat(BoltOptCategory
));
53 static cl::opt
<bool> InlineAll("inline-all", cl::desc("inline all functions"),
54 cl::cat(BoltOptCategory
));
56 static cl::opt
<bool> InlineIgnoreLeafCFI(
57 "inline-ignore-leaf-cfi",
58 cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
59 cl::init(true), cl::ReallyHidden
, cl::cat(BoltOptCategory
));
61 static cl::opt
<bool> InlineIgnoreCFI(
64 "inline functions with CFI programs (can break exception handling)"),
65 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
67 static cl::opt
<unsigned>
68 InlineLimit("inline-limit",
69 cl::desc("maximum number of call sites to inline"), cl::init(0),
70 cl::Hidden
, cl::cat(BoltOptCategory
));
72 static cl::opt
<unsigned>
73 InlineMaxIters("inline-max-iters",
74 cl::desc("maximum number of inline iterations"), cl::init(3),
75 cl::Hidden
, cl::cat(BoltOptCategory
));
77 static cl::opt
<bool> InlineSmallFunctions(
78 "inline-small-functions",
79 cl::desc("inline functions if increase in size is less than defined by "
80 "-inline-small-functions-bytes"),
81 cl::cat(BoltOptCategory
));
83 static cl::opt
<unsigned> InlineSmallFunctionsBytes(
84 "inline-small-functions-bytes",
85 cl::desc("max number of bytes for the function to be considered small for "
87 cl::init(4), cl::Hidden
, cl::cat(BoltOptCategory
));
89 static cl::opt
<bool> NoInline(
91 cl::desc("disable all inlining (overrides other inlining options)"),
92 cl::cat(BoltOptCategory
));
94 /// This function returns true if any of inlining options are specified and the
95 /// inlining pass should be executed. Whenever a new inlining option is added,
96 /// this function should reflect the change.
97 bool inliningEnabled() {
99 (InlineAll
|| InlineSmallFunctions
|| !ForceInlineFunctions
.empty());
102 bool mustConsider(const llvm::bolt::BinaryFunction
&Function
) {
103 for (std::string
&Name
: opts::ForceInlineFunctions
)
104 if (Function
.hasName(Name
))
110 if (opts::InlineIgnoreCFI
)
111 opts::InlineIgnoreLeafCFI
= true;
114 opts::InlineSmallFunctions
= true;
122 uint64_t Inliner::SizeOfCallInst
;
123 uint64_t Inliner::SizeOfTailCallInst
;
125 uint64_t Inliner::getSizeOfCallInst(const BinaryContext
&BC
) {
127 return SizeOfCallInst
;
130 BC
.MIB
->createCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
131 SizeOfCallInst
= BC
.computeInstructionSize(Inst
);
133 return SizeOfCallInst
;
136 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext
&BC
) {
137 if (SizeOfTailCallInst
)
138 return SizeOfTailCallInst
;
141 BC
.MIB
->createTailCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
142 SizeOfTailCallInst
= BC
.computeInstructionSize(Inst
);
144 return SizeOfTailCallInst
;
147 InliningInfo
getInliningInfo(const BinaryFunction
&BF
) {
148 const BinaryContext
&BC
= BF
.getBinaryContext();
149 bool DirectSP
= false;
153 // Perform necessary checks unless the option overrides it.
154 if (!opts::mustConsider(BF
)) {
155 if (BF
.hasSDTMarker())
158 if (BF
.hasEHRanges())
161 if (BF
.isMultiEntry())
164 if (BF
.hasJumpTables())
167 const MCPhysReg SPReg
= BC
.MIB
->getStackPointer();
168 for (const BinaryBasicBlock
&BB
: BF
) {
169 for (const MCInst
&Inst
: BB
) {
170 // Tail calls are marked as implicitly using the stack pointer and they
172 if (BC
.MIB
->isTailCall(Inst
))
175 if (BC
.MIB
->isCFI(Inst
)) {
180 if (BC
.MIB
->isCall(Inst
))
183 // Push/pop instructions are straightforward to handle.
184 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
187 DirectSP
|= BC
.MIB
->hasDefOfPhysReg(Inst
, SPReg
) ||
188 BC
.MIB
->hasUseOfPhysReg(Inst
, SPReg
);
194 if (!opts::InlineIgnoreLeafCFI
)
197 if (!IsLeaf
&& !opts::InlineIgnoreCFI
)
201 InliningInfo
Info(DirectSP
? INL_TAILCALL
: INL_ANY
);
203 size_t Size
= BF
.estimateSize();
205 Info
.SizeAfterInlining
= Size
;
206 Info
.SizeAfterTailCallInlining
= Size
;
208 // Handle special case of the known size reduction.
209 if (BF
.size() == 1) {
210 // For a regular call the last return instruction could be removed
211 // (or converted to a branch).
212 const MCInst
*LastInst
= BF
.back().getLastNonPseudoInstr();
213 if (LastInst
&& BC
.MIB
->isReturn(*LastInst
) &&
214 !BC
.MIB
->isTailCall(*LastInst
)) {
215 const uint64_t RetInstSize
= BC
.computeInstructionSize(*LastInst
);
216 assert(Size
>= RetInstSize
);
217 Info
.SizeAfterInlining
-= RetInstSize
;
224 void Inliner::findInliningCandidates(BinaryContext
&BC
) {
225 for (const auto &BFI
: BC
.getBinaryFunctions()) {
226 const BinaryFunction
&Function
= BFI
.second
;
227 if (!shouldOptimize(Function
))
229 const InliningInfo InlInfo
= getInliningInfo(Function
);
230 if (InlInfo
.Type
!= INL_NONE
)
231 InliningCandidates
[&Function
] = InlInfo
;
235 std::pair
<BinaryBasicBlock
*, BinaryBasicBlock::iterator
>
236 Inliner::inlineCall(BinaryBasicBlock
&CallerBB
,
237 BinaryBasicBlock::iterator CallInst
,
238 const BinaryFunction
&Callee
) {
239 BinaryFunction
&CallerFunction
= *CallerBB
.getFunction();
240 BinaryContext
&BC
= CallerFunction
.getBinaryContext();
243 assert(MIB
.isCall(*CallInst
) && "can only inline a call or a tail call");
244 assert(!Callee
.isMultiEntry() &&
245 "cannot inline function with multiple entries");
246 assert(!Callee
.hasJumpTables() &&
247 "cannot inline function with jump table(s)");
249 // Get information about the call site.
250 const bool CSIsInvoke
= BC
.MIB
->isInvoke(*CallInst
);
251 const bool CSIsTailCall
= BC
.MIB
->isTailCall(*CallInst
);
252 const int64_t CSGNUArgsSize
= BC
.MIB
->getGnuArgsSize(*CallInst
);
253 const std::optional
<MCPlus::MCLandingPad
> CSEHInfo
=
254 BC
.MIB
->getEHInfo(*CallInst
);
256 // Split basic block at the call site if there will be more incoming edges
257 // coming from the callee.
258 BinaryBasicBlock
*FirstInlinedBB
= &CallerBB
;
259 if (Callee
.front().pred_size() && CallInst
!= CallerBB
.begin()) {
260 FirstInlinedBB
= CallerBB
.splitAt(CallInst
);
261 CallInst
= FirstInlinedBB
->begin();
264 // Split basic block after the call instruction unless the callee is trivial
265 // (i.e. consists of a single basic block). If necessary, obtain a basic block
266 // for return instructions in the callee to redirect to.
267 BinaryBasicBlock
*NextBB
= nullptr;
268 if (Callee
.size() > 1) {
269 if (std::next(CallInst
) != FirstInlinedBB
->end())
270 NextBB
= FirstInlinedBB
->splitAt(std::next(CallInst
));
272 NextBB
= FirstInlinedBB
->getSuccessor();
275 FirstInlinedBB
->removeSuccessor(NextBB
);
277 // Remove the call instruction.
278 auto InsertII
= FirstInlinedBB
->eraseInstruction(CallInst
);
280 double ProfileRatio
= 0;
281 if (uint64_t CalleeExecCount
= Callee
.getKnownExecutionCount())
283 (double)FirstInlinedBB
->getKnownExecutionCount() / CalleeExecCount
;
285 // Save execution count of the first block as we don't want it to change
286 // later due to profile adjustment rounding errors.
287 const uint64_t FirstInlinedBBCount
= FirstInlinedBB
->getKnownExecutionCount();
289 // Copy basic blocks and maintain a map from their origin.
290 std::unordered_map
<const BinaryBasicBlock
*, BinaryBasicBlock
*> InlinedBBMap
;
291 InlinedBBMap
[&Callee
.front()] = FirstInlinedBB
;
292 for (const BinaryBasicBlock
&BB
: llvm::drop_begin(Callee
)) {
293 BinaryBasicBlock
*InlinedBB
= CallerFunction
.addBasicBlock();
294 InlinedBBMap
[&BB
] = InlinedBB
;
295 InlinedBB
->setCFIState(FirstInlinedBB
->getCFIState());
296 if (Callee
.hasValidProfile())
297 InlinedBB
->setExecutionCount(BB
.getKnownExecutionCount());
299 InlinedBB
->setExecutionCount(FirstInlinedBBCount
);
302 // Copy over instructions and edges.
303 for (const BinaryBasicBlock
&BB
: Callee
) {
304 BinaryBasicBlock
*InlinedBB
= InlinedBBMap
[&BB
];
306 if (InlinedBB
!= FirstInlinedBB
)
307 InsertII
= InlinedBB
->begin();
309 // Copy over instructions making any necessary mods.
310 for (MCInst Inst
: BB
) {
311 if (MIB
.isPseudo(Inst
))
314 MIB
.stripAnnotations(Inst
, /*KeepTC=*/BC
.isX86());
316 // Fix branch target. Strictly speaking, we don't have to do this as
317 // targets of direct branches will be fixed later and don't matter
318 // in the CFG state. However, disassembly may look misleading, and
319 // hence we do the fixing.
320 if (MIB
.isBranch(Inst
)) {
321 assert(!MIB
.isIndirectBranch(Inst
) &&
322 "unexpected indirect branch in callee");
323 const BinaryBasicBlock
*TargetBB
=
324 Callee
.getBasicBlockForLabel(MIB
.getTargetSymbol(Inst
));
325 assert(TargetBB
&& "cannot find target block in callee");
326 MIB
.replaceBranchTarget(Inst
, InlinedBBMap
[TargetBB
]->getLabel(),
330 if (CSIsTailCall
|| (!MIB
.isCall(Inst
) && !MIB
.isReturn(Inst
))) {
332 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
336 // Handle special instructions for a non-tail call site.
337 if (!MIB
.isCall(Inst
)) {
338 // Returns are removed.
342 MIB
.convertTailCallToCall(Inst
);
344 // Propagate EH-related info to call instructions.
346 MIB
.addEHInfo(Inst
, *CSEHInfo
);
347 if (CSGNUArgsSize
>= 0)
348 MIB
.addGnuArgsSize(Inst
, CSGNUArgsSize
);
352 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
355 // Add CFG edges to the basic blocks of the inlined instance.
356 std::vector
<BinaryBasicBlock
*> Successors(BB
.succ_size());
357 llvm::transform(BB
.successors(), Successors
.begin(),
358 [&InlinedBBMap
](const BinaryBasicBlock
*BB
) {
359 return InlinedBBMap
.at(BB
);
362 if (CallerFunction
.hasValidProfile() && Callee
.hasValidProfile())
363 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end(),
364 BB
.branch_info_begin(), BB
.branch_info_end());
366 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end());
368 if (!CSIsTailCall
&& BB
.succ_size() == 0 && NextBB
) {
369 // Either it's a return block or the last instruction never returns.
370 InlinedBB
->addSuccessor(NextBB
, InlinedBB
->getExecutionCount());
373 // Scale profiling info for blocks and edges after inlining.
374 if (CallerFunction
.hasValidProfile() && Callee
.size() > 1) {
375 if (opts::AdjustProfile
)
376 InlinedBB
->adjustExecutionCount(ProfileRatio
);
378 InlinedBB
->setExecutionCount(InlinedBB
->getKnownExecutionCount() *
383 // Restore the original execution count of the first inlined basic block.
384 FirstInlinedBB
->setExecutionCount(FirstInlinedBBCount
);
386 CallerFunction
.recomputeLandingPads();
389 return std::make_pair(NextBB
, NextBB
->begin());
391 if (Callee
.size() == 1)
392 return std::make_pair(FirstInlinedBB
, InsertII
);
394 return std::make_pair(FirstInlinedBB
, FirstInlinedBB
->end());
397 bool Inliner::inlineCallsInFunction(BinaryFunction
&Function
) {
398 BinaryContext
&BC
= Function
.getBinaryContext();
399 std::vector
<BinaryBasicBlock
*> Blocks(Function
.getLayout().block_begin(),
400 Function
.getLayout().block_end());
402 Blocks
, [](const BinaryBasicBlock
*BB1
, const BinaryBasicBlock
*BB2
) {
403 return BB1
->getKnownExecutionCount() > BB2
->getKnownExecutionCount();
406 bool DidInlining
= false;
407 for (BinaryBasicBlock
*BB
: Blocks
) {
408 for (auto InstIt
= BB
->begin(); InstIt
!= BB
->end();) {
409 MCInst
&Inst
= *InstIt
;
410 if (!BC
.MIB
->isCall(Inst
) || MCPlus::getNumPrimeOperands(Inst
) != 1 ||
411 !Inst
.getOperand(0).isExpr()) {
416 const MCSymbol
*TargetSymbol
= BC
.MIB
->getTargetSymbol(Inst
);
417 assert(TargetSymbol
&& "target symbol expected for direct call");
419 // Don't inline calls to a secondary entry point in a target function.
420 uint64_t EntryID
= 0;
421 BinaryFunction
*TargetFunction
=
422 BC
.getFunctionForSymbol(TargetSymbol
, &EntryID
);
423 if (!TargetFunction
|| EntryID
!= 0) {
428 // Don't do recursive inlining.
429 if (TargetFunction
== &Function
) {
434 auto IInfo
= InliningCandidates
.find(TargetFunction
);
435 if (IInfo
== InliningCandidates
.end()) {
440 const bool IsTailCall
= BC
.MIB
->isTailCall(Inst
);
441 if (!IsTailCall
&& IInfo
->second
.Type
== INL_TAILCALL
) {
446 int64_t SizeAfterInlining
;
449 IInfo
->second
.SizeAfterTailCallInlining
- getSizeOfTailCallInst(BC
);
452 IInfo
->second
.SizeAfterInlining
- getSizeOfCallInst(BC
);
454 if (!opts::InlineAll
&& !opts::mustConsider(*TargetFunction
)) {
455 if (!opts::InlineSmallFunctions
||
456 SizeAfterInlining
> opts::InlineSmallFunctionsBytes
) {
462 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
463 << " in " << Function
<< " : " << BB
->getName()
464 << ". Count: " << BB
->getKnownExecutionCount()
465 << ". Size change: " << SizeAfterInlining
468 std::tie(BB
, InstIt
) = inlineCall(*BB
, InstIt
, *TargetFunction
);
471 TotalInlinedBytes
+= SizeAfterInlining
;
473 ++NumInlinedCallSites
;
474 NumInlinedDynamicCalls
+= BB
->getExecutionCount();
476 // Subtract basic block execution count from the callee execution count.
477 if (opts::AdjustProfile
)
478 TargetFunction
->adjustExecutionCount(BB
->getKnownExecutionCount());
480 // Check if the caller inlining status has to be adjusted.
481 if (IInfo
->second
.Type
== INL_TAILCALL
) {
482 auto CallerIInfo
= InliningCandidates
.find(&Function
);
483 if (CallerIInfo
!= InliningCandidates
.end() &&
484 CallerIInfo
->second
.Type
== INL_ANY
) {
485 LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
486 << Function
<< '\n');
487 CallerIInfo
->second
.Type
= INL_TAILCALL
;
491 if (NumInlinedCallSites
== opts::InlineLimit
)
499 void Inliner::runOnFunctions(BinaryContext
&BC
) {
502 if (!opts::inliningEnabled())
506 unsigned NumIters
= 0;
508 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
513 InliningCandidates
.clear();
514 findInliningCandidates(BC
);
516 std::vector
<BinaryFunction
*> ConsideredFunctions
;
517 for (auto &BFI
: BC
.getBinaryFunctions()) {
518 BinaryFunction
&Function
= BFI
.second
;
519 if (!shouldOptimize(Function
))
521 ConsideredFunctions
.push_back(&Function
);
523 llvm::sort(ConsideredFunctions
, [](const BinaryFunction
*A
,
524 const BinaryFunction
*B
) {
525 return B
->getKnownExecutionCount() < A
->getKnownExecutionCount();
527 for (BinaryFunction
*Function
: ConsideredFunctions
) {
528 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
531 const bool DidInline
= inlineCallsInFunction(*Function
);
534 Modified
.insert(Function
);
536 InlinedOnce
|= DidInline
;
540 } while (InlinedOnce
&& NumIters
< opts::InlineMaxIters
);
542 if (NumInlinedCallSites
)
543 outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls
<< " calls at "
544 << NumInlinedCallSites
<< " call sites in " << NumIters
545 << " iteration(s). Change in binary size: " << TotalInlinedBytes