//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has a limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//
#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;
37 extern cl::OptionCategory BoltOptCategory
;
40 AdjustProfile("inline-ap",
41 cl::desc("adjust function profile after inlining"),
42 cl::cat(BoltOptCategory
));
44 static cl::list
<std::string
>
45 ForceInlineFunctions("force-inline",
47 cl::desc("list of functions to always consider for inlining"),
48 cl::value_desc("func1,func2,func3,..."),
50 cl::cat(BoltOptCategory
));
52 static cl::opt
<bool> InlineAll("inline-all", cl::desc("inline all functions"),
53 cl::cat(BoltOptCategory
));
55 static cl::opt
<bool> InlineIgnoreLeafCFI(
56 "inline-ignore-leaf-cfi",
57 cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
58 cl::init(true), cl::ReallyHidden
, cl::cat(BoltOptCategory
));
60 static cl::opt
<bool> InlineIgnoreCFI(
63 "inline functions with CFI programs (can break exception handling)"),
64 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
66 static cl::opt
<unsigned>
67 InlineLimit("inline-limit",
68 cl::desc("maximum number of call sites to inline"), cl::init(0),
69 cl::Hidden
, cl::cat(BoltOptCategory
));
71 static cl::opt
<unsigned>
72 InlineMaxIters("inline-max-iters",
73 cl::desc("maximum number of inline iterations"), cl::init(3),
74 cl::Hidden
, cl::cat(BoltOptCategory
));
76 static cl::opt
<bool> InlineSmallFunctions(
77 "inline-small-functions",
78 cl::desc("inline functions if increase in size is less than defined by "
79 "-inline-small-functions-bytes"),
80 cl::cat(BoltOptCategory
));
82 static cl::opt
<unsigned> InlineSmallFunctionsBytes(
83 "inline-small-functions-bytes",
84 cl::desc("max number of bytes for the function to be considered small for "
86 cl::init(4), cl::Hidden
, cl::cat(BoltOptCategory
));
88 static cl::opt
<bool> NoInline(
90 cl::desc("disable all inlining (overrides other inlining options)"),
91 cl::cat(BoltOptCategory
));
93 /// This function returns true if any of inlining options are specified and the
94 /// inlining pass should be executed. Whenever a new inlining option is added,
95 /// this function should reflect the change.
96 bool inliningEnabled() {
98 (InlineAll
|| InlineSmallFunctions
|| !ForceInlineFunctions
.empty());
101 bool mustConsider(const llvm::bolt::BinaryFunction
&Function
) {
102 for (std::string
&Name
: opts::ForceInlineFunctions
)
103 if (Function
.hasName(Name
))
109 if (opts::InlineIgnoreCFI
)
110 opts::InlineIgnoreLeafCFI
= true;
113 opts::InlineSmallFunctions
= true;
121 uint64_t Inliner::SizeOfCallInst
;
122 uint64_t Inliner::SizeOfTailCallInst
;
124 uint64_t Inliner::getSizeOfCallInst(const BinaryContext
&BC
) {
126 return SizeOfCallInst
;
129 BC
.MIB
->createCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
130 SizeOfCallInst
= BC
.computeInstructionSize(Inst
);
132 return SizeOfCallInst
;
135 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext
&BC
) {
136 if (SizeOfTailCallInst
)
137 return SizeOfTailCallInst
;
140 BC
.MIB
->createTailCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
141 SizeOfTailCallInst
= BC
.computeInstructionSize(Inst
);
143 return SizeOfTailCallInst
;
146 InliningInfo
getInliningInfo(const BinaryFunction
&BF
) {
147 const BinaryContext
&BC
= BF
.getBinaryContext();
148 bool DirectSP
= false;
152 // Perform necessary checks unless the option overrides it.
153 if (!opts::mustConsider(BF
)) {
154 if (BF
.hasSDTMarker())
157 if (BF
.hasEHRanges())
160 if (BF
.isMultiEntry())
163 if (BF
.hasJumpTables())
166 const MCPhysReg SPReg
= BC
.MIB
->getStackPointer();
167 for (const BinaryBasicBlock
&BB
: BF
) {
168 for (const MCInst
&Inst
: BB
) {
169 // Tail calls are marked as implicitly using the stack pointer and they
171 if (BC
.MIB
->isTailCall(Inst
))
174 if (BC
.MIB
->isCFI(Inst
)) {
179 if (BC
.MIB
->isCall(Inst
))
182 // Push/pop instructions are straightforward to handle.
183 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
186 DirectSP
|= BC
.MIB
->hasDefOfPhysReg(Inst
, SPReg
) ||
187 BC
.MIB
->hasUseOfPhysReg(Inst
, SPReg
);
193 if (!opts::InlineIgnoreLeafCFI
)
196 if (!IsLeaf
&& !opts::InlineIgnoreCFI
)
200 InliningInfo
Info(DirectSP
? INL_TAILCALL
: INL_ANY
);
202 size_t Size
= BF
.estimateSize();
204 Info
.SizeAfterInlining
= Size
;
205 Info
.SizeAfterTailCallInlining
= Size
;
207 // Handle special case of the known size reduction.
208 if (BF
.size() == 1) {
209 // For a regular call the last return instruction could be removed
210 // (or converted to a branch).
211 const MCInst
*LastInst
= BF
.back().getLastNonPseudoInstr();
212 if (LastInst
&& BC
.MIB
->isReturn(*LastInst
) &&
213 !BC
.MIB
->isTailCall(*LastInst
)) {
214 const uint64_t RetInstSize
= BC
.computeInstructionSize(*LastInst
);
215 assert(Size
>= RetInstSize
);
216 Info
.SizeAfterInlining
-= RetInstSize
;
223 void Inliner::findInliningCandidates(BinaryContext
&BC
) {
224 for (const auto &BFI
: BC
.getBinaryFunctions()) {
225 const BinaryFunction
&Function
= BFI
.second
;
226 if (!shouldOptimize(Function
))
228 const InliningInfo InlInfo
= getInliningInfo(Function
);
229 if (InlInfo
.Type
!= INL_NONE
)
230 InliningCandidates
[&Function
] = InlInfo
;
234 std::pair
<BinaryBasicBlock
*, BinaryBasicBlock::iterator
>
235 Inliner::inlineCall(BinaryBasicBlock
&CallerBB
,
236 BinaryBasicBlock::iterator CallInst
,
237 const BinaryFunction
&Callee
) {
238 BinaryFunction
&CallerFunction
= *CallerBB
.getFunction();
239 BinaryContext
&BC
= CallerFunction
.getBinaryContext();
242 assert(MIB
.isCall(*CallInst
) && "can only inline a call or a tail call");
243 assert(!Callee
.isMultiEntry() &&
244 "cannot inline function with multiple entries");
245 assert(!Callee
.hasJumpTables() &&
246 "cannot inline function with jump table(s)");
248 // Get information about the call site.
249 const bool CSIsInvoke
= BC
.MIB
->isInvoke(*CallInst
);
250 const bool CSIsTailCall
= BC
.MIB
->isTailCall(*CallInst
);
251 const int64_t CSGNUArgsSize
= BC
.MIB
->getGnuArgsSize(*CallInst
);
252 const std::optional
<MCPlus::MCLandingPad
> CSEHInfo
=
253 BC
.MIB
->getEHInfo(*CallInst
);
255 // Split basic block at the call site if there will be more incoming edges
256 // coming from the callee.
257 BinaryBasicBlock
*FirstInlinedBB
= &CallerBB
;
258 if (Callee
.front().pred_size() && CallInst
!= CallerBB
.begin()) {
259 FirstInlinedBB
= CallerBB
.splitAt(CallInst
);
260 CallInst
= FirstInlinedBB
->begin();
263 // Split basic block after the call instruction unless the callee is trivial
264 // (i.e. consists of a single basic block). If necessary, obtain a basic block
265 // for return instructions in the callee to redirect to.
266 BinaryBasicBlock
*NextBB
= nullptr;
267 if (Callee
.size() > 1) {
268 if (std::next(CallInst
) != FirstInlinedBB
->end())
269 NextBB
= FirstInlinedBB
->splitAt(std::next(CallInst
));
271 NextBB
= FirstInlinedBB
->getSuccessor();
274 FirstInlinedBB
->removeSuccessor(NextBB
);
276 // Remove the call instruction.
277 auto InsertII
= FirstInlinedBB
->eraseInstruction(CallInst
);
279 double ProfileRatio
= 0;
280 if (uint64_t CalleeExecCount
= Callee
.getKnownExecutionCount())
282 (double)FirstInlinedBB
->getKnownExecutionCount() / CalleeExecCount
;
284 // Save execution count of the first block as we don't want it to change
285 // later due to profile adjustment rounding errors.
286 const uint64_t FirstInlinedBBCount
= FirstInlinedBB
->getKnownExecutionCount();
288 // Copy basic blocks and maintain a map from their origin.
289 std::unordered_map
<const BinaryBasicBlock
*, BinaryBasicBlock
*> InlinedBBMap
;
290 InlinedBBMap
[&Callee
.front()] = FirstInlinedBB
;
291 for (const BinaryBasicBlock
&BB
: llvm::drop_begin(Callee
)) {
292 BinaryBasicBlock
*InlinedBB
= CallerFunction
.addBasicBlock();
293 InlinedBBMap
[&BB
] = InlinedBB
;
294 InlinedBB
->setCFIState(FirstInlinedBB
->getCFIState());
295 if (Callee
.hasValidProfile())
296 InlinedBB
->setExecutionCount(BB
.getKnownExecutionCount());
298 InlinedBB
->setExecutionCount(FirstInlinedBBCount
);
301 // Copy over instructions and edges.
302 for (const BinaryBasicBlock
&BB
: Callee
) {
303 BinaryBasicBlock
*InlinedBB
= InlinedBBMap
[&BB
];
305 if (InlinedBB
!= FirstInlinedBB
)
306 InsertII
= InlinedBB
->begin();
308 // Copy over instructions making any necessary mods.
309 for (MCInst Inst
: BB
) {
310 if (MIB
.isPseudo(Inst
))
313 MIB
.stripAnnotations(Inst
, /*KeepTC=*/BC
.isX86());
315 // Fix branch target. Strictly speaking, we don't have to do this as
316 // targets of direct branches will be fixed later and don't matter
317 // in the CFG state. However, disassembly may look misleading, and
318 // hence we do the fixing.
319 if (MIB
.isBranch(Inst
)) {
320 assert(!MIB
.isIndirectBranch(Inst
) &&
321 "unexpected indirect branch in callee");
322 const BinaryBasicBlock
*TargetBB
=
323 Callee
.getBasicBlockForLabel(MIB
.getTargetSymbol(Inst
));
324 assert(TargetBB
&& "cannot find target block in callee");
325 MIB
.replaceBranchTarget(Inst
, InlinedBBMap
[TargetBB
]->getLabel(),
329 if (CSIsTailCall
|| (!MIB
.isCall(Inst
) && !MIB
.isReturn(Inst
))) {
331 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
335 // Handle special instructions for a non-tail call site.
336 if (!MIB
.isCall(Inst
)) {
337 // Returns are removed.
341 MIB
.convertTailCallToCall(Inst
);
343 // Propagate EH-related info to call instructions.
345 MIB
.addEHInfo(Inst
, *CSEHInfo
);
346 if (CSGNUArgsSize
>= 0)
347 MIB
.addGnuArgsSize(Inst
, CSGNUArgsSize
);
351 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
354 // Add CFG edges to the basic blocks of the inlined instance.
355 std::vector
<BinaryBasicBlock
*> Successors(BB
.succ_size());
356 llvm::transform(BB
.successors(), Successors
.begin(),
357 [&InlinedBBMap
](const BinaryBasicBlock
*BB
) {
358 auto It
= InlinedBBMap
.find(BB
);
359 assert(It
!= InlinedBBMap
.end());
363 if (CallerFunction
.hasValidProfile() && Callee
.hasValidProfile())
364 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end(),
365 BB
.branch_info_begin(), BB
.branch_info_end());
367 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end());
369 if (!CSIsTailCall
&& BB
.succ_size() == 0 && NextBB
) {
370 // Either it's a return block or the last instruction never returns.
371 InlinedBB
->addSuccessor(NextBB
, InlinedBB
->getExecutionCount());
374 // Scale profiling info for blocks and edges after inlining.
375 if (CallerFunction
.hasValidProfile() && Callee
.size() > 1) {
376 if (opts::AdjustProfile
)
377 InlinedBB
->adjustExecutionCount(ProfileRatio
);
379 InlinedBB
->setExecutionCount(InlinedBB
->getKnownExecutionCount() *
384 // Restore the original execution count of the first inlined basic block.
385 FirstInlinedBB
->setExecutionCount(FirstInlinedBBCount
);
387 CallerFunction
.recomputeLandingPads();
390 return std::make_pair(NextBB
, NextBB
->begin());
392 if (Callee
.size() == 1)
393 return std::make_pair(FirstInlinedBB
, InsertII
);
395 return std::make_pair(FirstInlinedBB
, FirstInlinedBB
->end());
398 bool Inliner::inlineCallsInFunction(BinaryFunction
&Function
) {
399 BinaryContext
&BC
= Function
.getBinaryContext();
400 std::vector
<BinaryBasicBlock
*> Blocks(Function
.getLayout().block_begin(),
401 Function
.getLayout().block_end());
403 Blocks
, [](const BinaryBasicBlock
*BB1
, const BinaryBasicBlock
*BB2
) {
404 return BB1
->getKnownExecutionCount() > BB2
->getKnownExecutionCount();
407 bool DidInlining
= false;
408 for (BinaryBasicBlock
*BB
: Blocks
) {
409 for (auto InstIt
= BB
->begin(); InstIt
!= BB
->end();) {
410 MCInst
&Inst
= *InstIt
;
411 if (!BC
.MIB
->isCall(Inst
) || MCPlus::getNumPrimeOperands(Inst
) != 1 ||
412 !Inst
.getOperand(0).isExpr()) {
417 const MCSymbol
*TargetSymbol
= BC
.MIB
->getTargetSymbol(Inst
);
418 assert(TargetSymbol
&& "target symbol expected for direct call");
420 // Don't inline calls to a secondary entry point in a target function.
421 uint64_t EntryID
= 0;
422 BinaryFunction
*TargetFunction
=
423 BC
.getFunctionForSymbol(TargetSymbol
, &EntryID
);
424 if (!TargetFunction
|| EntryID
!= 0) {
429 // Don't do recursive inlining.
430 if (TargetFunction
== &Function
) {
435 auto IInfo
= InliningCandidates
.find(TargetFunction
);
436 if (IInfo
== InliningCandidates
.end()) {
441 const bool IsTailCall
= BC
.MIB
->isTailCall(Inst
);
442 if (!IsTailCall
&& IInfo
->second
.Type
== INL_TAILCALL
) {
447 int64_t SizeAfterInlining
;
450 IInfo
->second
.SizeAfterTailCallInlining
- getSizeOfTailCallInst(BC
);
453 IInfo
->second
.SizeAfterInlining
- getSizeOfCallInst(BC
);
455 if (!opts::InlineAll
&& !opts::mustConsider(*TargetFunction
)) {
456 if (!opts::InlineSmallFunctions
||
457 SizeAfterInlining
> opts::InlineSmallFunctionsBytes
) {
463 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
464 << " in " << Function
<< " : " << BB
->getName()
465 << ". Count: " << BB
->getKnownExecutionCount()
466 << ". Size change: " << SizeAfterInlining
469 std::tie(BB
, InstIt
) = inlineCall(*BB
, InstIt
, *TargetFunction
);
472 TotalInlinedBytes
+= SizeAfterInlining
;
474 ++NumInlinedCallSites
;
475 NumInlinedDynamicCalls
+= BB
->getExecutionCount();
477 // Subtract basic block execution count from the callee execution count.
478 if (opts::AdjustProfile
)
479 TargetFunction
->adjustExecutionCount(BB
->getKnownExecutionCount());
481 // Check if the caller inlining status has to be adjusted.
482 if (IInfo
->second
.Type
== INL_TAILCALL
) {
483 auto CallerIInfo
= InliningCandidates
.find(&Function
);
484 if (CallerIInfo
!= InliningCandidates
.end() &&
485 CallerIInfo
->second
.Type
== INL_ANY
) {
486 LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
487 << Function
<< '\n');
488 CallerIInfo
->second
.Type
= INL_TAILCALL
;
492 if (NumInlinedCallSites
== opts::InlineLimit
)
500 Error
Inliner::runOnFunctions(BinaryContext
&BC
) {
503 if (!opts::inliningEnabled())
504 return Error::success();
507 unsigned NumIters
= 0;
509 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
514 InliningCandidates
.clear();
515 findInliningCandidates(BC
);
517 std::vector
<BinaryFunction
*> ConsideredFunctions
;
518 for (auto &BFI
: BC
.getBinaryFunctions()) {
519 BinaryFunction
&Function
= BFI
.second
;
520 if (!shouldOptimize(Function
))
522 ConsideredFunctions
.push_back(&Function
);
524 llvm::sort(ConsideredFunctions
, [](const BinaryFunction
*A
,
525 const BinaryFunction
*B
) {
526 return B
->getKnownExecutionCount() < A
->getKnownExecutionCount();
528 for (BinaryFunction
*Function
: ConsideredFunctions
) {
529 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
532 const bool DidInline
= inlineCallsInFunction(*Function
);
535 Modified
.insert(Function
);
537 InlinedOnce
|= DidInline
;
541 } while (InlinedOnce
&& NumIters
< opts::InlineMaxIters
);
543 if (NumInlinedCallSites
)
544 BC
.outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls
<< " calls at "
545 << NumInlinedCallSites
<< " call sites in " << NumIters
546 << " iteration(s). Change in binary size: " << TotalInlinedBytes
548 return Error::success();