1 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the Inliner class used for inlining binary functions.
11 // The current inliner has a limited callee support
12 // (see Inliner::getInliningInfo() for the most up-to-date details):
14 // * No exception handling
16 // * Single entry point
17 // * CFI update not supported - breaks unwinding
18 // * Regular Call Sites:
19 // - only leaf functions (or callees with only tail calls)
20 // * no invokes (they can't be tail calls)
21 // - no direct use of %rsp
23 // - since the stack is unmodified, the regular call limitations are lifted
25 //===----------------------------------------------------------------------===//
27 #include "bolt/Passes/Inliner.h"
28 #include "bolt/Core/MCPlus.h"
29 #include "llvm/Support/CommandLine.h"
32 #define DEBUG_TYPE "bolt-inliner"
// Command-line options controlling the inliner. These live in the BOLT
// options namespace and are grouped under the shared BOLT optimization
// category declared in another translation unit.
// NOTE(review): this chunk is a lossy extraction -- several registration
// lines (e.g. the cl::opt declaration for AdjustProfile and the option-name
// strings for InlineIgnoreCFI and NoInline) are missing from this view;
// confirm against the full file before editing.
38 extern cl::OptionCategory BoltOptCategory
;
// -inline-ap: adjust the function profile after inlining.
41 AdjustProfile("inline-ap",
42 cl::desc("adjust function profile after inlining"),
43 cl::cat(BoltOptCategory
));
// -force-inline: comma-separated list of function names that are always
// considered for inlining (see opts::mustConsider below).
45 static cl::list
<std::string
>
46 ForceInlineFunctions("force-inline",
48 cl::desc("list of functions to always consider for inlining"),
49 cl::value_desc("func1,func2,func3,..."),
51 cl::cat(BoltOptCategory
));
// -inline-all: consider every eligible function for inlining.
53 static cl::opt
<bool> InlineAll("inline-all", cl::desc("inline all functions"),
54 cl::cat(BoltOptCategory
));
// -inline-ignore-leaf-cfi (default true): allow inlining leaf functions
// that carry CFI programs, at the cost of possibly breaking unwinding.
56 static cl::opt
<bool> InlineIgnoreLeafCFI(
57 "inline-ignore-leaf-cfi",
58 cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
59 cl::init(true), cl::ReallyHidden
, cl::cat(BoltOptCategory
));
// Allow inlining of any function with a CFI program (can break EH).
// NOTE(review): the option-name string line is missing from this view.
61 static cl::opt
<bool> InlineIgnoreCFI(
64 "inline functions with CFI programs (can break exception handling)"),
65 cl::ReallyHidden
, cl::cat(BoltOptCategory
));
// -inline-limit: cap on the number of inlined call sites; the init(0)
// default means "no limit" (see the guard in runOnFunctions).
67 static cl::opt
<unsigned>
68 InlineLimit("inline-limit",
69 cl::desc("maximum number of call sites to inline"), cl::init(0),
70 cl::Hidden
, cl::cat(BoltOptCategory
));
// -inline-max-iters: cap on inlining passes over the binary (default 3).
72 static cl::opt
<unsigned>
73 InlineMaxIters("inline-max-iters",
74 cl::desc("maximum number of inline iterations"), cl::init(3),
75 cl::Hidden
, cl::cat(BoltOptCategory
));
// -inline-small-functions: inline when the size growth stays below the
// threshold given by -inline-small-functions-bytes.
77 static cl::opt
<bool> InlineSmallFunctions(
78 "inline-small-functions",
79 cl::desc("inline functions if increase in size is less than defined by "
80 "-inline-small-functions-bytes",
81 cl::cat(BoltOptCategory
));
// Size threshold (bytes, default 4) used by -inline-small-functions.
83 static cl::opt
<unsigned> InlineSmallFunctionsBytes(
84 "inline-small-functions-bytes",
85 cl::desc("max number of bytes for the function to be considered small for "
87 cl::init(4), cl::Hidden
, cl::cat(BoltOptCategory
));
// Kill switch: disables all inlining regardless of the options above.
// NOTE(review): the option-name string line is missing from this view.
89 static cl::opt
<bool> NoInline(
91 cl::desc("disable all inlining (overrides other inlining options)"),
92 cl::cat(BoltOptCategory
));
94 /// This function returns true if any of inlining options are specified and the
95 /// inlining pass should be executed. Whenever a new inlining option is added,
96 /// this function should reflect the change.
97 bool inliningEnabled() {
// Enabled when any inlining trigger is set: -inline-all, small-function
// inlining, or a non-empty -force-inline list.
// NOTE(review): the leading "return" line of this expression (and any
// NoInline guard implied by that option's description) is missing from
// this view of the file -- confirm against the full source.
99 (InlineAll
|| InlineSmallFunctions
|| !ForceInlineFunctions
.empty());
// Returns whether \p Function was explicitly requested for inlining via
// the -force-inline name list (matched via BinaryFunction::hasName).
// NOTE(review): the return statements of this function are missing from
// this view of the file.
102 bool mustConsider(const llvm::bolt::BinaryFunction
&Function
) {
// Scan every user-supplied name against the function's known names.
103 for (std::string
&Name
: opts::ForceInlineFunctions
)
104 if (Function
.hasName(Name
))
// Option synchronization: ignoring CFI programs everywhere implies also
// ignoring them in leaf functions, and (under an elided condition)
// small-function inlining is force-enabled.
// NOTE(review): the enclosing function header and the condition guarding
// the final assignment are missing from this view -- confirm against the
// full source before editing.
110 if (opts::InlineIgnoreCFI
)
111 opts::InlineIgnoreLeafCFI
= true;
114 opts::InlineSmallFunctions
= true;
// Lazily-computed encoded sizes (in bytes) of a synthesized call and
// tail-call instruction for the current target; populated on first use by
// getSizeOfCallInst() / getSizeOfTailCallInst() below.
122 uint64_t Inliner::SizeOfCallInst
;
123 uint64_t Inliner::SizeOfTailCallInst
;
// Returns the byte size of a direct call instruction on this target.
// The size is measured once -- by synthesizing a call to a fresh temporary
// symbol and encoding it -- and cached in the static SizeOfCallInst.
// NOTE(review): the cache-hit guard ("if (SizeOfCallInst)") and the local
// "MCInst Inst;" declaration are missing from this view of the file.
125 uint64_t Inliner::getSizeOfCallInst(const BinaryContext
&BC
) {
127 return SizeOfCallInst
;
// Build a dummy call to a named temp symbol and measure its encoding.
130 BC
.MIB
->createCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
131 SizeOfCallInst
= BC
.computeInstructionSize(Inst
);
133 return SizeOfCallInst
;
// Returns the byte size of a tail-call instruction on this target,
// computing it once and caching it in the static SizeOfTailCallInst.
136 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext
&BC
) {
// Fast path: size already computed on a previous call.
137 if (SizeOfTailCallInst
)
138 return SizeOfTailCallInst
;
// Build a dummy tail call to a named temp symbol and measure its encoding.
// NOTE(review): the local "MCInst Inst;" declaration is missing from this
// view of the file.
141 BC
.MIB
->createTailCall(Inst
, BC
.Ctx
->createNamedTempSymbol(), BC
.Ctx
.get());
142 SizeOfTailCallInst
= BC
.computeInstructionSize(Inst
);
144 return SizeOfTailCallInst
;
// Classifies \p BF for inlining: whether it can be inlined at all, whether
// only at tail-call sites (INL_TAILCALL) or at any call site (INL_ANY),
// and the estimated caller size growth for each case.
// NOTE(review): the bodies of the early-bailout branches (the "return"
// lines returning INL_NONE, the IsLeaf bookkeeping, and several closing
// braces) are missing from this view -- confirm against the full source.
147 InliningInfo
getInliningInfo(const BinaryFunction
&BF
) {
148 const BinaryContext
&BC
= BF
.getBinaryContext();
// Set when any instruction directly defines or uses the stack pointer;
// such callees are only safe to inline at tail-call sites.
149 bool DirectSP
= false;
153 // Perform necessary checks unless the option overrides it.
154 if (!opts::mustConsider(BF
)) {
// Eligibility screens: SDT markers, EH ranges, secondary entry points,
// and jump tables all disqualify a callee (bail-out bodies elided here).
155 if (BF
.hasSDTMarker())
158 if (BF
.hasEHRanges())
161 if (BF
.isMultiEntry())
164 if (BF
.hasJumpTables())
// Scan every instruction for stack-pointer usage, CFI, and calls.
167 const MCPhysReg SPReg
= BC
.MIB
->getStackPointer();
168 for (const BinaryBasicBlock
*BB
: BF
.layout()) {
169 for (const MCInst
&Inst
: *BB
) {
170 // Tail calls are marked as implicitly using the stack pointer and they
172 if (BC
.MIB
->isTailCall(Inst
))
175 if (BC
.MIB
->isCFI(Inst
)) {
180 if (BC
.MIB
->isCall(Inst
))
183 // Push/pop instructions are straightforward to handle.
184 if (BC
.MIB
->isPush(Inst
) || BC
.MIB
->isPop(Inst
))
// Any other direct def/use of the stack pointer restricts inlining.
187 DirectSP
|= BC
.MIB
->hasDefOfPhysReg(Inst
, SPReg
) ||
188 BC
.MIB
->hasUseOfPhysReg(Inst
, SPReg
);
// CFI handling policy: honored per the -inline-ignore-*cfi options
// (the guarded bail-out bodies are elided from this view).
194 if (!opts::InlineIgnoreLeafCFI
)
197 if (!IsLeaf
&& !opts::InlineIgnoreCFI
)
// SP-touching callees may only be inlined at tail-call sites.
201 InliningInfo
Info(DirectSP
? INL_TAILCALL
: INL_ANY
);
// Start both size estimates from the callee's full estimated size.
203 size_t Size
= BF
.estimateSize();
205 Info
.SizeAfterInlining
= Size
;
206 Info
.SizeAfterTailCallInlining
= Size
;
208 // Handle special case of the known size reduction.
209 if (BF
.size() == 1) {
210 // For a regular call the last return instruction could be removed
211 // (or converted to a branch).
212 const MCInst
*LastInst
= BF
.back().getLastNonPseudoInstr();
213 if (LastInst
&& BC
.MIB
->isReturn(*LastInst
) &&
214 !BC
.MIB
->isTailCall(*LastInst
)) {
215 const uint64_t RetInstSize
= BC
.computeInstructionSize(*LastInst
);
216 assert(Size
>= RetInstSize
);
// Regular-call inlining drops the trailing return, so credit its size.
217 Info
.SizeAfterInlining
-= RetInstSize
;
// Populates InliningCandidates with every optimizable function whose
// getInliningInfo() classification is not INL_NONE, keyed by function.
// NOTE(review): the "continue" line after the shouldOptimize check is
// missing from this view of the file.
224 void Inliner::findInliningCandidates(BinaryContext
&BC
) {
225 for (const auto &BFI
: BC
.getBinaryFunctions()) {
226 const BinaryFunction
&Function
= BFI
.second
;
// Respect the pass-level optimization filter.
227 if (!shouldOptimize(Function
))
229 const InliningInfo InlInfo
= getInliningInfo(Function
);
230 if (InlInfo
.Type
!= INL_NONE
)
231 InliningCandidates
[&Function
] = InlInfo
;
// Inlines the body of \p Callee at the call site \p CallInst inside
// \p CallerBB: splits the caller block around the call, clones the
// callee's basic blocks and instructions into the caller (rewiring
// branches, returns, EH info, and CFG edges), scales profile counts, and
// returns the block/iterator position just past the inlined instance so
// the caller's instruction scan can resume there.
// NOTE(review): several interior lines (e.g. the "MCInstrAnalysis &MIB"
// style local binding, assignments split from their right-hand sides, and
// a number of closing braces) are elided from this view of the file.
235 std::pair
<BinaryBasicBlock
*, BinaryBasicBlock::iterator
>
236 Inliner::inlineCall(BinaryBasicBlock
&CallerBB
,
237 BinaryBasicBlock::iterator CallInst
,
238 const BinaryFunction
&Callee
) {
239 BinaryFunction
&CallerFunction
= *CallerBB
.getFunction();
240 BinaryContext
&BC
= CallerFunction
.getBinaryContext();
// Preconditions: only plain (tail) calls to single-entry, jump-table-free
// callees may be inlined.
243 assert(MIB
.isCall(*CallInst
) && "can only inline a call or a tail call");
244 assert(!Callee
.isMultiEntry() &&
245 "cannot inline function with multiple entries");
246 assert(!Callee
.hasJumpTables() &&
247 "cannot inline function with jump table(s)");
249 // Get information about the call site.
250 const bool CSIsInvoke
= BC
.MIB
->isInvoke(*CallInst
);
251 const bool CSIsTailCall
= BC
.MIB
->isTailCall(*CallInst
);
252 const int64_t CSGNUArgsSize
= BC
.MIB
->getGnuArgsSize(*CallInst
);
253 const Optional
<MCPlus::MCLandingPad
> CSEHInfo
= BC
.MIB
->getEHInfo(*CallInst
);
255 // Split basic block at the call site if there will be more incoming edges
256 // coming from the callee.
257 BinaryBasicBlock
*FirstInlinedBB
= &CallerBB
;
258 if (Callee
.front().pred_size() && CallInst
!= CallerBB
.begin()) {
259 FirstInlinedBB
= CallerBB
.splitAt(CallInst
);
260 CallInst
= FirstInlinedBB
->begin();
263 // Split basic block after the call instruction unless the callee is trivial
264 // (i.e. consists of a single basic block). If necessary, obtain a basic block
265 // for return instructions in the callee to redirect to.
266 BinaryBasicBlock
*NextBB
= nullptr;
267 if (Callee
.size() > 1) {
268 if (std::next(CallInst
) != FirstInlinedBB
->end())
269 NextBB
= FirstInlinedBB
->splitAt(std::next(CallInst
));
271 NextBB
= FirstInlinedBB
->getSuccessor();
// Detach the fall-through edge; inlined return blocks will re-attach it.
274 FirstInlinedBB
->removeSuccessor(NextBB
);
276 // Remove the call instruction.
277 auto InsertII
= FirstInlinedBB
->eraseInstruction(CallInst
);
// Ratio of call-site execution count to callee execution count, used to
// scale the cloned blocks' profile (assignment line elided in this view).
279 double ProfileRatio
= 0;
280 if (uint64_t CalleeExecCount
= Callee
.getKnownExecutionCount())
282 (double)FirstInlinedBB
->getKnownExecutionCount() / CalleeExecCount
;
284 // Save execution count of the first block as we don't want it to change
285 // later due to profile adjustment rounding errors.
286 const uint64_t FirstInlinedBBCount
= FirstInlinedBB
->getKnownExecutionCount();
288 // Copy basic blocks and maintain a map from their origin.
289 std::unordered_map
<const BinaryBasicBlock
*, BinaryBasicBlock
*> InlinedBBMap
;
290 InlinedBBMap
[&Callee
.front()] = FirstInlinedBB
;
291 for (auto BBI
= std::next(Callee
.begin()); BBI
!= Callee
.end(); ++BBI
) {
292 BinaryBasicBlock
*InlinedBB
= CallerFunction
.addBasicBlock();
293 InlinedBBMap
[&*BBI
] = InlinedBB
;
// New blocks inherit the CFI state at the inlining point.
294 InlinedBB
->setCFIState(FirstInlinedBB
->getCFIState());
295 if (Callee
.hasValidProfile())
296 InlinedBB
->setExecutionCount(BBI
->getKnownExecutionCount());
298 InlinedBB
->setExecutionCount(FirstInlinedBBCount
);
301 // Copy over instructions and edges.
302 for (const BinaryBasicBlock
&BB
: Callee
) {
303 BinaryBasicBlock
*InlinedBB
= InlinedBBMap
[&BB
];
305 if (InlinedBB
!= FirstInlinedBB
)
306 InsertII
= InlinedBB
->begin();
308 // Copy over instructions making any necessary mods.
309 for (MCInst Inst
: BB
) {
310 if (MIB
.isPseudo(Inst
))
// Drop analysis annotations; keep tail-call markers only on x86.
313 MIB
.stripAnnotations(Inst
, /*KeepTC=*/BC
.isX86());
315 // Fix branch target. Strictly speaking, we don't have to do this as
316 // targets of direct branches will be fixed later and don't matter
317 // in the CFG state. However, disassembly may look misleading, and
318 // hence we do the fixing.
319 if (MIB
.isBranch(Inst
)) {
320 assert(!MIB
.isIndirectBranch(Inst
) &&
321 "unexpected indirect branch in callee");
322 const BinaryBasicBlock
*TargetBB
=
323 Callee
.getBasicBlockForLabel(MIB
.getTargetSymbol(Inst
));
324 assert(TargetBB
&& "cannot find target block in callee");
325 MIB
.replaceBranchTarget(Inst
, InlinedBBMap
[TargetBB
]->getLabel(),
// At a tail-call site (or for non-call/non-return instructions) the
// instruction is copied verbatim.
329 if (CSIsTailCall
|| (!MIB
.isCall(Inst
) && !MIB
.isReturn(Inst
))) {
331 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
335 // Handle special instructions for a non-tail call site.
336 if (!MIB
.isCall(Inst
)) {
337 // Returns are removed.
// Callee tail calls become regular calls at a non-tail call site.
341 MIB
.convertTailCallToCall(Inst
);
343 // Propagate EH-related info to call instructions.
345 MIB
.addEHInfo(Inst
, *CSEHInfo
);
346 if (CSGNUArgsSize
>= 0)
347 MIB
.addGnuArgsSize(Inst
, CSGNUArgsSize
);
351 std::next(InlinedBB
->insertInstruction(InsertII
, std::move(Inst
)));
354 // Add CFG edges to the basic blocks of the inlined instance.
355 std::vector
<BinaryBasicBlock
*> Successors(BB
.succ_size());
356 llvm::transform(BB
.successors(), Successors
.begin(),
357 [&InlinedBBMap
](const BinaryBasicBlock
*BB
) {
358 return InlinedBBMap
.at(BB
);
361 if (CallerFunction
.hasValidProfile() && Callee
.hasValidProfile())
362 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end(),
363 BB
.branch_info_begin(), BB
.branch_info_end());
365 InlinedBB
->addSuccessors(Successors
.begin(), Successors
.end());
367 if (!CSIsTailCall
&& BB
.succ_size() == 0 && NextBB
) {
368 // Either it's a return block or the last instruction never returns.
369 InlinedBB
->addSuccessor(NextBB
, InlinedBB
->getExecutionCount());
372 // Scale profiling info for blocks and edges after inlining.
373 if (CallerFunction
.hasValidProfile() && Callee
.size() > 1) {
374 if (opts::AdjustProfile
)
375 InlinedBB
->adjustExecutionCount(ProfileRatio
);
377 InlinedBB
->setExecutionCount(InlinedBB
->getKnownExecutionCount() *
382 // Restore the original execution count of the first inlined basic block.
383 FirstInlinedBB
->setExecutionCount(FirstInlinedBBCount
);
// Landing-pad info may have changed with the new EH-carrying calls.
385 CallerFunction
.recomputeLandingPads();
// Resume scanning at the join block when one exists; otherwise at the
// insertion point (trivial callee) or the end of the first inlined block.
388 return std::make_pair(NextBB
, NextBB
->begin());
390 if (Callee
.size() == 1)
391 return std::make_pair(FirstInlinedBB
, InsertII
);
393 return std::make_pair(FirstInlinedBB
, FirstInlinedBB
->end());
// Scans \p Function for direct calls to known inlining candidates and
// inlines them, hottest basic blocks first. Updates inlining statistics
// and, when a callee was only tail-call-inlinable, downgrades the caller's
// own candidacy accordingly. Returns whether any inlining was performed.
// NOTE(review): several interior lines ("continue" statements after the
// guard conditions, the llvm::sort call head, the SizeAfterInlining
// branch heads, and closing braces) are elided from this view.
396 bool Inliner::inlineCallsInFunction(BinaryFunction
&Function
) {
397 BinaryContext
&BC
= Function
.getBinaryContext();
// Snapshot the layout so blocks added during inlining are not re-scanned.
398 std::vector
<BinaryBasicBlock
*> Blocks(Function
.layout().begin(),
399 Function
.layout().end());
// Visit hotter blocks first so the inline budget favors hot call sites.
401 Blocks
, [](const BinaryBasicBlock
*BB1
, const BinaryBasicBlock
*BB2
) {
402 return BB1
->getKnownExecutionCount() > BB2
->getKnownExecutionCount();
405 bool DidInlining
= false;
406 for (BinaryBasicBlock
*BB
: Blocks
) {
407 for (auto InstIt
= BB
->begin(); InstIt
!= BB
->end();) {
408 MCInst
&Inst
= *InstIt
;
// Only direct calls with a single symbolic operand are candidates.
409 if (!BC
.MIB
->isCall(Inst
) || MCPlus::getNumPrimeOperands(Inst
) != 1 ||
410 !Inst
.getOperand(0).isExpr()) {
415 const MCSymbol
*TargetSymbol
= BC
.MIB
->getTargetSymbol(Inst
);
416 assert(TargetSymbol
&& "target symbol expected for direct call");
418 // Don't inline calls to a secondary entry point in a target function.
419 uint64_t EntryID
= 0;
420 BinaryFunction
*TargetFunction
=
421 BC
.getFunctionForSymbol(TargetSymbol
, &EntryID
);
422 if (!TargetFunction
|| EntryID
!= 0) {
427 // Don't do recursive inlining.
428 if (TargetFunction
== &Function
) {
// The callee must have been classified as inlinable beforehand.
433 auto IInfo
= InliningCandidates
.find(TargetFunction
);
434 if (IInfo
== InliningCandidates
.end()) {
// Tail-call-only candidates cannot be inlined at a regular call site.
439 const bool IsTailCall
= BC
.MIB
->isTailCall(Inst
);
440 if (!IsTailCall
&& IInfo
->second
.Type
== INL_TAILCALL
) {
// Net size change after replacing this call with the callee body
// (the call-site instruction itself is subtracted).
445 int64_t SizeAfterInlining
;
448 IInfo
->second
.SizeAfterTailCallInlining
- getSizeOfTailCallInst(BC
);
451 IInfo
->second
.SizeAfterInlining
- getSizeOfCallInst(BC
);
// Unless forced (-inline-all / -force-inline), apply the small-function
// size policy.
453 if (!opts::InlineAll
&& !opts::mustConsider(*TargetFunction
)) {
454 if (!opts::InlineSmallFunctions
||
455 SizeAfterInlining
> opts::InlineSmallFunctionsBytes
) {
461 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
462 << " in " << Function
<< " : " << BB
->getName()
463 << ". Count: " << BB
->getKnownExecutionCount()
464 << ". Size change: " << SizeAfterInlining
// Perform the inlining and continue scanning after the inlined body.
467 std::tie(BB
, InstIt
) = inlineCall(*BB
, InstIt
, *TargetFunction
);
// Bookkeeping for the pass-level statistics.
470 TotalInlinedBytes
+= SizeAfterInlining
;
472 ++NumInlinedCallSites
;
473 NumInlinedDynamicCalls
+= BB
->getExecutionCount();
475 // Subtract basic block execution count from the callee execution count.
476 if (opts::AdjustProfile
)
477 TargetFunction
->adjustExecutionCount(BB
->getKnownExecutionCount());
479 // Check if the caller inlining status has to be adjusted.
480 if (IInfo
->second
.Type
== INL_TAILCALL
) {
// Inlining a tail-call-only body makes the caller itself unsafe to
// inline at regular call sites.
481 auto CallerIInfo
= InliningCandidates
.find(&Function
);
482 if (CallerIInfo
!= InliningCandidates
.end() &&
483 CallerIInfo
->second
.Type
== INL_ANY
) {
484 LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
485 << Function
<< '\n');
486 CallerIInfo
->second
.Type
= INL_TAILCALL
;
// Stop once the -inline-limit budget is exhausted.
490 if (NumInlinedCallSites
== opts::InlineLimit
)
// Pass driver: repeatedly (up to -inline-max-iters iterations) recomputes
// the candidate set and inlines calls, visiting hotter functions first,
// until no more inlining happens or the -inline-limit budget is reached.
// Prints a summary when anything was inlined.
// NOTE(review): this definition continues past the end of the visible
// chunk, and interior lines (e.g. the do-loop header, "continue"/"break"
// statements, and the DidInline guard) are elided -- confirm against the
// full source before editing.
498 void Inliner::runOnFunctions(BinaryContext
&BC
) {
// Nothing to do unless some inlining option is enabled.
501 if (!opts::inliningEnabled())
505 unsigned NumIters
= 0;
// Respect the global call-site budget (0 means unlimited).
507 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
// Candidates are recomputed each iteration since inlining changes sizes.
512 InliningCandidates
.clear();
513 findInliningCandidates(BC
);
515 std::vector
<BinaryFunction
*> ConsideredFunctions
;
516 for (auto &BFI
: BC
.getBinaryFunctions()) {
517 BinaryFunction
&Function
= BFI
.second
;
518 if (!shouldOptimize(Function
))
520 ConsideredFunctions
.push_back(&Function
);
// Process hotter callers first.
522 llvm::sort(ConsideredFunctions
, [](const BinaryFunction
*A
,
523 const BinaryFunction
*B
) {
524 return B
->getKnownExecutionCount() < A
->getKnownExecutionCount();
526 for (BinaryFunction
*Function
: ConsideredFunctions
) {
527 if (opts::InlineLimit
&& NumInlinedCallSites
>= opts::InlineLimit
)
530 const bool DidInline
= inlineCallsInFunction(*Function
);
// Record functions whose bodies changed for downstream passes.
533 Modified
.insert(Function
);
535 InlinedOnce
|= DidInline
;
539 } while (InlinedOnce
&& NumIters
< opts::InlineMaxIters
);
541 if (NumInlinedCallSites
)
542 outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls
<< " calls at "
543 << NumInlinedCallSites
<< " call sites in " << NumIters
544 << " iteration(s). Change in binary size: " << TotalInlinedBytes