Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Passes / Inliner.cpp
blob8dcb8934f2d20f7974c03f67808a403f619fc40a
1 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Inliner class used for inlining binary functions.
11 // The current inliner has a limited callee support
12 // (see Inliner::getInliningInfo() for the most up-to-date details):
14 // * No exception handling
15 // * No jump tables
16 // * Single entry point
17 // * CFI update not supported - breaks unwinding
18 // * Regular Call Sites:
19 // - only leaf functions (or callees with only tail calls)
20 // * no invokes (they can't be tail calls)
21 // - no direct use of %rsp
22 // * Tail Call Sites:
23 // - since the stack is unmodified, the regular call limitations are lifted
25 //===----------------------------------------------------------------------===//
27 #include "bolt/Passes/Inliner.h"
28 #include "bolt/Core/MCPlus.h"
29 #include "llvm/Support/CommandLine.h"
30 #include <map>
32 #define DEBUG_TYPE "bolt-inliner"
34 using namespace llvm;
36 namespace opts {
38 extern cl::OptionCategory BoltOptCategory;
40 static cl::opt<bool>
41 AdjustProfile("inline-ap",
42 cl::desc("adjust function profile after inlining"),
43 cl::cat(BoltOptCategory));
45 static cl::list<std::string>
46 ForceInlineFunctions("force-inline",
47 cl::CommaSeparated,
48 cl::desc("list of functions to always consider for inlining"),
49 cl::value_desc("func1,func2,func3,..."),
50 cl::Hidden,
51 cl::cat(BoltOptCategory));
53 static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
54 cl::cat(BoltOptCategory));
56 static cl::opt<bool> InlineIgnoreLeafCFI(
57 "inline-ignore-leaf-cfi",
58 cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
59 cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));
61 static cl::opt<bool> InlineIgnoreCFI(
62 "inline-ignore-cfi",
63 cl::desc(
64 "inline functions with CFI programs (can break exception handling)"),
65 cl::ReallyHidden, cl::cat(BoltOptCategory));
67 static cl::opt<unsigned>
68 InlineLimit("inline-limit",
69 cl::desc("maximum number of call sites to inline"), cl::init(0),
70 cl::Hidden, cl::cat(BoltOptCategory));
72 static cl::opt<unsigned>
73 InlineMaxIters("inline-max-iters",
74 cl::desc("maximum number of inline iterations"), cl::init(3),
75 cl::Hidden, cl::cat(BoltOptCategory));
77 static cl::opt<bool> InlineSmallFunctions(
78 "inline-small-functions",
79 cl::desc("inline functions if increase in size is less than defined by "
80 "-inline-small-functions-bytes"),
81 cl::cat(BoltOptCategory));
83 static cl::opt<unsigned> InlineSmallFunctionsBytes(
84 "inline-small-functions-bytes",
85 cl::desc("max number of bytes for the function to be considered small for "
86 "inlining purposes"),
87 cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));
89 static cl::opt<bool> NoInline(
90 "no-inline",
91 cl::desc("disable all inlining (overrides other inlining options)"),
92 cl::cat(BoltOptCategory));
94 /// This function returns true if any of inlining options are specified and the
95 /// inlining pass should be executed. Whenever a new inlining option is added,
96 /// this function should reflect the change.
97 bool inliningEnabled() {
98 return !NoInline &&
99 (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
102 bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
103 for (std::string &Name : opts::ForceInlineFunctions)
104 if (Function.hasName(Name))
105 return true;
106 return false;
109 void syncOptions() {
110 if (opts::InlineIgnoreCFI)
111 opts::InlineIgnoreLeafCFI = true;
113 if (opts::InlineAll)
114 opts::InlineSmallFunctions = true;
117 } // namespace opts
119 namespace llvm {
120 namespace bolt {
122 uint64_t Inliner::SizeOfCallInst;
123 uint64_t Inliner::SizeOfTailCallInst;
125 uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
126 if (SizeOfCallInst)
127 return SizeOfCallInst;
129 MCInst Inst;
130 BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
131 SizeOfCallInst = BC.computeInstructionSize(Inst);
133 return SizeOfCallInst;
136 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
137 if (SizeOfTailCallInst)
138 return SizeOfTailCallInst;
140 MCInst Inst;
141 BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
142 SizeOfTailCallInst = BC.computeInstructionSize(Inst);
144 return SizeOfTailCallInst;
147 InliningInfo getInliningInfo(const BinaryFunction &BF) {
148 const BinaryContext &BC = BF.getBinaryContext();
149 bool DirectSP = false;
150 bool HasCFI = false;
151 bool IsLeaf = true;
153 // Perform necessary checks unless the option overrides it.
154 if (!opts::mustConsider(BF)) {
155 if (BF.hasSDTMarker())
156 return INL_NONE;
158 if (BF.hasEHRanges())
159 return INL_NONE;
161 if (BF.isMultiEntry())
162 return INL_NONE;
164 if (BF.hasJumpTables())
165 return INL_NONE;
167 const MCPhysReg SPReg = BC.MIB->getStackPointer();
168 for (const BinaryBasicBlock &BB : BF) {
169 for (const MCInst &Inst : BB) {
170 // Tail calls are marked as implicitly using the stack pointer and they
171 // could be inlined.
172 if (BC.MIB->isTailCall(Inst))
173 break;
175 if (BC.MIB->isCFI(Inst)) {
176 HasCFI = true;
177 continue;
180 if (BC.MIB->isCall(Inst))
181 IsLeaf = false;
183 // Push/pop instructions are straightforward to handle.
184 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
185 continue;
187 DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
188 BC.MIB->hasUseOfPhysReg(Inst, SPReg);
193 if (HasCFI) {
194 if (!opts::InlineIgnoreLeafCFI)
195 return INL_NONE;
197 if (!IsLeaf && !opts::InlineIgnoreCFI)
198 return INL_NONE;
201 InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);
203 size_t Size = BF.estimateSize();
205 Info.SizeAfterInlining = Size;
206 Info.SizeAfterTailCallInlining = Size;
208 // Handle special case of the known size reduction.
209 if (BF.size() == 1) {
210 // For a regular call the last return instruction could be removed
211 // (or converted to a branch).
212 const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
213 if (LastInst && BC.MIB->isReturn(*LastInst) &&
214 !BC.MIB->isTailCall(*LastInst)) {
215 const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
216 assert(Size >= RetInstSize);
217 Info.SizeAfterInlining -= RetInstSize;
221 return Info;
224 void Inliner::findInliningCandidates(BinaryContext &BC) {
225 for (const auto &BFI : BC.getBinaryFunctions()) {
226 const BinaryFunction &Function = BFI.second;
227 if (!shouldOptimize(Function))
228 continue;
229 const InliningInfo InlInfo = getInliningInfo(Function);
230 if (InlInfo.Type != INL_NONE)
231 InliningCandidates[&Function] = InlInfo;
235 std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
236 Inliner::inlineCall(BinaryBasicBlock &CallerBB,
237 BinaryBasicBlock::iterator CallInst,
238 const BinaryFunction &Callee) {
239 BinaryFunction &CallerFunction = *CallerBB.getFunction();
240 BinaryContext &BC = CallerFunction.getBinaryContext();
241 auto &MIB = *BC.MIB;
243 assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
244 assert(!Callee.isMultiEntry() &&
245 "cannot inline function with multiple entries");
246 assert(!Callee.hasJumpTables() &&
247 "cannot inline function with jump table(s)");
249 // Get information about the call site.
250 const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
251 const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
252 const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
253 const std::optional<MCPlus::MCLandingPad> CSEHInfo =
254 BC.MIB->getEHInfo(*CallInst);
256 // Split basic block at the call site if there will be more incoming edges
257 // coming from the callee.
258 BinaryBasicBlock *FirstInlinedBB = &CallerBB;
259 if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
260 FirstInlinedBB = CallerBB.splitAt(CallInst);
261 CallInst = FirstInlinedBB->begin();
264 // Split basic block after the call instruction unless the callee is trivial
265 // (i.e. consists of a single basic block). If necessary, obtain a basic block
266 // for return instructions in the callee to redirect to.
267 BinaryBasicBlock *NextBB = nullptr;
268 if (Callee.size() > 1) {
269 if (std::next(CallInst) != FirstInlinedBB->end())
270 NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
271 else
272 NextBB = FirstInlinedBB->getSuccessor();
274 if (NextBB)
275 FirstInlinedBB->removeSuccessor(NextBB);
277 // Remove the call instruction.
278 auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);
280 double ProfileRatio = 0;
281 if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
282 ProfileRatio =
283 (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;
285 // Save execution count of the first block as we don't want it to change
286 // later due to profile adjustment rounding errors.
287 const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();
289 // Copy basic blocks and maintain a map from their origin.
290 std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
291 InlinedBBMap[&Callee.front()] = FirstInlinedBB;
292 for (const BinaryBasicBlock &BB : llvm::drop_begin(Callee)) {
293 BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
294 InlinedBBMap[&BB] = InlinedBB;
295 InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
296 if (Callee.hasValidProfile())
297 InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
298 else
299 InlinedBB->setExecutionCount(FirstInlinedBBCount);
302 // Copy over instructions and edges.
303 for (const BinaryBasicBlock &BB : Callee) {
304 BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];
306 if (InlinedBB != FirstInlinedBB)
307 InsertII = InlinedBB->begin();
309 // Copy over instructions making any necessary mods.
310 for (MCInst Inst : BB) {
311 if (MIB.isPseudo(Inst))
312 continue;
314 MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
316 // Fix branch target. Strictly speaking, we don't have to do this as
317 // targets of direct branches will be fixed later and don't matter
318 // in the CFG state. However, disassembly may look misleading, and
319 // hence we do the fixing.
320 if (MIB.isBranch(Inst)) {
321 assert(!MIB.isIndirectBranch(Inst) &&
322 "unexpected indirect branch in callee");
323 const BinaryBasicBlock *TargetBB =
324 Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
325 assert(TargetBB && "cannot find target block in callee");
326 MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
327 BC.Ctx.get());
330 if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
331 InsertII =
332 std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
333 continue;
336 // Handle special instructions for a non-tail call site.
337 if (!MIB.isCall(Inst)) {
338 // Returns are removed.
339 break;
342 MIB.convertTailCallToCall(Inst);
344 // Propagate EH-related info to call instructions.
345 if (CSIsInvoke) {
346 MIB.addEHInfo(Inst, *CSEHInfo);
347 if (CSGNUArgsSize >= 0)
348 MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
351 InsertII =
352 std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
355 // Add CFG edges to the basic blocks of the inlined instance.
356 std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
357 llvm::transform(BB.successors(), Successors.begin(),
358 [&InlinedBBMap](const BinaryBasicBlock *BB) {
359 return InlinedBBMap.at(BB);
362 if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
363 InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
364 BB.branch_info_begin(), BB.branch_info_end());
365 else
366 InlinedBB->addSuccessors(Successors.begin(), Successors.end());
368 if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
369 // Either it's a return block or the last instruction never returns.
370 InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
373 // Scale profiling info for blocks and edges after inlining.
374 if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
375 if (opts::AdjustProfile)
376 InlinedBB->adjustExecutionCount(ProfileRatio);
377 else
378 InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
379 ProfileRatio);
383 // Restore the original execution count of the first inlined basic block.
384 FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);
386 CallerFunction.recomputeLandingPads();
388 if (NextBB)
389 return std::make_pair(NextBB, NextBB->begin());
391 if (Callee.size() == 1)
392 return std::make_pair(FirstInlinedBB, InsertII);
394 return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
397 bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
398 BinaryContext &BC = Function.getBinaryContext();
399 std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
400 Function.getLayout().block_end());
401 llvm::sort(
402 Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
403 return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
406 bool DidInlining = false;
407 for (BinaryBasicBlock *BB : Blocks) {
408 for (auto InstIt = BB->begin(); InstIt != BB->end();) {
409 MCInst &Inst = *InstIt;
410 if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
411 !Inst.getOperand(0).isExpr()) {
412 ++InstIt;
413 continue;
416 const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
417 assert(TargetSymbol && "target symbol expected for direct call");
419 // Don't inline calls to a secondary entry point in a target function.
420 uint64_t EntryID = 0;
421 BinaryFunction *TargetFunction =
422 BC.getFunctionForSymbol(TargetSymbol, &EntryID);
423 if (!TargetFunction || EntryID != 0) {
424 ++InstIt;
425 continue;
428 // Don't do recursive inlining.
429 if (TargetFunction == &Function) {
430 ++InstIt;
431 continue;
434 auto IInfo = InliningCandidates.find(TargetFunction);
435 if (IInfo == InliningCandidates.end()) {
436 ++InstIt;
437 continue;
440 const bool IsTailCall = BC.MIB->isTailCall(Inst);
441 if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
442 ++InstIt;
443 continue;
446 int64_t SizeAfterInlining;
447 if (IsTailCall)
448 SizeAfterInlining =
449 IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
450 else
451 SizeAfterInlining =
452 IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);
454 if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
455 if (!opts::InlineSmallFunctions ||
456 SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
457 ++InstIt;
458 continue;
462 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
463 << " in " << Function << " : " << BB->getName()
464 << ". Count: " << BB->getKnownExecutionCount()
465 << ". Size change: " << SizeAfterInlining
466 << " bytes.\n");
468 std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
470 DidInlining = true;
471 TotalInlinedBytes += SizeAfterInlining;
473 ++NumInlinedCallSites;
474 NumInlinedDynamicCalls += BB->getExecutionCount();
476 // Subtract basic block execution count from the callee execution count.
477 if (opts::AdjustProfile)
478 TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());
480 // Check if the caller inlining status has to be adjusted.
481 if (IInfo->second.Type == INL_TAILCALL) {
482 auto CallerIInfo = InliningCandidates.find(&Function);
483 if (CallerIInfo != InliningCandidates.end() &&
484 CallerIInfo->second.Type == INL_ANY) {
485 LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
486 << Function << '\n');
487 CallerIInfo->second.Type = INL_TAILCALL;
491 if (NumInlinedCallSites == opts::InlineLimit)
492 return true;
496 return DidInlining;
499 void Inliner::runOnFunctions(BinaryContext &BC) {
500 opts::syncOptions();
502 if (!opts::inliningEnabled())
503 return;
505 bool InlinedOnce;
506 unsigned NumIters = 0;
507 do {
508 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
509 break;
511 InlinedOnce = false;
513 InliningCandidates.clear();
514 findInliningCandidates(BC);
516 std::vector<BinaryFunction *> ConsideredFunctions;
517 for (auto &BFI : BC.getBinaryFunctions()) {
518 BinaryFunction &Function = BFI.second;
519 if (!shouldOptimize(Function))
520 continue;
521 ConsideredFunctions.push_back(&Function);
523 llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
524 const BinaryFunction *B) {
525 return B->getKnownExecutionCount() < A->getKnownExecutionCount();
527 for (BinaryFunction *Function : ConsideredFunctions) {
528 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
529 break;
531 const bool DidInline = inlineCallsInFunction(*Function);
533 if (DidInline)
534 Modified.insert(Function);
536 InlinedOnce |= DidInline;
539 ++NumIters;
540 } while (InlinedOnce && NumIters < opts::InlineMaxIters);
542 if (NumInlinedCallSites)
543 outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
544 << NumInlinedCallSites << " call sites in " << NumIters
545 << " iteration(s). Change in binary size: " << TotalInlinedBytes
546 << " bytes.\n";
549 } // namespace bolt
550 } // namespace llvm