1 //===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file transforms calls of the current function (self recursion) followed
10 // by a return instruction with a branch to the entry of the function, creating
11 // a loop. This pass also implements the following extensions to the basic
14 // 1. Trivial instructions between the call and return do not prevent the
15 // transformation from taking place, though currently the analysis cannot
16 // support moving any really useful instructions (only dead ones).
17 // 2. This pass transforms functions that are prevented from being tail
18 // recursive by an associative and commutative expression to use an
19 // accumulator variable, thus compiling the typical naive factorial or
20 // 'fib' implementation into efficient code.
21 // 3. TRE is performed if the function returns void, if the return
22 // returns the result returned by the call, or if the function returns a
23 // run-time constant on all exits from the function. It is possible, though
24 // unlikely, that the return returns something else (like constant 0), and
25 // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
26 // the function return the exact same value.
27 // 4. If it can prove that callees do not access their caller stack frame,
28 // they are marked as eligible for tail call elimination (by the code
31 // There are several improvements that could be made:
33 // 1. If the function has any alloca instructions, these instructions will be
34 // moved out of the entry block of the function, causing them to be
35 // evaluated each time through the tail recursion. Safely keeping allocas
36 // in the entry block requires analysis to proves that the tail-called
37 // function does not read or write the stack object.
38 // 2. Tail recursion is only performed if the call immediately precedes the
39 // return instruction. It's possible that there could be a jump between
40 // the call and the return.
41 // 3. There can be intervening operations between the call and the return that
42 // prevent the TRE from occurring. For example, there could be GEP's and
43 // stores to memory that will not be read or written by the call. This
44 // requires some substantial analysis (such as with DSA) to prove safe to
45 // move ahead of the call, but doing so could allow many more TREs to be
46 // performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
47 // 4. The algorithm we use to detect if callees access their caller stack
48 // frames is very primitive.
50 //===----------------------------------------------------------------------===//
52 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
53 #include "llvm/ADT/STLExtras.h"
54 #include "llvm/ADT/SmallPtrSet.h"
55 #include "llvm/ADT/Statistic.h"
56 #include "llvm/Analysis/CFG.h"
57 #include "llvm/Analysis/CaptureTracking.h"
58 #include "llvm/Analysis/DomTreeUpdater.h"
59 #include "llvm/Analysis/GlobalsModRef.h"
60 #include "llvm/Analysis/InlineCost.h"
61 #include "llvm/Analysis/InstructionSimplify.h"
62 #include "llvm/Analysis/Loads.h"
63 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
64 #include "llvm/Analysis/PostDominators.h"
65 #include "llvm/Analysis/TargetTransformInfo.h"
66 #include "llvm/Analysis/ValueTracking.h"
67 #include "llvm/IR/CFG.h"
68 #include "llvm/IR/Constants.h"
69 #include "llvm/IR/DataLayout.h"
70 #include "llvm/IR/DerivedTypes.h"
71 #include "llvm/IR/DiagnosticInfo.h"
72 #include "llvm/IR/Dominators.h"
73 #include "llvm/IR/Function.h"
74 #include "llvm/IR/IRBuilder.h"
75 #include "llvm/IR/InstIterator.h"
76 #include "llvm/IR/Instructions.h"
77 #include "llvm/IR/IntrinsicInst.h"
78 #include "llvm/IR/Module.h"
79 #include "llvm/IR/ValueHandle.h"
80 #include "llvm/InitializePasses.h"
81 #include "llvm/Pass.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Scalar.h"
85 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
86 #include "llvm/Transforms/Utils/Local.h"
89 #define DEBUG_TYPE "tailcallelim"
91 STATISTIC(NumEliminated
, "Number of tail calls removed");
92 STATISTIC(NumRetDuped
, "Number of return duplicated");
93 STATISTIC(NumAccumAdded
, "Number of accumulators introduced");
95 /// Scan the specified function for alloca instructions.
96 /// If it contains any dynamic allocas, returns false.
97 static bool canTRE(Function
&F
) {
98 // TODO: We don't do TRE if dynamic allocas are used.
99 // Dynamic allocas allocate stack space which should be
100 // deallocated before new iteration started. That is
101 // currently not implemented.
102 return llvm::all_of(instructions(F
), [](Instruction
&I
) {
103 auto *AI
= dyn_cast
<AllocaInst
>(&I
);
104 return !AI
|| AI
->isStaticAlloca();
109 struct AllocaDerivedValueTracker
{
110 // Start at a root value and walk its use-def chain to mark calls that use the
111 // value or a derived value in AllocaUsers, and places where it may escape in
113 void walk(Value
*Root
) {
114 SmallVector
<Use
*, 32> Worklist
;
115 SmallPtrSet
<Use
*, 32> Visited
;
117 auto AddUsesToWorklist
= [&](Value
*V
) {
118 for (auto &U
: V
->uses()) {
119 if (!Visited
.insert(&U
).second
)
121 Worklist
.push_back(&U
);
125 AddUsesToWorklist(Root
);
127 while (!Worklist
.empty()) {
128 Use
*U
= Worklist
.pop_back_val();
129 Instruction
*I
= cast
<Instruction
>(U
->getUser());
131 switch (I
->getOpcode()) {
132 case Instruction::Call
:
133 case Instruction::Invoke
: {
134 auto &CB
= cast
<CallBase
>(*I
);
135 // If the alloca-derived argument is passed byval it is not an escape
136 // point, or a use of an alloca. Calling with byval copies the contents
137 // of the alloca into argument registers or stack slots, which exist
138 // beyond the lifetime of the current frame.
139 if (CB
.isArgOperand(U
) && CB
.isByValArgument(CB
.getArgOperandNo(U
)))
142 CB
.isDataOperand(U
) && CB
.doesNotCapture(CB
.getDataOperandNo(U
));
143 callUsesLocalStack(CB
, IsNocapture
);
145 // If the alloca-derived argument is passed in as nocapture, then it
146 // can't propagate to the call's return. That would be capturing.
151 case Instruction::Load
: {
152 // The result of a load is not alloca-derived (unless an alloca has
153 // otherwise escaped, but this is a local analysis).
156 case Instruction::Store
: {
157 if (U
->getOperandNo() == 0)
158 EscapePoints
.insert(I
);
159 continue; // Stores have no users to analyze.
161 case Instruction::BitCast
:
162 case Instruction::GetElementPtr
:
163 case Instruction::PHI
:
164 case Instruction::Select
:
165 case Instruction::AddrSpaceCast
:
168 EscapePoints
.insert(I
);
172 AddUsesToWorklist(I
);
176 void callUsesLocalStack(CallBase
&CB
, bool IsNocapture
) {
177 // Add it to the list of alloca users.
178 AllocaUsers
.insert(&CB
);
180 // If it's nocapture then it can't capture this alloca.
184 // If it can write to memory, it can leak the alloca value.
185 if (!CB
.onlyReadsMemory())
186 EscapePoints
.insert(&CB
);
189 SmallPtrSet
<Instruction
*, 32> AllocaUsers
;
190 SmallPtrSet
<Instruction
*, 32> EscapePoints
;
194 static bool markTails(Function
&F
, OptimizationRemarkEmitter
*ORE
) {
195 if (F
.callsFunctionThatReturnsTwice())
198 // The local stack holds all alloca instructions and all byval arguments.
199 AllocaDerivedValueTracker Tracker
;
200 for (Argument
&Arg
: F
.args()) {
201 if (Arg
.hasByValAttr())
206 if (AllocaInst
*AI
= dyn_cast
<AllocaInst
>(&I
))
210 bool Modified
= false;
212 // Track whether a block is reachable after an alloca has escaped. Blocks that
213 // contain the escaping instruction will be marked as being visited without an
214 // escaped alloca, since that is how the block began.
220 DenseMap
<BasicBlock
*, VisitType
> Visited
;
222 // We propagate the fact that an alloca has escaped from block to successor.
223 // Visit the blocks that are propagating the escapedness first. To do this, we
224 // maintain two worklists.
225 SmallVector
<BasicBlock
*, 32> WorklistUnescaped
, WorklistEscaped
;
227 // We may enter a block and visit it thinking that no alloca has escaped yet,
228 // then see an escape point and go back around a loop edge and come back to
229 // the same block twice. Because of this, we defer setting tail on calls when
230 // we first encounter them in a block. Every entry in this list does not
231 // statically use an alloca via use-def chain analysis, but may find an alloca
232 // through other means if the block turns out to be reachable after an escape
234 SmallVector
<CallInst
*, 32> DeferredTails
;
236 BasicBlock
*BB
= &F
.getEntryBlock();
237 VisitType Escaped
= UNESCAPED
;
239 for (auto &I
: *BB
) {
240 if (Tracker
.EscapePoints
.count(&I
))
243 CallInst
*CI
= dyn_cast
<CallInst
>(&I
);
244 // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
245 // considered accessing memory and will be marked as a tail call if we
246 // don't bail out here.
247 if (!CI
|| CI
->isTailCall() || isa
<DbgInfoIntrinsic
>(&I
) ||
248 isa
<PseudoProbeInst
>(&I
))
251 // Special-case operand bundle "clang.arc.attachedcall".
253 CI
->isNoTailCall() || CI
->hasOperandBundlesOtherThan(
254 LLVMContext::OB_clang_arc_attachedcall
);
256 if (!IsNoTail
&& CI
->doesNotAccessMemory()) {
257 // A call to a readnone function whose arguments are all things computed
258 // outside this function can be marked tail. Even if you stored the
259 // alloca address into a global, a readnone function can't load the
262 // Note that this runs whether we know an alloca has escaped or not. If
263 // it has, then we can't trust Tracker.AllocaUsers to be accurate.
264 bool SafeToTail
= true;
265 for (auto &Arg
: CI
->arg_operands()) {
266 if (isa
<Constant
>(Arg
.getUser()))
268 if (Argument
*A
= dyn_cast
<Argument
>(Arg
.getUser()))
269 if (!A
->hasByValAttr())
277 return OptimizationRemark(DEBUG_TYPE
, "tailcall-readnone", CI
)
278 << "marked as tail call candidate (readnone)";
286 if (!IsNoTail
&& Escaped
== UNESCAPED
&& !Tracker
.AllocaUsers
.count(CI
))
287 DeferredTails
.push_back(CI
);
290 for (auto *SuccBB
: successors(BB
)) {
291 auto &State
= Visited
[SuccBB
];
292 if (State
< Escaped
) {
294 if (State
== ESCAPED
)
295 WorklistEscaped
.push_back(SuccBB
);
297 WorklistUnescaped
.push_back(SuccBB
);
301 if (!WorklistEscaped
.empty()) {
302 BB
= WorklistEscaped
.pop_back_val();
306 while (!WorklistUnescaped
.empty()) {
307 auto *NextBB
= WorklistUnescaped
.pop_back_val();
308 if (Visited
[NextBB
] == UNESCAPED
) {
317 for (CallInst
*CI
: DeferredTails
) {
318 if (Visited
[CI
->getParent()] != ESCAPED
) {
319 // If the escape point was part way through the block, calls after the
320 // escape point wouldn't have been put into DeferredTails.
321 LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI
<< "\n");
330 /// Return true if it is safe to move the specified
331 /// instruction from after the call to before the call, assuming that all
332 /// instructions between the call and this instruction are movable.
334 static bool canMoveAboveCall(Instruction
*I
, CallInst
*CI
, AliasAnalysis
*AA
) {
335 if (isa
<DbgInfoIntrinsic
>(I
))
338 if (const IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(I
))
339 if (II
->getIntrinsicID() == Intrinsic::lifetime_end
&&
340 llvm::findAllocaForValue(II
->getArgOperand(1)))
343 // FIXME: We can move load/store/call/free instructions above the call if the
344 // call does not mod/ref the memory location being processed.
345 if (I
->mayHaveSideEffects()) // This also handles volatile loads.
348 if (LoadInst
*L
= dyn_cast
<LoadInst
>(I
)) {
349 // Loads may always be moved above calls without side effects.
350 if (CI
->mayHaveSideEffects()) {
351 // Non-volatile loads may be moved above a call with side effects if it
352 // does not write to memory and the load provably won't trap.
353 // Writes to memory only matter if they may alias the pointer
354 // being loaded from.
355 const DataLayout
&DL
= L
->getModule()->getDataLayout();
356 if (isModSet(AA
->getModRefInfo(CI
, MemoryLocation::get(L
))) ||
357 !isSafeToLoadUnconditionally(L
->getPointerOperand(), L
->getType(),
358 L
->getAlign(), DL
, L
))
363 // Otherwise, if this is a side-effect free instruction, check to make sure
364 // that it does not use the return value of the call. If it doesn't use the
365 // return value of the call, it must only use things that are defined before
366 // the call, or movable instructions between the call and the instruction
368 return !is_contained(I
->operands(), CI
);
371 static bool canTransformAccumulatorRecursion(Instruction
*I
, CallInst
*CI
) {
372 if (!I
->isAssociative() || !I
->isCommutative())
375 assert(I
->getNumOperands() == 2 &&
376 "Associative/commutative operations should have 2 args!");
378 // Exactly one operand should be the result of the call instruction.
379 if ((I
->getOperand(0) == CI
&& I
->getOperand(1) == CI
) ||
380 (I
->getOperand(0) != CI
&& I
->getOperand(1) != CI
))
383 // The only user of this instruction we allow is a single return instruction.
384 if (!I
->hasOneUse() || !isa
<ReturnInst
>(I
->user_back()))
390 static Instruction
*firstNonDbg(BasicBlock::iterator I
) {
391 while (isa
<DbgInfoIntrinsic
>(I
))
397 class TailRecursionEliminator
{
399 const TargetTransformInfo
*TTI
;
401 OptimizationRemarkEmitter
*ORE
;
404 // The below are shared state we want to have available when eliminating any
405 // calls in the function. There values should be populated by
406 // createTailRecurseLoopHeader the first time we find a call we can eliminate.
407 BasicBlock
*HeaderBB
= nullptr;
408 SmallVector
<PHINode
*, 8> ArgumentPHIs
;
410 // PHI node to store our return value.
411 PHINode
*RetPN
= nullptr;
413 // i1 PHI node to track if we have a valid return value stored in RetPN.
414 PHINode
*RetKnownPN
= nullptr;
416 // Vector of select instructions we insereted. These selects use RetKnownPN
417 // to either propagate RetPN or select a new return value.
418 SmallVector
<SelectInst
*, 8> RetSelects
;
420 // The below are shared state needed when performing accumulator recursion.
421 // There values should be populated by insertAccumulator the first time we
422 // find an elimination that requires an accumulator.
424 // PHI node to store our current accumulated value.
425 PHINode
*AccPN
= nullptr;
427 // The instruction doing the accumulating.
428 Instruction
*AccumulatorRecursionInstr
= nullptr;
430 TailRecursionEliminator(Function
&F
, const TargetTransformInfo
*TTI
,
431 AliasAnalysis
*AA
, OptimizationRemarkEmitter
*ORE
,
433 : F(F
), TTI(TTI
), AA(AA
), ORE(ORE
), DTU(DTU
) {}
435 CallInst
*findTRECandidate(BasicBlock
*BB
);
437 void createTailRecurseLoopHeader(CallInst
*CI
);
439 void insertAccumulator(Instruction
*AccRecInstr
);
441 bool eliminateCall(CallInst
*CI
);
443 void cleanupAndFinalize();
445 bool processBlock(BasicBlock
&BB
);
447 void copyByValueOperandIntoLocalTemp(CallInst
*CI
, int OpndIdx
);
449 void copyLocalTempOfByValueOperandIntoArguments(CallInst
*CI
, int OpndIdx
);
452 static bool eliminate(Function
&F
, const TargetTransformInfo
*TTI
,
453 AliasAnalysis
*AA
, OptimizationRemarkEmitter
*ORE
,
454 DomTreeUpdater
&DTU
);
458 CallInst
*TailRecursionEliminator::findTRECandidate(BasicBlock
*BB
) {
459 Instruction
*TI
= BB
->getTerminator();
461 if (&BB
->front() == TI
) // Make sure there is something before the terminator.
464 // Scan backwards from the return, checking to see if there is a tail call in
465 // this block. If so, set CI to it.
466 CallInst
*CI
= nullptr;
467 BasicBlock::iterator
BBI(TI
);
469 CI
= dyn_cast
<CallInst
>(BBI
);
470 if (CI
&& CI
->getCalledFunction() == &F
)
473 if (BBI
== BB
->begin())
474 return nullptr; // Didn't find a potential tail call.
478 assert((!CI
->isTailCall() || !CI
->isNoTailCall()) &&
479 "Incompatible call site attributes(Tail,NoTail)");
480 if (!CI
->isTailCall())
483 // As a special case, detect code like this:
484 // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
485 // and disable this xform in this case, because the code generator will
486 // lower the call to fabs into inline code.
487 if (BB
== &F
.getEntryBlock() &&
488 firstNonDbg(BB
->front().getIterator()) == CI
&&
489 firstNonDbg(std::next(BB
->begin())) == TI
&& CI
->getCalledFunction() &&
490 !TTI
->isLoweredToCall(CI
->getCalledFunction())) {
491 // A single-block function with just a call and a return. Check that
492 // the arguments match.
493 auto I
= CI
->arg_begin(), E
= CI
->arg_end();
494 Function::arg_iterator FI
= F
.arg_begin(), FE
= F
.arg_end();
495 for (; I
!= E
&& FI
!= FE
; ++I
, ++FI
)
496 if (*I
!= &*FI
) break;
497 if (I
== E
&& FI
== FE
)
504 void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst
*CI
) {
505 HeaderBB
= &F
.getEntryBlock();
506 BasicBlock
*NewEntry
= BasicBlock::Create(F
.getContext(), "", &F
, HeaderBB
);
507 NewEntry
->takeName(HeaderBB
);
508 HeaderBB
->setName("tailrecurse");
509 BranchInst
*BI
= BranchInst::Create(HeaderBB
, NewEntry
);
510 BI
->setDebugLoc(CI
->getDebugLoc());
512 // Move all fixed sized allocas from HeaderBB to NewEntry.
513 for (BasicBlock::iterator OEBI
= HeaderBB
->begin(), E
= HeaderBB
->end(),
514 NEBI
= NewEntry
->begin();
516 if (AllocaInst
*AI
= dyn_cast
<AllocaInst
>(OEBI
++))
517 if (isa
<ConstantInt
>(AI
->getArraySize()))
518 AI
->moveBefore(&*NEBI
);
520 // Now that we have created a new block, which jumps to the entry
521 // block, insert a PHI node for each argument of the function.
522 // For now, we initialize each PHI to only have the real arguments
523 // which are passed in.
524 Instruction
*InsertPos
= &HeaderBB
->front();
525 for (Function::arg_iterator I
= F
.arg_begin(), E
= F
.arg_end(); I
!= E
; ++I
) {
527 PHINode::Create(I
->getType(), 2, I
->getName() + ".tr", InsertPos
);
528 I
->replaceAllUsesWith(PN
); // Everyone use the PHI node now!
529 PN
->addIncoming(&*I
, NewEntry
);
530 ArgumentPHIs
.push_back(PN
);
533 // If the function doen't return void, create the RetPN and RetKnownPN PHI
534 // nodes to track our return value. We initialize RetPN with undef and
535 // RetKnownPN with false since we can't know our return value at function
537 Type
*RetType
= F
.getReturnType();
538 if (!RetType
->isVoidTy()) {
539 Type
*BoolType
= Type::getInt1Ty(F
.getContext());
540 RetPN
= PHINode::Create(RetType
, 2, "ret.tr", InsertPos
);
541 RetKnownPN
= PHINode::Create(BoolType
, 2, "ret.known.tr", InsertPos
);
543 RetPN
->addIncoming(UndefValue::get(RetType
), NewEntry
);
544 RetKnownPN
->addIncoming(ConstantInt::getFalse(BoolType
), NewEntry
);
547 // The entry block was changed from HeaderBB to NewEntry.
548 // The forward DominatorTree needs to be recalculated when the EntryBB is
549 // changed. In this corner-case we recalculate the entire tree.
550 DTU
.recalculate(*NewEntry
->getParent());
553 void TailRecursionEliminator::insertAccumulator(Instruction
*AccRecInstr
) {
554 assert(!AccPN
&& "Trying to insert multiple accumulators");
556 AccumulatorRecursionInstr
= AccRecInstr
;
558 // Start by inserting a new PHI node for the accumulator.
559 pred_iterator PB
= pred_begin(HeaderBB
), PE
= pred_end(HeaderBB
);
560 AccPN
= PHINode::Create(F
.getReturnType(), std::distance(PB
, PE
) + 1,
561 "accumulator.tr", &HeaderBB
->front());
563 // Loop over all of the predecessors of the tail recursion block. For the
564 // real entry into the function we seed the PHI with the identity constant for
565 // the accumulation operation. For any other existing branches to this block
566 // (due to other tail recursions eliminated) the accumulator is not modified.
567 // Because we haven't added the branch in the current block to HeaderBB yet,
568 // it will not show up as a predecessor.
569 for (pred_iterator PI
= PB
; PI
!= PE
; ++PI
) {
571 if (P
== &F
.getEntryBlock()) {
572 Constant
*Identity
= ConstantExpr::getBinOpIdentity(
573 AccRecInstr
->getOpcode(), AccRecInstr
->getType());
574 AccPN
->addIncoming(Identity
, P
);
576 AccPN
->addIncoming(AccPN
, P
);
583 // Creates a copy of contents of ByValue operand of the specified
584 // call instruction into the newly created temporarily variable.
585 void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst
*CI
,
587 PointerType
*ArgTy
= cast
<PointerType
>(CI
->getArgOperand(OpndIdx
)->getType());
588 Type
*AggTy
= ArgTy
->getElementType();
589 const DataLayout
&DL
= F
.getParent()->getDataLayout();
591 // Get alignment of byVal operand.
592 Align
Alignment(CI
->getParamAlign(OpndIdx
).valueOrOne());
594 // Create alloca for temporarily byval operands.
595 // Put alloca into the entry block.
596 Value
*NewAlloca
= new AllocaInst(
597 AggTy
, DL
.getAllocaAddrSpace(), nullptr, Alignment
,
598 CI
->getArgOperand(OpndIdx
)->getName(), &*F
.getEntryBlock().begin());
600 IRBuilder
<> Builder(CI
);
601 Value
*Size
= Builder
.getInt64(DL
.getTypeAllocSize(AggTy
));
603 // Copy data from byvalue operand into the temporarily variable.
604 Builder
.CreateMemCpy(NewAlloca
, /*DstAlign*/ Alignment
,
605 CI
->getArgOperand(OpndIdx
),
606 /*SrcAlign*/ Alignment
, Size
);
607 CI
->setArgOperand(OpndIdx
, NewAlloca
);
610 // Creates a copy from temporarily variable(keeping value of ByVal argument)
611 // into the corresponding function argument location.
612 void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
613 CallInst
*CI
, int OpndIdx
) {
614 PointerType
*ArgTy
= cast
<PointerType
>(CI
->getArgOperand(OpndIdx
)->getType());
615 Type
*AggTy
= ArgTy
->getElementType();
616 const DataLayout
&DL
= F
.getParent()->getDataLayout();
618 // Get alignment of byVal operand.
619 Align
Alignment(CI
->getParamAlign(OpndIdx
).valueOrOne());
621 IRBuilder
<> Builder(CI
);
622 Value
*Size
= Builder
.getInt64(DL
.getTypeAllocSize(AggTy
));
624 // Copy data from the temporarily variable into corresponding
625 // function argument location.
626 Builder
.CreateMemCpy(F
.getArg(OpndIdx
), /*DstAlign*/ Alignment
,
627 CI
->getArgOperand(OpndIdx
),
628 /*SrcAlign*/ Alignment
, Size
);
631 bool TailRecursionEliminator::eliminateCall(CallInst
*CI
) {
632 ReturnInst
*Ret
= cast
<ReturnInst
>(CI
->getParent()->getTerminator());
634 // Ok, we found a potential tail call. We can currently only transform the
635 // tail call if all of the instructions between the call and the return are
636 // movable to above the call itself, leaving the call next to the return.
637 // Check that this is the case now.
638 Instruction
*AccRecInstr
= nullptr;
639 BasicBlock::iterator
BBI(CI
);
640 for (++BBI
; &*BBI
!= Ret
; ++BBI
) {
641 if (canMoveAboveCall(&*BBI
, CI
, AA
))
644 // If we can't move the instruction above the call, it might be because it
645 // is an associative and commutative operation that could be transformed
646 // using accumulator recursion elimination. Check to see if this is the
647 // case, and if so, remember which instruction accumulates for later.
648 if (AccPN
|| !canTransformAccumulatorRecursion(&*BBI
, CI
))
649 return false; // We cannot eliminate the tail recursion!
651 // Yes, this is accumulator recursion. Remember which instruction
656 BasicBlock
*BB
= Ret
->getParent();
660 return OptimizationRemark(DEBUG_TYPE
, "tailcall-recursion", CI
)
661 << "transforming tail recursion into loop";
664 // OK! We can transform this tail call. If this is the first one found,
665 // create the new entry block, allowing us to branch back to the old entry.
667 createTailRecurseLoopHeader(CI
);
669 // Copy values of ByVal operands into local temporarily variables.
670 for (unsigned I
= 0, E
= CI
->getNumArgOperands(); I
!= E
; ++I
) {
671 if (CI
->isByValArgument(I
))
672 copyByValueOperandIntoLocalTemp(CI
, I
);
675 // Ok, now that we know we have a pseudo-entry block WITH all of the
676 // required PHI nodes, add entries into the PHI node for the actual
677 // parameters passed into the tail-recursive call.
678 for (unsigned I
= 0, E
= CI
->getNumArgOperands(); I
!= E
; ++I
) {
679 if (CI
->isByValArgument(I
)) {
680 copyLocalTempOfByValueOperandIntoArguments(CI
, I
);
681 ArgumentPHIs
[I
]->addIncoming(F
.getArg(I
), BB
);
683 ArgumentPHIs
[I
]->addIncoming(CI
->getArgOperand(I
), BB
);
687 insertAccumulator(AccRecInstr
);
689 // Rewrite the accumulator recursion instruction so that it does not use
690 // the result of the call anymore, instead, use the PHI node we just
692 AccRecInstr
->setOperand(AccRecInstr
->getOperand(0) != CI
, AccPN
);
695 // Update our return value tracking
697 if (Ret
->getReturnValue() == CI
|| AccRecInstr
) {
698 // Defer selecting a return value
699 RetPN
->addIncoming(RetPN
, BB
);
700 RetKnownPN
->addIncoming(RetKnownPN
, BB
);
702 // We found a return value we want to use, insert a select instruction to
703 // select it if we don't already know what our return value will be and
704 // store the result in our return value PHI node.
705 SelectInst
*SI
= SelectInst::Create(
706 RetKnownPN
, RetPN
, Ret
->getReturnValue(), "current.ret.tr", Ret
);
707 RetSelects
.push_back(SI
);
709 RetPN
->addIncoming(SI
, BB
);
710 RetKnownPN
->addIncoming(ConstantInt::getTrue(RetKnownPN
->getType()), BB
);
714 AccPN
->addIncoming(AccRecInstr
? AccRecInstr
: AccPN
, BB
);
717 // Now that all of the PHI nodes are in place, remove the call and
718 // ret instructions, replacing them with an unconditional branch.
719 BranchInst
*NewBI
= BranchInst::Create(HeaderBB
, Ret
);
720 NewBI
->setDebugLoc(CI
->getDebugLoc());
722 BB
->getInstList().erase(Ret
); // Remove return.
723 BB
->getInstList().erase(CI
); // Remove call.
724 DTU
.applyUpdates({{DominatorTree::Insert
, BB
, HeaderBB
}});
729 void TailRecursionEliminator::cleanupAndFinalize() {
730 // If we eliminated any tail recursions, it's possible that we inserted some
731 // silly PHI nodes which just merge an initial value (the incoming operand)
732 // with themselves. Check to see if we did and clean up our mess if so. This
733 // occurs when a function passes an argument straight through to its tail
735 for (PHINode
*PN
: ArgumentPHIs
) {
736 // If the PHI Node is a dynamic constant, replace it with the value it is.
737 if (Value
*PNV
= SimplifyInstruction(PN
, F
.getParent()->getDataLayout())) {
738 PN
->replaceAllUsesWith(PNV
);
739 PN
->eraseFromParent();
744 if (RetSelects
.empty()) {
745 // If we didn't insert any select instructions, then we know we didn't
746 // store a return value and we can remove the PHI nodes we inserted.
747 RetPN
->dropAllReferences();
748 RetPN
->eraseFromParent();
750 RetKnownPN
->dropAllReferences();
751 RetKnownPN
->eraseFromParent();
754 // We need to insert a copy of our accumulator instruction before any
755 // return in the function, and return its result instead.
756 Instruction
*AccRecInstr
= AccumulatorRecursionInstr
;
757 for (BasicBlock
&BB
: F
) {
758 ReturnInst
*RI
= dyn_cast
<ReturnInst
>(BB
.getTerminator());
762 Instruction
*AccRecInstrNew
= AccRecInstr
->clone();
763 AccRecInstrNew
->setName("accumulator.ret.tr");
764 AccRecInstrNew
->setOperand(AccRecInstr
->getOperand(0) == AccPN
,
766 AccRecInstrNew
->insertBefore(RI
);
767 RI
->setOperand(0, AccRecInstrNew
);
771 // We need to insert a select instruction before any return left in the
772 // function to select our stored return value if we have one.
773 for (BasicBlock
&BB
: F
) {
774 ReturnInst
*RI
= dyn_cast
<ReturnInst
>(BB
.getTerminator());
778 SelectInst
*SI
= SelectInst::Create(
779 RetKnownPN
, RetPN
, RI
->getOperand(0), "current.ret.tr", RI
);
780 RetSelects
.push_back(SI
);
781 RI
->setOperand(0, SI
);
785 // We need to insert a copy of our accumulator instruction before any
786 // of the selects we inserted, and select its result instead.
787 Instruction
*AccRecInstr
= AccumulatorRecursionInstr
;
788 for (SelectInst
*SI
: RetSelects
) {
789 Instruction
*AccRecInstrNew
= AccRecInstr
->clone();
790 AccRecInstrNew
->setName("accumulator.ret.tr");
791 AccRecInstrNew
->setOperand(AccRecInstr
->getOperand(0) == AccPN
,
792 SI
->getFalseValue());
793 AccRecInstrNew
->insertBefore(SI
);
794 SI
->setFalseValue(AccRecInstrNew
);
801 bool TailRecursionEliminator::processBlock(BasicBlock
&BB
) {
802 Instruction
*TI
= BB
.getTerminator();
804 if (BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
)) {
805 if (BI
->isConditional())
808 BasicBlock
*Succ
= BI
->getSuccessor(0);
809 ReturnInst
*Ret
= dyn_cast
<ReturnInst
>(Succ
->getFirstNonPHIOrDbg(true));
814 CallInst
*CI
= findTRECandidate(&BB
);
819 LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
820 << "INTO UNCOND BRANCH PRED: " << BB
);
821 FoldReturnIntoUncondBranch(Ret
, Succ
, &BB
, &DTU
);
824 // If all predecessors of Succ have been eliminated by
825 // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
826 // because the ret instruction in there is still using a value which
827 // eliminateCall will attempt to remove. This block can only contain
828 // instructions that can't have uses, therefore it is safe to remove.
829 if (pred_empty(Succ
))
834 } else if (isa
<ReturnInst
>(TI
)) {
835 CallInst
*CI
= findTRECandidate(&BB
);
838 return eliminateCall(CI
);
844 bool TailRecursionEliminator::eliminate(Function
&F
,
845 const TargetTransformInfo
*TTI
,
847 OptimizationRemarkEmitter
*ORE
,
848 DomTreeUpdater
&DTU
) {
849 if (F
.getFnAttribute("disable-tail-calls").getValueAsBool())
852 bool MadeChange
= false;
853 MadeChange
|= markTails(F
, ORE
);
855 // If this function is a varargs function, we won't be able to PHI the args
856 // right, so don't even try to convert it...
857 if (F
.getFunctionType()->isVarArg())
863 // Change any tail recursive calls to loops.
864 TailRecursionEliminator
TRE(F
, TTI
, AA
, ORE
, DTU
);
866 for (BasicBlock
&BB
: F
)
867 MadeChange
|= TRE
.processBlock(BB
);
869 TRE
.cleanupAndFinalize();
875 struct TailCallElim
: public FunctionPass
{
876 static char ID
; // Pass identification, replacement for typeid
877 TailCallElim() : FunctionPass(ID
) {
878 initializeTailCallElimPass(*PassRegistry::getPassRegistry());
881 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
882 AU
.addRequired
<TargetTransformInfoWrapperPass
>();
883 AU
.addRequired
<AAResultsWrapperPass
>();
884 AU
.addRequired
<OptimizationRemarkEmitterWrapperPass
>();
885 AU
.addPreserved
<GlobalsAAWrapperPass
>();
886 AU
.addPreserved
<DominatorTreeWrapperPass
>();
887 AU
.addPreserved
<PostDominatorTreeWrapperPass
>();
890 bool runOnFunction(Function
&F
) override
{
894 auto *DTWP
= getAnalysisIfAvailable
<DominatorTreeWrapperPass
>();
895 auto *DT
= DTWP
? &DTWP
->getDomTree() : nullptr;
896 auto *PDTWP
= getAnalysisIfAvailable
<PostDominatorTreeWrapperPass
>();
897 auto *PDT
= PDTWP
? &PDTWP
->getPostDomTree() : nullptr;
898 // There is no noticable performance difference here between Lazy and Eager
899 // UpdateStrategy based on some test results. It is feasible to switch the
900 // UpdateStrategy to Lazy if we find it profitable later.
901 DomTreeUpdater
DTU(DT
, PDT
, DomTreeUpdater::UpdateStrategy::Eager
);
903 return TailRecursionEliminator::eliminate(
904 F
, &getAnalysis
<TargetTransformInfoWrapperPass
>().getTTI(F
),
905 &getAnalysis
<AAResultsWrapperPass
>().getAAResults(),
906 &getAnalysis
<OptimizationRemarkEmitterWrapperPass
>().getORE(), DTU
);
911 char TailCallElim::ID
= 0;
912 INITIALIZE_PASS_BEGIN(TailCallElim
, "tailcallelim", "Tail Call Elimination",
914 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass
)
915 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass
)
916 INITIALIZE_PASS_END(TailCallElim
, "tailcallelim", "Tail Call Elimination",
919 // Public interface to the TailCallElimination pass
920 FunctionPass
*llvm::createTailCallEliminationPass() {
921 return new TailCallElim();
924 PreservedAnalyses
TailCallElimPass::run(Function
&F
,
925 FunctionAnalysisManager
&AM
) {
927 TargetTransformInfo
&TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
928 AliasAnalysis
&AA
= AM
.getResult
<AAManager
>(F
);
929 auto &ORE
= AM
.getResult
<OptimizationRemarkEmitterAnalysis
>(F
);
930 auto *DT
= AM
.getCachedResult
<DominatorTreeAnalysis
>(F
);
931 auto *PDT
= AM
.getCachedResult
<PostDominatorTreeAnalysis
>(F
);
932 // There is no noticable performance difference here between Lazy and Eager
933 // UpdateStrategy based on some test results. It is feasible to switch the
934 // UpdateStrategy to Lazy if we find it profitable later.
935 DomTreeUpdater
DTU(DT
, PDT
, DomTreeUpdater::UpdateStrategy::Eager
);
936 bool Changed
= TailRecursionEliminator::eliminate(F
, &TTI
, &AA
, &ORE
, DTU
);
939 return PreservedAnalyses::all();
940 PreservedAnalyses PA
;
941 PA
.preserve
<DominatorTreeAnalysis
>();
942 PA
.preserve
<PostDominatorTreeAnalysis
>();