//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
// The code in this file for handling inlines through invoke
// instructions preserves semantics only under some assumptions about
// the behavior of unwinders which correspond to gcc-style libUnwind
// exception personality functions.  Eventually the IR will be
// improved to make this unnecessary, but until then, this code is
// marked [LIBUNWIND].
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Intrinsics.h"
#include "llvm/Attributes.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
  return InlineFunction(CallSite(CI), IFI);
}
bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
  return InlineFunction(CallSite(II), IFI);
}

/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
  for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
    EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
    if (exn) return exn;
  }

  return 0;
}

/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
/// the given llvm.eh.exception call.
static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
  BasicBlock *exnBlock = exn->getParent();

  EHSelectorInst *outOfBlockSelector = 0;
  for (Instruction::use_iterator
         ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
    EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
    if (!sel) continue;

    // Immediately accept an eh.selector in the same block as the
    // exception call.
    if (sel->getParent() == exnBlock) return sel;

    // Otherwise, use the first selector we see.
    if (!outOfBlockSelector) outOfBlockSelector = sel;
  }

  return outOfBlockSelector;
}

/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
/// in the given landing pad.  In principle, llvm.eh.exception is
/// required to be in the landing pad; in practice, SplitCriticalEdge
/// can break that invariant, and then inlining can break it further.
/// There's a real need for a reliable solution here, but until that
/// happens, we have some fragile workarounds here.
static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
  // Look for an exception call in the actual landing pad.
  EHExceptionInst *exn = findExceptionInBlock(lpad);
  if (exn) return findSelectorForException(exn);

  // Okay, if that failed, look for one in an obvious successor.  If
  // we find one, we'll fix the IR by moving things back to the
  // landing pad.

  bool dominates = true; // does the lpad dominate the exn call
  BasicBlock *nonDominated = 0; // if not, the first non-dominated block
  BasicBlock *lastDominated = 0; // and the block which branched to it

  BasicBlock *exnBlock = lpad;

  // We need to protect against lpads that lead into infinite loops.
  SmallPtrSet<BasicBlock*,4> visited;
  visited.insert(exnBlock);

  do {
    // We're not going to apply this hack to anything more complicated
    // than a series of unconditional branches, so if the block
    // doesn't terminate in an unconditional branch, just fail.  More
    // complicated cases can arise when, say, sinking a call into a
    // split unwind edge and then inlining it; but that can do almost
    // *anything* to the CFG, including leaving the selector
    // completely unreachable.  The only way to fix that properly is
    // to (1) prohibit transforms which move the exception or selector
    // values away from the landing pad, e.g. by producing them with
    // instructions that are pinned to an edge like a phi, or
    // producing them with not-really-instructions, and (2) making
    // transforms which split edges deal with that.
    BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
    if (!branch || branch->isConditional()) return 0;

    BasicBlock *successor = branch->getSuccessor(0);

    // Fail if we found an infinite loop.
    if (!visited.insert(successor)) return 0;

    // If the successor isn't dominated by exnBlock:
    if (!successor->getSinglePredecessor()) {
      // We don't want to have to deal with threading the exception
      // through multiple levels of phi, so give up if we've already
      // followed a non-dominating edge.
      if (!dominates) return 0;

      // Otherwise, remember this as a non-dominating edge.
      dominates = false;
      nonDominated = successor;
      lastDominated = exnBlock;
    }

    exnBlock = successor;

    // Can we stop here?
    exn = findExceptionInBlock(exnBlock);
  } while (!exn);

  // Look for a selector call for the exception we found.
  EHSelectorInst *selector = findSelectorForException(exn);
  if (!selector) return 0;

  // The easy case is when the landing pad still dominates the
  // exception call, in which case we can just move both calls back to
  // the landing pad.
  if (dominates) {
    selector->moveBefore(lpad->getFirstNonPHI());
    exn->moveBefore(selector);
    return selector;
  }

  // Otherwise, we have to split at the first non-dominating block.
  // The CFG looks basically like this:
  //    lpad:
  //      insnsAndBranches_1
  //      br label %nonDominated
  //    nonDominated:
  //      phis_2
  //      insns_3
  //      %exn = call i8* @llvm.eh.exception()
  //      insnsAndBranches_4
  //      %selector = call @llvm.eh.selector(i8* %exn, ...
  // We need to turn this into:
  //    lpad:
  //      %exn0 = call i8* @llvm.eh.exception()
  //      %selector0 = call @llvm.eh.selector(i8* %exn0, ...
  //      insnsAndBranches_1
  //      br label %split  // from lastDominated
  //    nonDominated:
  //      phis_2 (without edge from lastDominated)
  //      %exn1 = call i8* @llvm.eh.exception()
  //      %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ...
  //      br label %split
  //    split:
  //      phis_2 (edge from lastDominated, edge from split)
  //      %exn = phi ...
  //      %selector = phi ...
  //      insns_3
  //      insnsAndBranches_4

  assert(nonDominated);
  assert(lastDominated);

  // First, make clones of the intrinsics to go in lpad.
  EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
  EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
  lpadSelector->setArgOperand(0, lpadExn);
  lpadSelector->insertBefore(lpad->getFirstNonPHI());
  lpadExn->insertBefore(lpadSelector);

  // Split the non-dominated block.
  BasicBlock *split =
    nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
                                  nonDominated->getName() + ".lpad-fix");

  // Redirect the last dominated branch there.
  cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);

  // Move the existing intrinsics to the end of the old block.
  selector->moveBefore(&nonDominated->back());
  exn->moveBefore(selector);

  Instruction *splitIP = &split->front();

  // For all the phis in nonDominated, make a new phi in split to join
  // that phi with the edge from lastDominated.
  for (BasicBlock::iterator
         i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
    PHINode *phi = dyn_cast<PHINode>(i);
    if (!phi) break;

    PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
                                        splitIP);
    phi->replaceAllUsesWith(splitPhi);
    splitPhi->addIncoming(phi, nonDominated);
    splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
                          lastDominated);
  }

  // Make new phis for the exception and selector.
  PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
  exn->replaceAllUsesWith(exnPhi);
  selector->setArgOperand(0, exn); // except for this use
  exnPhi->addIncoming(exn, nonDominated);
  exnPhi->addIncoming(lpadExn, lastDominated);

  PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
  selector->replaceAllUsesWith(selectorPhi);
  selectorPhi->addIncoming(selector, nonDominated);
  selectorPhi->addIncoming(lpadSelector, lastDominated);

  return lpadSelector;
}

namespace {
  /// A class for recording information about inlining through an invoke.
  class InvokeInliningInfo {
    BasicBlock *OuterUnwindDest;
    EHSelectorInst *OuterSelector;
    BasicBlock *InnerUnwindDest;
    PHINode *InnerExceptionPHI;
    PHINode *InnerSelectorPHI;
    SmallVector<Value*, 8> UnwindDestPHIValues;

  public:
    InvokeInliningInfo(InvokeInst *II) :
      OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
      InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0) {

      // If there are PHI nodes in the unwind destination block, we
      // need to keep track of which values came into them from the
      // invoke before removing the edge from this block.
      llvm::BasicBlock *invokeBB = II->getParent();
      for (BasicBlock::iterator I = OuterUnwindDest->begin();
           isa<PHINode>(I); ++I) {
        // Save the value to use for this edge.
        PHINode *phi = cast<PHINode>(I);
        UnwindDestPHIValues.push_back(phi->getIncomingValueForBlock(invokeBB));
      }
    }

    /// The outer unwind destination is the target of unwind edges
    /// introduced for calls within the inlined function.
    BasicBlock *getOuterUnwindDest() const {
      return OuterUnwindDest;
    }

    EHSelectorInst *getOuterSelector() {
      if (!OuterSelector)
        OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
      return OuterSelector;
    }

    BasicBlock *getInnerUnwindDest();

    bool forwardEHResume(CallInst *call, BasicBlock *src);

    /// Add incoming-PHI values to the unwind destination block for
    /// the given basic block, using the values for the original
    /// invoke's source block.
    void addIncomingPHIValuesFor(BasicBlock *BB) const {
      addIncomingPHIValuesForInto(BB, OuterUnwindDest);
    }

    void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
      BasicBlock::iterator I = dest->begin();
      for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
        PHINode *phi = cast<PHINode>(I);
        phi->addIncoming(UnwindDestPHIValues[i], src);
      }
    }
  };
}

/// Get or create a target for the branch out of rewritten calls to
/// llvm.eh.resume.
BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
  if (InnerUnwindDest) return InnerUnwindDest;

  // Find and hoist the llvm.eh.exception and llvm.eh.selector calls
  // in the outer landing pad to immediately following the phis.
  EHSelectorInst *selector = getOuterSelector();
  if (!selector) return 0;

  // The call to llvm.eh.exception *must* be in the landing pad.
  Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
  assert(exn->getParent() == OuterUnwindDest);

  // TODO: recognize when we've already done this, so that we don't
  // get a linear number of these when inlining calls into lots of
  // invokes with the same landing pad.

  Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
  assert(splitPoint != selector && "selector-on-exception dominance broken!");
  if (splitPoint == exn) {
    selector->removeFromParent();
    selector->insertAfter(exn);
    splitPoint = selector->getNextNode();
  } else {
    exn->moveBefore(splitPoint);
    selector->moveBefore(splitPoint);
  }

  // Split the landing pad.
  InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
                                        OuterUnwindDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned phiCapacity = 2;

  // Create corresponding new phis for all the phis in the outer landing pad.
  BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
  BasicBlock::iterator I = OuterUnwindDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *outerPhi = cast<PHINode>(I);
    PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
                                        outerPhi->getName() + ".lpad-body",
                                        insertPoint);
    outerPhi->replaceAllUsesWith(innerPhi);
    innerPhi->addIncoming(outerPhi, OuterUnwindDest);
  }

  // Create a phi for the exception value...
  InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
                                      "exn.lpad-body", insertPoint);
  exn->replaceAllUsesWith(InnerExceptionPHI);
  selector->setArgOperand(0, exn); // restore this use
  InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);

  // ...and the selector.
  InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
                                     "selector.lpad-body", insertPoint);
  selector->replaceAllUsesWith(InnerSelectorPHI);
  InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);
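
  // At this point the outer landing pad looks roughly like this (the names
  // are illustrative only, not the exact names produced):
  //    lpad:                          ; outer unwind dest
  //      ...original phis...
  //      %exn = call i8* @llvm.eh.exception()
  //      %sel = call i32 @llvm.eh.selector(i8* %exn, ...)
  //      br label %lpad.body
  //    lpad.body:                     ; inner unwind dest
  //      %exn.lpad-body = phi i8* [ %exn, %lpad ], ...
  //      %selector.lpad-body = phi i32 [ %sel, %lpad ], ...
  // Forwarded llvm.eh.resume calls will branch to %lpad.body and feed their
  // own exception and selector values into those phis.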
  return InnerUnwindDest;
}

/// [LIBUNWIND] Try to forward the given call, which logically occurs
/// at the end of the given block, as a branch to the inner unwind
/// block.  Returns true if the call was forwarded.
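///
/// For example (illustrative only), a resume call of the form
///   call void @llvm.eh.resume(i8* %exn, i32 %sel)
/// at the end of 'src' becomes an unconditional branch to the inner unwind
/// destination, with %exn and %sel added as incoming values to its phis.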
bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
  // First, check whether this is a call to the intrinsic.
  Function *fn = dyn_cast<Function>(call->getCalledValue());
  if (!fn || fn->getName() != "llvm.eh.resume")
    return false;

  // At this point, we need to return true on all paths, because
  // otherwise we'll construct an invoke of the intrinsic, which is
  // not well-formed.

  // Try to find or make an inner unwind dest, which will fail if we
  // can't find a selector call for the outer unwind dest.
  BasicBlock *dest = getInnerUnwindDest();
  bool hasSelector = (dest != 0);

  // If we failed, just use the outer unwind dest, dropping the
  // exception and selector on the floor.
  if (!hasSelector)
    dest = OuterUnwindDest;

  BranchInst::Create(dest, src);

  // Update the phis in the destination.  They were inserted in an
  // order which makes this work.
  addIncomingPHIValuesForInto(src, dest);

  if (hasSelector) {
    InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
    InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
  }

  return true;
}

/// [LIBUNWIND] Check whether this selector is "only cleanups":
///   call i32 @llvm.eh.selector(blah, blah, i32 0)
static bool isCleanupOnlySelector(EHSelectorInst *selector) {
  if (selector->getNumArgOperands() != 3) return false;
  ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
  return (val && val->isZero());
}

/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
/// an invoke, we have to turn all of the calls that can throw into
/// invokes.  This function analyzes BB to see if there are any calls, and if
/// so, it rewrites them to be invokes that jump to InvokeDest and fills in the
/// PHI nodes in that block with the values specified in InvokeDestPHIValues.
///
/// Returns true to indicate that the next block should be skipped.
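///
/// For example (illustrative only), a potentially-throwing call such as
///   %r = call i32 @callee(i32 %x)
/// is split out of its block and rewritten as
///   %r = invoke i32 @callee(i32 %x)
///           to label %r.noexc unwind label %outer.lpad
/// where %outer.lpad is the unwind destination of the invoke being inlined
/// through.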
static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
                                                   InvokeInliningInfo &Invoke) {
  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
    Instruction *I = BBI++;

    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(I);
    if (CI == 0) continue;

    // LIBUNWIND: merge selector instructions.
    if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
      EHSelectorInst *Outer = Invoke.getOuterSelector();
      if (!Outer) continue;

      bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
      bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);

      // If both selectors contain only cleanups, we don't need to do
      // anything.  TODO: this is really just a very specific instance
      // of a much more general optimization.
      if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;

      // Otherwise, we just append the outer selector to the inner selector.
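      // For example (the operands here are illustrative only), merging
      //   inner: call i32 @llvm.eh.selector(i8* %exn, i8* %pers, i32 0)
      //   outer: call i32 @llvm.eh.selector(i8* %exn2, i8* %pers, i8* %ti)
      // produces
      //   call i32 @llvm.eh.selector(i8* %exn, i8* %pers, i32 0, i8* %ti)
      // i.e. the inner selector's operands followed by the outer selector's
      // clauses (everything after its exception and personality operands).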
      SmallVector<Value*, 16> NewSelector;
      for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
        NewSelector.push_back(Inner->getArgOperand(i));
      for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
        NewSelector.push_back(Outer->getArgOperand(i));

      CallInst *NewInner =
        IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(),
                                      NewSelector.begin(),
                                      NewSelector.end());
      // No need to copy attributes, calling convention, etc.
      NewInner->takeName(Inner);
      Inner->replaceAllUsesWith(NewInner);
      Inner->eraseFromParent();
      continue;
    }

    // If this call cannot unwind, don't convert it to an invoke.
    if (CI->doesNotThrow())
      continue;

    // Convert this function call into an invoke instruction.
    // First, split the basic block.
    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");

    // Delete the unconditional branch inserted by splitBasicBlock
    BB->getInstList().pop_back();

    // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
    // directly to the new landing pad.
    if (Invoke.forwardEHResume(CI, BB)) {
      // TODO: 'Split' is now unreachable; clean it up.

      // We want to leave the original call intact so that the call
      // graph and other structures won't get misled.  We also have to
      // avoid processing the next block, or we'll iterate here forever.
      return true;
    }

    // Otherwise, create the new invoke instruction.
    ImmutableCallSite CS(CI);
    SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
    InvokeInst *II =
      InvokeInst::Create(CI->getCalledValue(), Split,
                         Invoke.getOuterUnwindDest(),
                         InvokeArgs.begin(), InvokeArgs.end(),
                         CI->getName(), BB);
    II->setCallingConv(CI->getCallingConv());
    II->setAttributes(CI->getAttributes());

    // Make sure that anything using the call now uses the invoke!  This also
    // updates the CallGraph if present, because it uses a WeakVH.
    CI->replaceAllUsesWith(II);

    Split->getInstList().pop_front();  // Delete the original call

    // Update any PHI nodes in the exceptional block to indicate that
    // there is now a new entry in them.
    Invoke.addIncomingPHIValuesFor(BB);

    // This basic block is now complete, the caller will continue scanning
    // the next one.
    return false;
  }

  return false;
}

/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert
/// calls in the body of the inlined function into invokes and turn unwind
/// instructions into branches to the invoke unwind dest.
///
/// II is the invoke instruction being inlined.  FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
                                ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.  If the code doesn't have calls or unwinds, we know there is
  // nothing to rewrite.
  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
    // Now that everything is happy, we have one final detail.  The PHI nodes in
    // the exception destination block still have entries due to the original
    // invoke instruction.  Eliminate these entries (which might even delete the
    // PHI node) now.
    InvokeDest->removePredecessor(II->getParent());
    return;
  }

  InvokeInliningInfo Invoke(II);

  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
    if (InlinedCodeInfo.ContainsCalls)
      if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
        // Honor a request to skip the next block.  We don't need to
        // consider UnwindInsts in this case either.
        ++BB;
        continue;
      }

    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
      // An UnwindInst requires special handling when it gets inlined into an
      // invoke site.  Once this happens, we know that the unwind would cause
      // a control transfer to the invoke exception destination, so we can
      // transform it into a direct branch to the exception destination.
      BranchInst::Create(InvokeDest, UI);

      // Delete the unwind instruction!
      UI->eraseFromParent();

      // Update any PHI nodes in the exceptional block to indicate that
      // there is now a new entry in them.
      Invoke.addIncomingPHIValuesFor(BB);
    }
  }

  // Now that everything is happy, we have one final detail.  The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction.  Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
/// into the caller, update the specified callgraph to reflect the changes we
/// made.  Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
                                         Function::iterator FirstNewBlock,
                                         ValueToValueMapTy &VMap,
                                         InlineFunctionInfo &IFI) {
  CallGraph &CG = *IFI.CG;
  const Function *Caller = CS.getInstruction()->getParent()->getParent();
  const Function *Callee = CS.getCalledFunction();
  CallGraphNode *CalleeNode = CG[Callee];
  CallGraphNode *CallerNode = CG[Caller];

  // Since we inlined some uninlined call sites in the callee into the caller,
  // add edges from the caller to all of the callees of the callee.
  CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();

  // Consider the case where CalleeNode == CallerNode.
  CallGraphNode::CalledFunctionsVector CallCache;
  if (CalleeNode == CallerNode) {
    CallCache.assign(I, E);
    I = CallCache.begin();
    E = CallCache.end();
  }

  for (; I != E; ++I) {
    const Value *OrigCall = I->first;

    ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
    // Only copy the edge if the call was inlined!
    if (VMI == VMap.end() || VMI->second == 0)
      continue;

    // If the call was inlined, but then constant folded, there is no edge to
    // add.  Check for this case.
    Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
    if (NewCall == 0) continue;

    // Remember that this call site got inlined for the client of
    // InlineFunction.
    IFI.InlinedCalls.push_back(NewCall);

    // It's possible that inlining the callsite will cause it to go from an
    // indirect to a direct call by resolving a function pointer.  If this
    // happens, set the callee of the new call site to a more precise
    // destination.  This can also happen if the call graph node of the caller
    // was just unnecessarily imprecise.
    if (I->second->getFunction() == 0)
      if (Function *F = CallSite(NewCall).getCalledFunction()) {
        // Indirect call site resolved to direct call.
        CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
        continue;
      }

    CallerNode->addCalledFunction(CallSite(NewCall), I->second);
  }

  // Update the call graph by deleting the edge from Callee to Caller.  We must
  // do this after the loop above in case Caller and Callee are the same.
  CallerNode->removeCallEdgeFor(CS);
}

/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
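///
/// For example (types and names are illustrative only), a byval argument at
/// a call site such as
///   call void @callee(%struct.S* byval align 8 %p)
/// is replaced, when inlined, by a fresh alloca in the caller's entry block
/// plus an @llvm.memcpy from %p into it, and the inlined body then uses the
/// alloca instead of %p.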
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                  const Function *CalledFunc,
                                  InlineFunctionInfo &IFI,
                                  unsigned ByValAlignment) {
  const Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();

  // If the called function is readonly, then it could not mutate the caller's
  // copy of the byval'd memory.  In this case, it is safe to elide the copy and
  // temporary.
  if (CalledFunc->onlyReadsMemory()) {
    // If the byval argument has a specified alignment that is greater than the
    // passed in pointer, then we either have to round up the input pointer or
    // give up on this transformation.
    if (ByValAlignment <= 1)  // 0 = unspecified, 1 = no particular alignment.
      return Arg;

    // If the pointer is already known to be sufficiently aligned, or if we can
    // round it up to a larger alignment, then we don't need a temporary.
    if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
                                   IFI.TD) >= ByValAlignment)
      return Arg;

    // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
    // for code quality, but rarely happens and is required for correctness.
  }

  LLVMContext &Context = Arg->getContext();

  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);

  // Create the alloca.  If we have TargetData, use nice alignment.
  unsigned Align = 1;
  if (IFI.TD)
    Align = IFI.TD->getPrefTypeAlignment(AggTy);

  // If the byval had an alignment specified, we *must* use at least that
  // alignment, as it is required by the byval argument (and uses of the
  // pointer inside the callee).
  Align = std::max(Align, ByValAlignment);

  Function *Caller = TheCall->getParent()->getParent();

  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
                                    &*Caller->begin()->begin());
  // Emit a memcpy from the byval argument into the new alloca.
  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                 Intrinsic::memcpy,
                                                 Tys, 3);
  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);

  Value *Size;
  if (IFI.TD == 0)
    Size = ConstantExpr::getSizeOf(AggTy);
  else
    Size = ConstantInt::get(Type::getInt64Ty(Context),
                            IFI.TD->getTypeStoreSize(AggTy));

  // Always generate a memcpy of alignment 1 here because we don't know
  // the alignment of the src pointer.  Other optimizations can infer
  // better alignment.
  Value *CallArgs[] = {
    DestCast, SrcCast, Size,
    ConstantInt::get(Type::getInt32Ty(Context), 1),
    ConstantInt::getFalse(Context) // isVolatile
  };
  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs, CallArgs+5);

  // Uses of the argument in the function should use our new alloca
  // instead.
  return NewAlloca;
}

// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
  for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
       ++UI) {
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
      switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::lifetime_start:
      case Intrinsic::lifetime_end:
        return true;
      }
    }
  }
  return false;
}

// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
  const Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
  if (AI->getType() == Int8PtrTy)
    return isUsedByLifetimeMarker(AI);

  // Do a scan to find all the casts to i8*.
  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
       ++I) {
    if (I->getType() != Int8PtrTy) continue;
    if (I->stripPointerCasts() != AI) continue;
    if (isUsedByLifetimeMarker(*I))
      return true;
  }
  return false;
}

/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
/// recursively update InlinedAtEntry of a DebugLoc.
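///
/// For example (illustrative), if an instruction in the callee carries the
/// location "line 7 of callee" and the call being inlined sits at "line 42 of
/// caller", the instruction's new location is "line 7 of callee, inlined at
/// line 42 of caller"; locations that already carry an inlined-at chain keep
/// it, with the call site appended at the end of that chain.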
static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
                                    const DebugLoc &InlinedAtDL,
                                    LLVMContext &Ctx) {
  if (MDNode *IA = DL.getInlinedAt(Ctx)) {
    DebugLoc NewInlinedAtDL
      = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
    return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                         NewInlinedAtDL.getAsMDNode(Ctx));
  }

  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                       InlinedAtDL.getAsMDNode(Ctx));
}

/// fixupLineNumbers - Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall) {
  DebugLoc TheCallDL = TheCall->getDebugLoc();
  if (TheCallDL.isUnknown())
    return;

  for (; FI != Fn->end(); ++FI) {
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
         BI != BE; ++BI) {
      DebugLoc DL = BI->getDebugLoc();
      if (!DL.isUnknown())
        BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
    }
  }
}

// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline
// this call.  The program is still in a well defined state if this occurs
// though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
  Instruction *TheCall = CS.getInstruction();
  LLVMContext &Context = TheCall->getContext();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner,
    // which matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit.  However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));

        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the
  // entry block for the callee, move them to the entry block of the caller.
  // First calculate which instruction they should be inserted before.  We
  // insert the instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;

      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;

      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);

      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
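  // Roughly speaking (illustrative only), for each such alloca %a this emits
  //   call void @llvm.lifetime.start(i64 -1, i8* %a)
  // at the start of the inlined code and a matching @llvm.lifetime.end before
  // each return that was cloned from the callee, skipping allocas that
  // already have lifetime markers (a bitcast of %a to i8* is inserted by the
  // builder as needed).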
  if (!IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      builder.CreateLifetimeStart(AI);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the
  // inlined code with llvm.stacksave/llvm.stackrestore intrinsics.
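  // That is (illustrative only), the inlined region becomes roughly
  //   %savedstack = call i8* @llvm.stacksave()
  //   ...inlined body containing dynamic allocas...
  //   call void @llvm.stackrestore(i8* %savedstack)
  // with a stackrestore before every return (and, for inlined invokes, before
  // every unwind), so the callee's dynamic stack allocations are released.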
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined
  // 'unwind' instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Context, Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }

    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.
  //
  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and
  // eliminate any users of the original call/invoke instruction.
  const Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
.size() > 1) {
1100 // The PHI node should go at the front of the new basic block to merge all
1101 // possible incoming values.
1102 if (!TheCall
->use_empty()) {
1103 PHI
= PHINode::Create(RTy
, Returns
.size(), TheCall
->getName(),
1104 AfterCallBB
->begin());
1105 // Anything that used the result of the function call should now use the
1106 // PHI node as their operand.
1107 TheCall
->replaceAllUsesWith(PHI
);
1110 // Loop over all of the return instructions adding entries to the PHI node
1113 for (unsigned i
= 0, e
= Returns
.size(); i
!= e
; ++i
) {
1114 ReturnInst
*RI
= Returns
[i
];
1115 assert(RI
->getReturnValue()->getType() == PHI
->getType() &&
1116 "Ret value not consistent in function!");
1117 PHI
->addIncoming(RI
->getReturnValue(), RI
->getParent());
1122 // Add a branch to the merge points and remove return instructions.
1123 for (unsigned i
= 0, e
= Returns
.size(); i
!= e
; ++i
) {
1124 ReturnInst
*RI
= Returns
[i
];
1125 BranchInst::Create(AfterCallBB
, RI
);
1126 RI
->eraseFromParent();
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI)
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }

  return true;
}