//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));
namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer of a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    DominatorTree *DT;
    ProfileInfo *PFI;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
    DenseMap<Value*, Value*> SunkAddrs;

    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool ModifiedDT;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {
        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
      }
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addPreserved<ProfileInfo>();
    }

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
  };
}
char CodeGenPrepare::ID = 0;
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}
bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  ModifiedDT = false;
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= ConstantFoldTerminator(BB);

    if (MadeChange)
      ModifiedDT = true;
    EverMadeChange |= MadeChange;
  }

  if (ModifiedDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
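// Purely illustrative sketch (not from the original source): a block of the
// shape
//
//   somebb:                              ; preds = %a, %b
//     %p = phi i32 [ %x, %a ], [ %y, %b ]
//     br label %destbb
//
// is folded into %destbb, with %destbb's PHI nodes rewritten to take %x and
// %y directly from %a and %b.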
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}
/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only PHIs and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
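// Purely illustrative (not from the original source): if %c below is a noop
// copy on the target, e.g. a pointer-to-pointer bitcast,
//
//   entry:
//     %c = bitcast i8* %p to i32*
//     br i1 %cond, label %bb1, label %bb2
//   bb1:
//     %v = load i32* %c
//
// an equivalent cast is recreated in %bb1 (and in any other user block), so
// each block's selection DAG sees the cast locally and no extra virtual
// register has to be kept live across blocks.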
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
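// Purely illustrative (not from the original source): an icmp defined in one
// block but branched on in another, e.g.
//
//   entry:
//     %c = icmp eq i32 %a, %b
//     br label %next
//   next:
//     br i1 %c, label %t, label %f
//
// gets a copy of the icmp inserted into %next, so each block can select its
// own compare+branch without keeping the i1 value live across blocks.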
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}
namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    const Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                              ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls. This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize. Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
///
/// =>
///
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  if (!TLI)
    return false;

  Value *V = RI->getReturnValue();
  PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
  if (V && !PN)
    return false;

  BasicBlock *BB = RI->getParent();
  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that
  // the return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}
/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
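// Purely illustrative (not from the original source): given
//
//   entry:
//     %a = getelementptr i32* %base, i64 4
//     br label %bb
//   bb:
//     %v = load i32* %a
//
// the address computation feeding %a is re-materialized next to the load
// (as ptrtoint/add/inttoptr "sunkaddr" instructions), so instruction
// selection in %bb can fold it into the load's addressing mode.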
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes. This is necessary to undo
  // unprofitable PRE transformations.
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing mode obtained from the non-PHI roots of the graph
  // is equivalent.
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (Visited.count(V)) {
      Consensus = 0;
      break;
    }

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid using
    // getNumUses() as much as possible. Some values have a lot of uses, so
    // calling getNumUses() unconditionally caused a significant compile-time
    // regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that obtained
      // for all other roots of the PHI traversal. Also, when choosing one
      // such root as representative, select the one with the most uses in order
      // to keep the cost modeling heuristics in AddressingModeMatcher
      // applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    const Type *IntPtrTy =
          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (V->getType() != IntPtrTy)
        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
                                        "sunkaddr", InsertPt);
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr", InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Repl->use_empty()) {
    // This can cause recursive deletion, which can invalidate our iterator.
    // Use a WeakVH to hold onto it in case this happens.
    WeakVH IterHandle(CurInstIterator);
    BasicBlock *BB = CurInstIterator->getParent();

    RecursivelyDeleteTriviallyDeadInstructions(Repl);

    if (IterHandle != CurInstIterator) {
      // If the iterator instruction was recursively deleted, start over at the
      // start of the block.
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    } else {
      // This address is now available for reassignment, so erase the table
      // entry; we don't want to match some completely different instruction.
      SunkAddrs[Addr] = 0;
    }
  }
  ++NumMemoryInsts;
  return true;
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
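// Purely illustrative (not from the original source):
//
//   bb1:
//     %x = load i16* %p
//     br label %bb2
//   bb2:
//     %y = sext i16 %x to i32
//
// becomes a sext placed right after the load in %bb1, so isel can select a
// single sign-extending load when the target supports it.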
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with result of extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}
bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up. If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded. The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop). If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so must its result -> BitCast
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
    return DupRetToEnableTailCallOpts(RI);

  return false;
}
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
    MadeChange |= OptimizeInst(CurInstIterator++);

  return MadeChange;
}