1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass munges the code in the input function to better prepare it for
11 // SelectionDAG-based code generation. This works around limitations in it's
12 // basic-block-at-a-time approach. It should eventually be removed.
14 //===----------------------------------------------------------------------===//
16 #define DEBUG_TYPE "codegenprepare"
17 #include "llvm/Transforms/Scalar.h"
18 #include "llvm/Constants.h"
19 #include "llvm/DerivedTypes.h"
20 #include "llvm/Function.h"
21 #include "llvm/InlineAsm.h"
22 #include "llvm/Instructions.h"
23 #include "llvm/IntrinsicInst.h"
24 #include "llvm/Pass.h"
25 #include "llvm/Analysis/Dominators.h"
26 #include "llvm/Analysis/InstructionSimplify.h"
27 #include "llvm/Analysis/ProfileInfo.h"
28 #include "llvm/Target/TargetData.h"
29 #include "llvm/Target/TargetLowering.h"
30 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
31 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
32 #include "llvm/Transforms/Utils/Local.h"
33 #include "llvm/Transforms/Utils/BuildLibCalls.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/Assembly/Writer.h"
38 #include "llvm/Support/CallSite.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/GetElementPtrTypeIterator.h"
42 #include "llvm/Support/PatternMatch.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Support/IRBuilder.h"
45 #include "llvm/Support/ValueHandle.h"
47 using namespace llvm::PatternMatch
;
49 STATISTIC(NumBlocksElim
, "Number of blocks eliminated");
50 STATISTIC(NumPHIsElim
, "Number of trivial PHIs eliminated");
51 STATISTIC(NumGEPsElim
, "Number of GEPs converted to casts");
52 STATISTIC(NumCmpUses
, "Number of uses of Cmp expressions replaced with uses of "
54 STATISTIC(NumCastUses
, "Number of uses of Cast expressions replaced with uses "
56 STATISTIC(NumMemoryInsts
, "Number of memory instructions whose address "
57 "computations were sunk");
58 STATISTIC(NumExtsMoved
, "Number of [s|z]ext instructions combined with loads");
59 STATISTIC(NumExtUses
, "Number of uses of [s|z]ext instructions optimized");
60 STATISTIC(NumRetsDup
, "Number of return instructions duplicated");
62 static cl::opt
<bool> DisableBranchOpts(
63 "disable-cgp-branch-opts", cl::Hidden
, cl::init(false),
64 cl::desc("Disable branch optimizations in CodeGenPrepare"));
67 class CodeGenPrepare
: public FunctionPass
{
68 /// TLI - Keep a pointer of a TargetLowering to consult for determining
69 /// transformation profitability.
70 const TargetLowering
*TLI
;
74 /// CurInstIterator - As we scan instructions optimizing them, this is the
75 /// next instruction to optimize. Xforms that can invalidate this should
77 BasicBlock::iterator CurInstIterator
;
79 /// Keeps track of non-local addresses that have been sunk into a block.
80 /// This allows us to avoid inserting duplicate code for blocks with
81 /// multiple load/stores of the same address.
82 DenseMap
<Value
*, Value
*> SunkAddrs
;
84 /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
89 static char ID
; // Pass identification, replacement for typeid
90 explicit CodeGenPrepare(const TargetLowering
*tli
= 0)
91 : FunctionPass(ID
), TLI(tli
) {
92 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
94 bool runOnFunction(Function
&F
);
96 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
97 AU
.addPreserved
<DominatorTree
>();
98 AU
.addPreserved
<ProfileInfo
>();
102 bool EliminateMostlyEmptyBlocks(Function
&F
);
103 bool CanMergeBlocks(const BasicBlock
*BB
, const BasicBlock
*DestBB
) const;
104 void EliminateMostlyEmptyBlock(BasicBlock
*BB
);
105 bool OptimizeBlock(BasicBlock
&BB
);
106 bool OptimizeInst(Instruction
*I
);
107 bool OptimizeMemoryInst(Instruction
*I
, Value
*Addr
, const Type
*AccessTy
);
108 bool OptimizeInlineAsmInst(CallInst
*CS
);
109 bool OptimizeCallInst(CallInst
*CI
);
110 bool MoveExtToFormExtLoad(Instruction
*I
);
111 bool OptimizeExtUses(Instruction
*I
);
112 bool DupRetToEnableTailCallOpts(ReturnInst
*RI
);
116 char CodeGenPrepare::ID
= 0;
117 INITIALIZE_PASS(CodeGenPrepare
, "codegenprepare",
118 "Optimize for code generation", false, false)
120 FunctionPass
*llvm::createCodeGenPreparePass(const TargetLowering
*TLI
) {
121 return new CodeGenPrepare(TLI
);
124 bool CodeGenPrepare::runOnFunction(Function
&F
) {
125 bool EverMadeChange
= false;
128 DT
= getAnalysisIfAvailable
<DominatorTree
>();
129 PFI
= getAnalysisIfAvailable
<ProfileInfo
>();
131 // First pass, eliminate blocks that contain only PHI nodes and an
132 // unconditional branch.
133 EverMadeChange
|= EliminateMostlyEmptyBlocks(F
);
135 bool MadeChange
= true;
138 for (Function::iterator I
= F
.begin(), E
= F
.end(); I
!= E
; ) {
139 BasicBlock
*BB
= I
++;
140 MadeChange
|= OptimizeBlock(*BB
);
142 EverMadeChange
|= MadeChange
;
147 if (!DisableBranchOpts
) {
149 for (Function::iterator BB
= F
.begin(), E
= F
.end(); BB
!= E
; ++BB
)
150 MadeChange
|= ConstantFoldTerminator(BB
, true);
154 EverMadeChange
|= MadeChange
;
157 if (ModifiedDT
&& DT
)
158 DT
->DT
->recalculate(F
);
160 return EverMadeChange
;
163 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
164 /// debug info directives, and an unconditional branch. Passes before isel
165 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
166 /// isel. Start by eliminating these blocks so we can split them the way we
168 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function
&F
) {
169 bool MadeChange
= false;
170 // Note that this intentionally skips the entry block.
171 for (Function::iterator I
= ++F
.begin(), E
= F
.end(); I
!= E
; ) {
172 BasicBlock
*BB
= I
++;
174 // If this block doesn't end with an uncond branch, ignore it.
175 BranchInst
*BI
= dyn_cast
<BranchInst
>(BB
->getTerminator());
176 if (!BI
|| !BI
->isUnconditional())
179 // If the instruction before the branch (skipping debug info) isn't a phi
180 // node, then other stuff is happening here.
181 BasicBlock::iterator BBI
= BI
;
182 if (BBI
!= BB
->begin()) {
184 while (isa
<DbgInfoIntrinsic
>(BBI
)) {
185 if (BBI
== BB
->begin())
189 if (!isa
<DbgInfoIntrinsic
>(BBI
) && !isa
<PHINode
>(BBI
))
193 // Do not break infinite loops.
194 BasicBlock
*DestBB
= BI
->getSuccessor(0);
198 if (!CanMergeBlocks(BB
, DestBB
))
201 EliminateMostlyEmptyBlock(BB
);
207 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
208 /// single uncond branch between them, and BB contains no other non-phi
210 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock
*BB
,
211 const BasicBlock
*DestBB
) const {
212 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
213 // the successor. If there are more complex condition (e.g. preheaders),
214 // don't mess around with them.
215 BasicBlock::const_iterator BBI
= BB
->begin();
216 while (const PHINode
*PN
= dyn_cast
<PHINode
>(BBI
++)) {
217 for (Value::const_use_iterator UI
= PN
->use_begin(), E
= PN
->use_end();
219 const Instruction
*User
= cast
<Instruction
>(*UI
);
220 if (User
->getParent() != DestBB
|| !isa
<PHINode
>(User
))
222 // If User is inside DestBB block and it is a PHINode then check
223 // incoming value. If incoming value is not from BB then this is
224 // a complex condition (e.g. preheaders) we want to avoid here.
225 if (User
->getParent() == DestBB
) {
226 if (const PHINode
*UPN
= dyn_cast
<PHINode
>(User
))
227 for (unsigned I
= 0, E
= UPN
->getNumIncomingValues(); I
!= E
; ++I
) {
228 Instruction
*Insn
= dyn_cast
<Instruction
>(UPN
->getIncomingValue(I
));
229 if (Insn
&& Insn
->getParent() == BB
&&
230 Insn
->getParent() != UPN
->getIncomingBlock(I
))
237 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
238 // and DestBB may have conflicting incoming values for the block. If so, we
239 // can't merge the block.
240 const PHINode
*DestBBPN
= dyn_cast
<PHINode
>(DestBB
->begin());
241 if (!DestBBPN
) return true; // no conflict.
243 // Collect the preds of BB.
244 SmallPtrSet
<const BasicBlock
*, 16> BBPreds
;
245 if (const PHINode
*BBPN
= dyn_cast
<PHINode
>(BB
->begin())) {
246 // It is faster to get preds from a PHI than with pred_iterator.
247 for (unsigned i
= 0, e
= BBPN
->getNumIncomingValues(); i
!= e
; ++i
)
248 BBPreds
.insert(BBPN
->getIncomingBlock(i
));
250 BBPreds
.insert(pred_begin(BB
), pred_end(BB
));
253 // Walk the preds of DestBB.
254 for (unsigned i
= 0, e
= DestBBPN
->getNumIncomingValues(); i
!= e
; ++i
) {
255 BasicBlock
*Pred
= DestBBPN
->getIncomingBlock(i
);
256 if (BBPreds
.count(Pred
)) { // Common predecessor?
257 BBI
= DestBB
->begin();
258 while (const PHINode
*PN
= dyn_cast
<PHINode
>(BBI
++)) {
259 const Value
*V1
= PN
->getIncomingValueForBlock(Pred
);
260 const Value
*V2
= PN
->getIncomingValueForBlock(BB
);
262 // If V2 is a phi node in BB, look up what the mapped value will be.
263 if (const PHINode
*V2PN
= dyn_cast
<PHINode
>(V2
))
264 if (V2PN
->getParent() == BB
)
265 V2
= V2PN
->getIncomingValueForBlock(Pred
);
267 // If there is a conflict, bail out.
268 if (V1
!= V2
) return false;
277 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
278 /// an unconditional branch in it.
279 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock
*BB
) {
280 BranchInst
*BI
= cast
<BranchInst
>(BB
->getTerminator());
281 BasicBlock
*DestBB
= BI
->getSuccessor(0);
283 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB
<< *DestBB
);
285 // If the destination block has a single pred, then this is a trivial edge,
287 if (BasicBlock
*SinglePred
= DestBB
->getSinglePredecessor()) {
288 if (SinglePred
!= DestBB
) {
289 // Remember if SinglePred was the entry block of the function. If so, we
290 // will need to move BB back to the entry position.
291 bool isEntry
= SinglePred
== &SinglePred
->getParent()->getEntryBlock();
292 MergeBasicBlockIntoOnlyPred(DestBB
, this);
294 if (isEntry
&& BB
!= &BB
->getParent()->getEntryBlock())
295 BB
->moveBefore(&BB
->getParent()->getEntryBlock());
297 DEBUG(dbgs() << "AFTER:\n" << *DestBB
<< "\n\n\n");
302 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
303 // to handle the new incoming edges it is about to have.
305 for (BasicBlock::iterator BBI
= DestBB
->begin();
306 (PN
= dyn_cast
<PHINode
>(BBI
)); ++BBI
) {
307 // Remove the incoming value for BB, and remember it.
308 Value
*InVal
= PN
->removeIncomingValue(BB
, false);
310 // Two options: either the InVal is a phi node defined in BB or it is some
311 // value that dominates BB.
312 PHINode
*InValPhi
= dyn_cast
<PHINode
>(InVal
);
313 if (InValPhi
&& InValPhi
->getParent() == BB
) {
314 // Add all of the input values of the input PHI as inputs of this phi.
315 for (unsigned i
= 0, e
= InValPhi
->getNumIncomingValues(); i
!= e
; ++i
)
316 PN
->addIncoming(InValPhi
->getIncomingValue(i
),
317 InValPhi
->getIncomingBlock(i
));
319 // Otherwise, add one instance of the dominating value for each edge that
320 // we will be adding.
321 if (PHINode
*BBPN
= dyn_cast
<PHINode
>(BB
->begin())) {
322 for (unsigned i
= 0, e
= BBPN
->getNumIncomingValues(); i
!= e
; ++i
)
323 PN
->addIncoming(InVal
, BBPN
->getIncomingBlock(i
));
325 for (pred_iterator PI
= pred_begin(BB
), E
= pred_end(BB
); PI
!= E
; ++PI
)
326 PN
->addIncoming(InVal
, *PI
);
331 // The PHIs are now updated, change everything that refers to BB to use
332 // DestBB and remove BB.
333 BB
->replaceAllUsesWith(DestBB
);
334 if (DT
&& !ModifiedDT
) {
335 BasicBlock
*BBIDom
= DT
->getNode(BB
)->getIDom()->getBlock();
336 BasicBlock
*DestBBIDom
= DT
->getNode(DestBB
)->getIDom()->getBlock();
337 BasicBlock
*NewIDom
= DT
->findNearestCommonDominator(BBIDom
, DestBBIDom
);
338 DT
->changeImmediateDominator(DestBB
, NewIDom
);
342 PFI
->replaceAllUses(BB
, DestBB
);
343 PFI
->removeEdge(ProfileInfo::getEdge(BB
, DestBB
));
345 BB
->eraseFromParent();
348 DEBUG(dbgs() << "AFTER:\n" << *DestBB
<< "\n\n\n");
351 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
352 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
353 /// sink it into user blocks to reduce the number of virtual
354 /// registers that must be created and coalesced.
356 /// Return true if any changes are made.
358 static bool OptimizeNoopCopyExpression(CastInst
*CI
, const TargetLowering
&TLI
){
359 // If this is a noop copy,
360 EVT SrcVT
= TLI
.getValueType(CI
->getOperand(0)->getType());
361 EVT DstVT
= TLI
.getValueType(CI
->getType());
363 // This is an fp<->int conversion?
364 if (SrcVT
.isInteger() != DstVT
.isInteger())
367 // If this is an extension, it will be a zero or sign extension, which
369 if (SrcVT
.bitsLT(DstVT
)) return false;
371 // If these values will be promoted, find out what they will be promoted
372 // to. This helps us consider truncates on PPC as noop copies when they
374 if (TLI
.getTypeAction(CI
->getContext(), SrcVT
) ==
375 TargetLowering::TypePromoteInteger
)
376 SrcVT
= TLI
.getTypeToTransformTo(CI
->getContext(), SrcVT
);
377 if (TLI
.getTypeAction(CI
->getContext(), DstVT
) ==
378 TargetLowering::TypePromoteInteger
)
379 DstVT
= TLI
.getTypeToTransformTo(CI
->getContext(), DstVT
);
381 // If, after promotion, these are the same types, this is a noop copy.
385 BasicBlock
*DefBB
= CI
->getParent();
387 /// InsertedCasts - Only insert a cast in each block once.
388 DenseMap
<BasicBlock
*, CastInst
*> InsertedCasts
;
390 bool MadeChange
= false;
391 for (Value::use_iterator UI
= CI
->use_begin(), E
= CI
->use_end();
393 Use
&TheUse
= UI
.getUse();
394 Instruction
*User
= cast
<Instruction
>(*UI
);
396 // Figure out which BB this cast is used in. For PHI's this is the
397 // appropriate predecessor block.
398 BasicBlock
*UserBB
= User
->getParent();
399 if (PHINode
*PN
= dyn_cast
<PHINode
>(User
)) {
400 UserBB
= PN
->getIncomingBlock(UI
);
403 // Preincrement use iterator so we don't invalidate it.
406 // If this user is in the same block as the cast, don't change the cast.
407 if (UserBB
== DefBB
) continue;
409 // If we have already inserted a cast into this block, use it.
410 CastInst
*&InsertedCast
= InsertedCasts
[UserBB
];
413 BasicBlock::iterator InsertPt
= UserBB
->getFirstNonPHI();
416 CastInst::Create(CI
->getOpcode(), CI
->getOperand(0), CI
->getType(), "",
421 // Replace a use of the cast with a use of the new cast.
422 TheUse
= InsertedCast
;
426 // If we removed all uses, nuke the cast.
427 if (CI
->use_empty()) {
428 CI
->eraseFromParent();
435 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
436 /// the number of virtual registers that must be created and coalesced. This is
437 /// a clear win except on targets with multiple condition code registers
438 /// (PowerPC), where it might lose; some adjustment may be wanted there.
440 /// Return true if any changes are made.
441 static bool OptimizeCmpExpression(CmpInst
*CI
) {
442 BasicBlock
*DefBB
= CI
->getParent();
444 /// InsertedCmp - Only insert a cmp in each block once.
445 DenseMap
<BasicBlock
*, CmpInst
*> InsertedCmps
;
447 bool MadeChange
= false;
448 for (Value::use_iterator UI
= CI
->use_begin(), E
= CI
->use_end();
450 Use
&TheUse
= UI
.getUse();
451 Instruction
*User
= cast
<Instruction
>(*UI
);
453 // Preincrement use iterator so we don't invalidate it.
456 // Don't bother for PHI nodes.
457 if (isa
<PHINode
>(User
))
460 // Figure out which BB this cmp is used in.
461 BasicBlock
*UserBB
= User
->getParent();
463 // If this user is in the same block as the cmp, don't change the cmp.
464 if (UserBB
== DefBB
) continue;
466 // If we have already inserted a cmp into this block, use it.
467 CmpInst
*&InsertedCmp
= InsertedCmps
[UserBB
];
470 BasicBlock::iterator InsertPt
= UserBB
->getFirstNonPHI();
473 CmpInst::Create(CI
->getOpcode(),
474 CI
->getPredicate(), CI
->getOperand(0),
475 CI
->getOperand(1), "", InsertPt
);
479 // Replace a use of the cmp with a use of the new cmp.
480 TheUse
= InsertedCmp
;
484 // If we removed all uses, nuke the cmp.
486 CI
->eraseFromParent();
492 class CodeGenPrepareFortifiedLibCalls
: public SimplifyFortifiedLibCalls
{
494 void replaceCall(Value
*With
) {
495 CI
->replaceAllUsesWith(With
);
496 CI
->eraseFromParent();
498 bool isFoldable(unsigned SizeCIOp
, unsigned, bool) const {
499 if (ConstantInt
*SizeCI
=
500 dyn_cast
<ConstantInt
>(CI
->getArgOperand(SizeCIOp
)))
501 return SizeCI
->isAllOnesValue();
505 } // end anonymous namespace
507 bool CodeGenPrepare::OptimizeCallInst(CallInst
*CI
) {
508 BasicBlock
*BB
= CI
->getParent();
510 // Lower inline assembly if we can.
511 // If we found an inline asm expession, and if the target knows how to
512 // lower it to normal LLVM code, do so now.
513 if (TLI
&& isa
<InlineAsm
>(CI
->getCalledValue())) {
514 if (TLI
->ExpandInlineAsm(CI
)) {
515 // Avoid invalidating the iterator.
516 CurInstIterator
= BB
->begin();
517 // Avoid processing instructions out of order, which could cause
518 // reuse before a value is defined.
522 // Sink address computing for memory operands into the block.
523 if (OptimizeInlineAsmInst(CI
))
527 // Lower all uses of llvm.objectsize.*
528 IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(CI
);
529 if (II
&& II
->getIntrinsicID() == Intrinsic::objectsize
) {
530 bool Min
= (cast
<ConstantInt
>(II
->getArgOperand(1))->getZExtValue() == 1);
531 const Type
*ReturnTy
= CI
->getType();
532 Constant
*RetVal
= ConstantInt::get(ReturnTy
, Min
? 0 : -1ULL);
534 // Substituting this can cause recursive simplifications, which can
535 // invalidate our iterator. Use a WeakVH to hold onto it in case this
537 WeakVH
IterHandle(CurInstIterator
);
539 ReplaceAndSimplifyAllUses(CI
, RetVal
, TLI
? TLI
->getTargetData() : 0,
540 ModifiedDT
? 0 : DT
);
542 // If the iterator instruction was recursively deleted, start over at the
543 // start of the block.
544 if (IterHandle
!= CurInstIterator
) {
545 CurInstIterator
= BB
->begin();
551 // From here on out we're working with named functions.
552 if (CI
->getCalledFunction() == 0) return false;
554 // llvm.dbg.value is far away from the value then iSel may not be able
555 // handle it properly. iSel will drop llvm.dbg.value if it can not
556 // find a node corresponding to the value.
557 if (DbgValueInst
*DVI
= dyn_cast
<DbgValueInst
>(CI
))
558 if (Instruction
*VI
= dyn_cast_or_null
<Instruction
>(DVI
->getValue()))
559 if (!VI
->isTerminator() &&
560 (DVI
->getParent() != VI
->getParent() || DT
->dominates(DVI
, VI
))) {
561 DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI
<< ' ' << *VI
);
562 DVI
->removeFromParent();
563 if (isa
<PHINode
>(VI
))
564 DVI
->insertBefore(VI
->getParent()->getFirstNonPHI());
566 DVI
->insertAfter(VI
);
570 // We'll need TargetData from here on out.
571 const TargetData
*TD
= TLI
? TLI
->getTargetData() : 0;
572 if (!TD
) return false;
574 // Lower all default uses of _chk calls. This is very similar
575 // to what InstCombineCalls does, but here we are only lowering calls
576 // that have the default "don't know" as the objectsize. Anything else
577 // should be left alone.
578 CodeGenPrepareFortifiedLibCalls Simplifier
;
579 return Simplifier
.fold(CI
, TD
);
582 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
583 /// instructions to the predecessor to enable tail call optimizations. The
584 /// case it is currently looking for is:
586 /// %tmp0 = tail call i32 @f0()
589 /// %tmp1 = tail call i32 @f1()
592 /// %tmp2 = tail call i32 @f2()
595 /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
601 /// %tmp0 = tail call i32 @f0()
604 /// %tmp1 = tail call i32 @f1()
607 /// %tmp2 = tail call i32 @f2()
610 bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst
*RI
) {
614 Value
*V
= RI
->getReturnValue();
615 PHINode
*PN
= V
? dyn_cast
<PHINode
>(V
) : NULL
;
619 BasicBlock
*BB
= RI
->getParent();
620 if (PN
&& PN
->getParent() != BB
)
623 // It's not safe to eliminate the sign / zero extension of the return value.
624 // See llvm::isInTailCallPosition().
625 const Function
*F
= BB
->getParent();
626 unsigned CallerRetAttr
= F
->getAttributes().getRetAttributes();
627 if ((CallerRetAttr
& Attribute::ZExt
) || (CallerRetAttr
& Attribute::SExt
))
630 // Make sure there are no instructions between the PHI and return, or that the
631 // return is the first instruction in the block.
633 BasicBlock::iterator BI
= BB
->begin();
634 do { ++BI
; } while (isa
<DbgInfoIntrinsic
>(BI
));
638 BasicBlock::iterator BI
= BB
->begin();
639 while (isa
<DbgInfoIntrinsic
>(BI
)) ++BI
;
644 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
646 SmallVector
<CallInst
*, 4> TailCalls
;
648 for (unsigned I
= 0, E
= PN
->getNumIncomingValues(); I
!= E
; ++I
) {
649 CallInst
*CI
= dyn_cast
<CallInst
>(PN
->getIncomingValue(I
));
650 // Make sure the phi value is indeed produced by the tail call.
651 if (CI
&& CI
->hasOneUse() && CI
->getParent() == PN
->getIncomingBlock(I
) &&
652 TLI
->mayBeEmittedAsTailCall(CI
))
653 TailCalls
.push_back(CI
);
656 SmallPtrSet
<BasicBlock
*, 4> VisitedBBs
;
657 for (pred_iterator PI
= pred_begin(BB
), PE
= pred_end(BB
); PI
!= PE
; ++PI
) {
658 if (!VisitedBBs
.insert(*PI
))
661 BasicBlock::InstListType
&InstList
= (*PI
)->getInstList();
662 BasicBlock::InstListType::reverse_iterator RI
= InstList
.rbegin();
663 BasicBlock::InstListType::reverse_iterator RE
= InstList
.rend();
664 do { ++RI
; } while (RI
!= RE
&& isa
<DbgInfoIntrinsic
>(&*RI
));
668 CallInst
*CI
= dyn_cast
<CallInst
>(&*RI
);
669 if (CI
&& CI
->use_empty() && TLI
->mayBeEmittedAsTailCall(CI
))
670 TailCalls
.push_back(CI
);
674 bool Changed
= false;
675 for (unsigned i
= 0, e
= TailCalls
.size(); i
!= e
; ++i
) {
676 CallInst
*CI
= TailCalls
[i
];
679 // Conservatively require the attributes of the call to match those of the
680 // return. Ignore noalias because it doesn't affect the call sequence.
681 unsigned CalleeRetAttr
= CS
.getAttributes().getRetAttributes();
682 if ((CalleeRetAttr
^ CallerRetAttr
) & ~Attribute::NoAlias
)
685 // Make sure the call instruction is followed by an unconditional branch to
687 BasicBlock
*CallBB
= CI
->getParent();
688 BranchInst
*BI
= dyn_cast
<BranchInst
>(CallBB
->getTerminator());
689 if (!BI
|| !BI
->isUnconditional() || BI
->getSuccessor(0) != BB
)
692 // Duplicate the return into CallBB.
693 (void)FoldReturnIntoUncondBranch(RI
, BB
, CallBB
);
694 ModifiedDT
= Changed
= true;
698 // If we eliminated all predecessors of the block, delete the block now.
699 if (Changed
&& pred_begin(BB
) == pred_end(BB
))
700 BB
->eraseFromParent();
705 //===----------------------------------------------------------------------===//
706 // Memory Optimization
707 //===----------------------------------------------------------------------===//
709 /// IsNonLocalValue - Return true if the specified values are defined in a
710 /// different basic block than BB.
711 static bool IsNonLocalValue(Value
*V
, BasicBlock
*BB
) {
712 if (Instruction
*I
= dyn_cast
<Instruction
>(V
))
713 return I
->getParent() != BB
;
717 /// OptimizeMemoryInst - Load and Store Instructions often have
718 /// addressing modes that can do significant amounts of computation. As such,
719 /// instruction selection will try to get the load or store to do as much
720 /// computation as possible for the program. The problem is that isel can only
721 /// see within a single block. As such, we sink as much legal addressing mode
722 /// stuff into the block as possible.
724 /// This method is used to optimize both load/store and inline asms with memory
726 bool CodeGenPrepare::OptimizeMemoryInst(Instruction
*MemoryInst
, Value
*Addr
,
727 const Type
*AccessTy
) {
730 // Try to collapse single-value PHI nodes. This is necessary to undo
731 // unprofitable PRE transformations.
732 SmallVector
<Value
*, 8> worklist
;
733 SmallPtrSet
<Value
*, 16> Visited
;
734 worklist
.push_back(Addr
);
736 // Use a worklist to iteratively look through PHI nodes, and ensure that
737 // the addressing mode obtained from the non-PHI roots of the graph
739 Value
*Consensus
= 0;
740 unsigned NumUsesConsensus
= 0;
741 bool IsNumUsesConsensusValid
= false;
742 SmallVector
<Instruction
*, 16> AddrModeInsts
;
743 ExtAddrMode AddrMode
;
744 while (!worklist
.empty()) {
745 Value
*V
= worklist
.back();
748 // Break use-def graph loops.
749 if (Visited
.count(V
)) {
756 // For a PHI node, push all of its incoming values.
757 if (PHINode
*P
= dyn_cast
<PHINode
>(V
)) {
758 for (unsigned i
= 0, e
= P
->getNumIncomingValues(); i
!= e
; ++i
)
759 worklist
.push_back(P
->getIncomingValue(i
));
763 // For non-PHIs, determine the addressing mode being computed.
764 SmallVector
<Instruction
*, 16> NewAddrModeInsts
;
765 ExtAddrMode NewAddrMode
=
766 AddressingModeMatcher::Match(V
, AccessTy
,MemoryInst
,
767 NewAddrModeInsts
, *TLI
);
769 // This check is broken into two cases with very similar code to avoid using
770 // getNumUses() as much as possible. Some values have a lot of uses, so
771 // calling getNumUses() unconditionally caused a significant compile-time
775 AddrMode
= NewAddrMode
;
776 AddrModeInsts
= NewAddrModeInsts
;
778 } else if (NewAddrMode
== AddrMode
) {
779 if (!IsNumUsesConsensusValid
) {
780 NumUsesConsensus
= Consensus
->getNumUses();
781 IsNumUsesConsensusValid
= true;
784 // Ensure that the obtained addressing mode is equivalent to that obtained
785 // for all other roots of the PHI traversal. Also, when choosing one
786 // such root as representative, select the one with the most uses in order
787 // to keep the cost modeling heuristics in AddressingModeMatcher
789 unsigned NumUses
= V
->getNumUses();
790 if (NumUses
> NumUsesConsensus
) {
792 NumUsesConsensus
= NumUses
;
793 AddrModeInsts
= NewAddrModeInsts
;
802 // If the addressing mode couldn't be determined, or if multiple different
803 // ones were determined, bail out now.
804 if (!Consensus
) return false;
806 // Check to see if any of the instructions supersumed by this addr mode are
807 // non-local to I's BB.
808 bool AnyNonLocal
= false;
809 for (unsigned i
= 0, e
= AddrModeInsts
.size(); i
!= e
; ++i
) {
810 if (IsNonLocalValue(AddrModeInsts
[i
], MemoryInst
->getParent())) {
816 // If all the instructions matched are already in this BB, don't do anything.
818 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
<< "\n");
822 // Insert this computation right after this user. Since our caller is
823 // scanning from the top of the BB to the bottom, reuse of the expr are
824 // guaranteed to happen later.
825 BasicBlock::iterator InsertPt
= MemoryInst
;
827 // Now that we determined the addressing expression we want to use and know
828 // that we have to sink it into this block. Check to see if we have already
829 // done this for some other load/store instr in this block. If so, reuse the
831 Value
*&SunkAddr
= SunkAddrs
[Addr
];
833 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for "
835 if (SunkAddr
->getType() != Addr
->getType())
836 SunkAddr
= new BitCastInst(SunkAddr
, Addr
->getType(), "tmp", InsertPt
);
838 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
<< " for "
840 const Type
*IntPtrTy
=
841 TLI
->getTargetData()->getIntPtrType(AccessTy
->getContext());
845 // Start with the base register. Do this first so that subsequent address
846 // matching finds it last, which will prevent it from trying to match it
847 // as the scaled value in case it happens to be a mul. That would be
848 // problematic if we've sunk a different mul for the scale, because then
849 // we'd end up sinking both muls.
850 if (AddrMode
.BaseReg
) {
851 Value
*V
= AddrMode
.BaseReg
;
852 if (V
->getType()->isPointerTy())
853 V
= new PtrToIntInst(V
, IntPtrTy
, "sunkaddr", InsertPt
);
854 if (V
->getType() != IntPtrTy
)
855 V
= CastInst::CreateIntegerCast(V
, IntPtrTy
, /*isSigned=*/true,
856 "sunkaddr", InsertPt
);
860 // Add the scale value.
861 if (AddrMode
.Scale
) {
862 Value
*V
= AddrMode
.ScaledReg
;
863 if (V
->getType() == IntPtrTy
) {
865 } else if (V
->getType()->isPointerTy()) {
866 V
= new PtrToIntInst(V
, IntPtrTy
, "sunkaddr", InsertPt
);
867 } else if (cast
<IntegerType
>(IntPtrTy
)->getBitWidth() <
868 cast
<IntegerType
>(V
->getType())->getBitWidth()) {
869 V
= new TruncInst(V
, IntPtrTy
, "sunkaddr", InsertPt
);
871 V
= new SExtInst(V
, IntPtrTy
, "sunkaddr", InsertPt
);
873 if (AddrMode
.Scale
!= 1)
874 V
= BinaryOperator::CreateMul(V
, ConstantInt::get(IntPtrTy
,
876 "sunkaddr", InsertPt
);
878 Result
= BinaryOperator::CreateAdd(Result
, V
, "sunkaddr", InsertPt
);
883 // Add in the BaseGV if present.
884 if (AddrMode
.BaseGV
) {
885 Value
*V
= new PtrToIntInst(AddrMode
.BaseGV
, IntPtrTy
, "sunkaddr",
888 Result
= BinaryOperator::CreateAdd(Result
, V
, "sunkaddr", InsertPt
);
893 // Add in the Base Offset if present.
894 if (AddrMode
.BaseOffs
) {
895 Value
*V
= ConstantInt::get(IntPtrTy
, AddrMode
.BaseOffs
);
897 Result
= BinaryOperator::CreateAdd(Result
, V
, "sunkaddr", InsertPt
);
903 SunkAddr
= Constant::getNullValue(Addr
->getType());
905 SunkAddr
= new IntToPtrInst(Result
, Addr
->getType(), "sunkaddr",InsertPt
);
908 MemoryInst
->replaceUsesOfWith(Repl
, SunkAddr
);
910 // If we have no uses, recursively delete the value and all dead instructions
912 if (Repl
->use_empty()) {
913 // This can cause recursive deletion, which can invalidate our iterator.
914 // Use a WeakVH to hold onto it in case this happens.
915 WeakVH
IterHandle(CurInstIterator
);
916 BasicBlock
*BB
= CurInstIterator
->getParent();
918 RecursivelyDeleteTriviallyDeadInstructions(Repl
);
920 if (IterHandle
!= CurInstIterator
) {
921 // If the iterator instruction was recursively deleted, start over at the
922 // start of the block.
923 CurInstIterator
= BB
->begin();
926 // This address is now available for reassignment, so erase the table
927 // entry; we don't want to match some completely different instruction.
935 /// OptimizeInlineAsmInst - If there are any memory operands, use
936 /// OptimizeMemoryInst to sink their address computing into the block when
937 /// possible / profitable.
938 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst
*CS
) {
939 bool MadeChange
= false;
941 TargetLowering::AsmOperandInfoVector
942 TargetConstraints
= TLI
->ParseConstraints(CS
);
944 for (unsigned i
= 0, e
= TargetConstraints
.size(); i
!= e
; ++i
) {
945 TargetLowering::AsmOperandInfo
&OpInfo
= TargetConstraints
[i
];
947 // Compute the constraint code and ConstraintType to use.
948 TLI
->ComputeConstraintToUse(OpInfo
, SDValue());
950 if (OpInfo
.ConstraintType
== TargetLowering::C_Memory
&&
952 Value
*OpVal
= CS
->getArgOperand(ArgNo
++);
953 MadeChange
|= OptimizeMemoryInst(CS
, OpVal
, OpVal
->getType());
954 } else if (OpInfo
.Type
== InlineAsm::isInput
)
961 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
962 /// basic block as the load, unless conditions are unfavorable. This allows
963 /// SelectionDAG to fold the extend into the load.
965 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction
*I
) {
966 // Look for a load being extended.
967 LoadInst
*LI
= dyn_cast
<LoadInst
>(I
->getOperand(0));
968 if (!LI
) return false;
970 // If they're already in the same block, there's nothing to do.
971 if (LI
->getParent() == I
->getParent())
974 // If the load has other users and the truncate is not free, this probably
976 if (!LI
->hasOneUse() &&
977 TLI
&& (TLI
->isTypeLegal(TLI
->getValueType(LI
->getType())) ||
978 !TLI
->isTypeLegal(TLI
->getValueType(I
->getType()))) &&
979 !TLI
->isTruncateFree(I
->getType(), LI
->getType()))
982 // Check whether the target supports casts folded into loads.
984 if (isa
<ZExtInst
>(I
))
985 LType
= ISD::ZEXTLOAD
;
987 assert(isa
<SExtInst
>(I
) && "Unexpected ext type!");
988 LType
= ISD::SEXTLOAD
;
990 if (TLI
&& !TLI
->isLoadExtLegal(LType
, TLI
->getValueType(LI
->getType())))
993 // Move the extend into the same block as the load, so that SelectionDAG
995 I
->removeFromParent();
1001 bool CodeGenPrepare::OptimizeExtUses(Instruction
*I
) {
1002 BasicBlock
*DefBB
= I
->getParent();
1004 // If the result of a {s|z}ext and its source are both live out, rewrite all
1005 // other uses of the source with result of extension.
1006 Value
*Src
= I
->getOperand(0);
1007 if (Src
->hasOneUse())
1010 // Only do this xform if truncating is free.
1011 if (TLI
&& !TLI
->isTruncateFree(I
->getType(), Src
->getType()))
1014 // Only safe to perform the optimization if the source is also defined in
1016 if (!isa
<Instruction
>(Src
) || DefBB
!= cast
<Instruction
>(Src
)->getParent())
1019 bool DefIsLiveOut
= false;
1020 for (Value::use_iterator UI
= I
->use_begin(), E
= I
->use_end();
1022 Instruction
*User
= cast
<Instruction
>(*UI
);
1024 // Figure out which BB this ext is used in.
1025 BasicBlock
*UserBB
= User
->getParent();
1026 if (UserBB
== DefBB
) continue;
1027 DefIsLiveOut
= true;
1033 // Make sure non of the uses are PHI nodes.
1034 for (Value::use_iterator UI
= Src
->use_begin(), E
= Src
->use_end();
1036 Instruction
*User
= cast
<Instruction
>(*UI
);
1037 BasicBlock
*UserBB
= User
->getParent();
1038 if (UserBB
== DefBB
) continue;
1039 // Be conservative. We don't want this xform to end up introducing
1040 // reloads just before load / store instructions.
1041 if (isa
<PHINode
>(User
) || isa
<LoadInst
>(User
) || isa
<StoreInst
>(User
))
1045 // InsertedTruncs - Only insert one trunc in each block once.
1046 DenseMap
<BasicBlock
*, Instruction
*> InsertedTruncs
;
1048 bool MadeChange
= false;
1049 for (Value::use_iterator UI
= Src
->use_begin(), E
= Src
->use_end();
1051 Use
&TheUse
= UI
.getUse();
1052 Instruction
*User
= cast
<Instruction
>(*UI
);
1054 // Figure out which BB this ext is used in.
1055 BasicBlock
*UserBB
= User
->getParent();
1056 if (UserBB
== DefBB
) continue;
1058 // Both src and def are live in this block. Rewrite the use.
1059 Instruction
*&InsertedTrunc
= InsertedTruncs
[UserBB
];
1061 if (!InsertedTrunc
) {
1062 BasicBlock::iterator InsertPt
= UserBB
->getFirstNonPHI();
1064 InsertedTrunc
= new TruncInst(I
, Src
->getType(), "", InsertPt
);
1067 // Replace a use of the {s|z}ext source with a use of the result.
1068 TheUse
= InsertedTrunc
;
1076 bool CodeGenPrepare::OptimizeInst(Instruction
*I
) {
1077 if (PHINode
*P
= dyn_cast
<PHINode
>(I
)) {
1078 // It is possible for very late stage optimizations (such as SimplifyCFG)
1079 // to introduce PHI nodes too late to be cleaned up. If we detect such a
1080 // trivial PHI, go ahead and zap it here.
1081 if (Value
*V
= SimplifyInstruction(P
)) {
1082 P
->replaceAllUsesWith(V
);
1083 P
->eraseFromParent();
1090 if (CastInst
*CI
= dyn_cast
<CastInst
>(I
)) {
1091 // If the source of the cast is a constant, then this should have
1092 // already been constant folded. The only reason NOT to constant fold
1093 // it is if something (e.g. LSR) was careful to place the constant
1094 // evaluation in a block other than then one that uses it (e.g. to hoist
1095 // the address of globals out of a loop). If this is the case, we don't
1096 // want to forward-subst the cast.
1097 if (isa
<Constant
>(CI
->getOperand(0)))
1100 if (TLI
&& OptimizeNoopCopyExpression(CI
, *TLI
))
1103 if (isa
<ZExtInst
>(I
) || isa
<SExtInst
>(I
)) {
1104 bool MadeChange
= MoveExtToFormExtLoad(I
);
1105 return MadeChange
| OptimizeExtUses(I
);
1110 if (CmpInst
*CI
= dyn_cast
<CmpInst
>(I
))
1111 return OptimizeCmpExpression(CI
);
1113 if (LoadInst
*LI
= dyn_cast
<LoadInst
>(I
)) {
1115 return OptimizeMemoryInst(I
, I
->getOperand(0), LI
->getType());
1119 if (StoreInst
*SI
= dyn_cast
<StoreInst
>(I
)) {
1121 return OptimizeMemoryInst(I
, SI
->getOperand(1),
1122 SI
->getOperand(0)->getType());
1126 if (GetElementPtrInst
*GEPI
= dyn_cast
<GetElementPtrInst
>(I
)) {
1127 if (GEPI
->hasAllZeroIndices()) {
1128 /// The GEP operand must be a pointer, so must its result -> BitCast
1129 Instruction
*NC
= new BitCastInst(GEPI
->getOperand(0), GEPI
->getType(),
1130 GEPI
->getName(), GEPI
);
1131 GEPI
->replaceAllUsesWith(NC
);
1132 GEPI
->eraseFromParent();
1140 if (CallInst
*CI
= dyn_cast
<CallInst
>(I
))
1141 return OptimizeCallInst(CI
);
1143 if (ReturnInst
*RI
= dyn_cast
<ReturnInst
>(I
))
1144 return DupRetToEnableTailCallOpts(RI
);
1149 // In this pass we look for GEP and cast instructions that are used
1150 // across basic blocks and rewrite them to improve basic-block-at-a-time
1152 bool CodeGenPrepare::OptimizeBlock(BasicBlock
&BB
) {
1154 bool MadeChange
= false;
1156 CurInstIterator
= BB
.begin();
1157 for (BasicBlock::iterator E
= BB
.end(); CurInstIterator
!= E
; )
1158 MadeChange
|= OptimizeInst(CurInstIterator
++);