//===- GVNSink.cpp - sink expressions into successors --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
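///
/// Illustrative sketch (not from the original comment): with use-based
/// numbering, %a1/%c1 and %a2/%c2 receive matching numbers, so the example
/// above can be rewritten by sinking both pairs into the common successor:
///
/// [ br label %merge        ]   [ br label %merge        ]
///                  \           /
///            [ %bd = phi i32 %b, %d  ]
///            [ %s1 = add i32 %bd, 1  ]
///            [ %e  = xor i32 %s1, 1  ]
///
/// The %bd and %s1 names are placeholders chosen here for illustration.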
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");
namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm
static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}
/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail = false;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!llvm::is_contained(Blocks, (*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;

  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};
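// Worked example (illustrative, not from the original source): sinking
// NumInstructions = 3 instructions from NumBlocks = 2 predecessors while
// introducing one extra PHI and not splitting any edge gives
//   Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2,
// so the candidate is considered profitable (Cost > 0).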
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
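///
/// Illustrative example (not from the original comment): in the file header's
/// example, sinking the two xor instructions requires a PHI of their
/// operands, which would be modelled as
///   Values = [%a1, %c1], Blocks = [left block, right block].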
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }
  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }
  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }
  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }
  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!llvm::is_contained(NewBlocks, *BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }
  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//
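//
// Illustrative example (not from the original comment): in the file header's
// example, %a1 and %c1 get the same use-based number because their sole users
// (%a2 and %c2) have already been given equal numbers, so replacing %a1 with
// phi(%a1, %c1) cannot change any later computation.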
/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
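///
/// Illustrative example (not from the original comment): for
/// "%a1 = add i32 %b, 1" in the file header's example, the expression records
/// the *user* %a2 (sorted with any other users) rather than the operands
/// %b and 1.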
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }
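  // Note (not from the original source): folding the predicate into the
  // opcode above ensures that, for example, an icmp eq and an icmp ne with
  // equivalently-used operands never receive the same value number.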
  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }
public:
  ValueTable() = default;

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }
    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }
  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }
  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }
  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
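  ///
  /// Illustration (not from the original comment): if the only memory-defining
  /// instruction following a position in block A and a position in block B is
  /// a store in each block, the two positions get equal memory use orders
  /// exactly when those stores were already given the same value number,
  /// which the bottom-up lockstep walk guarantees has been decided first.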
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};
//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;
  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }
  /// The main heuristic function. Analyze the set of instructions pointed to by
  /// LRI and return a candidate solution if these instructions can be sunk, or
  /// None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }
  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) { I->dump(); }
             dbgs() << " ]\n";);
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;
  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;
679 bool RecomputePHIContents
= false;
680 if (ActivePreds
.size() != InitialActivePredSize
) {
681 ModelledPHISet NewNeededPHIs
;
682 for (auto P
: NeededPHIs
) {
683 P
.restrictToBlocks(ActivePreds
);
684 NewNeededPHIs
.insert(P
);
686 NeededPHIs
= NewNeededPHIs
;
687 LRI
.restrictToBlocks(ActivePreds
);
688 RecomputePHIContents
= true;
  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;
  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;
  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);
  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  for (auto I = Preds.begin(); I != Preds.end();) {
    if ((*I)->getTerminator()->getNumSuccessors() != 1)
      I = Preds.erase(I);
    else
      ++I;
  }

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();
.isValid()) {
795 auto Cand
= analyzeInstructionForSinking(LRI
, InstNum
, MemoryInstNum
,
796 NeededPHIs
, PHIContents
);
799 Cand
->calculateCost(NumOrigPHIs
, Preds
.size());
800 Candidates
.emplace_back(*Cand
);
804 llvm::stable_sort(Candidates
, std::greater
<SinkingInstructionCandidate
>());
805 LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
807 << " " << C
<< "\n";);
  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();
<Value
*, 4> NewOperands
;
841 for (unsigned O
= 0, E
= I0
->getNumOperands(); O
!= E
; ++O
) {
842 bool NeedPHI
= llvm::any_of(Insts
, [&I0
, O
](const Instruction
*I
) {
843 return I
->getOperand(O
) != I0
->getOperand(O
);
846 NewOperands
.push_back(I0
->getOperand(O
));
850 // Create a new PHI in the successor block and populate it.
851 auto *Op
= I0
->getOperand(O
);
852 assert(!Op
->getType()->isTokenTy() && "Can't PHI tokens!");
853 auto *PN
= PHINode::Create(Op
->getType(), Insts
.size(),
854 Op
->getName() + ".sink", &BBEnd
->front());
855 for (auto *I
: Insts
)
856 PN
->addIncoming(I
->getOperand(O
), I
->getParent());
857 NewOperands
.push_back(PN
);
  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
////////////////////////////////////////////////////////////////////////////////
// Pass machinery / boilerplate
////////////////////////////////////////////////////////////////////////////////
namespace {

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
char GVNSinkLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }