//===- GVNSink.cpp - sink expressions into successors --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///              \               /
///          [ %e = phi i32 %a2, %c2 ]
///          [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "If I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
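///
/// For the example above, a use-based numbering gives %a1 and %c1 (and
/// likewise %a2 and %c2) equal numbers, so both columns can be sunk into the
/// common successor, leaving roughly (an illustrative sketch with invented
/// value names):
///
/// [ ...                  ]   [ ...                  ]
///              \               /
///          [ %p = phi i32 %b, %d ]
///          [ %a = add i32 %p, 1  ]
///          [ %x = xor i32 %a, 1  ]
///          [ add i32 %x, 4       ]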
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");
namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm
namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}
/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
          Blocks.end()) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;
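  // A worked instance of the formula below: sinking 3 instructions from 2
  // blocks that need one extra PHI and no split edge gives
  // Cost = (3 * (2 - 1)) - (1 * 1) - 0 = 2.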
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }
  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};
raw_ostream &operator<<(raw_ostream &OS,
                        const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we were to sink a sequence of instructions. It
/// provides a hash function for efficient equality comparisons.
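/// For example, sinking "%a = add i32 %x, 1" from block A together with
/// "%b = add i32 %y, 1" from block B would require a PHI with incoming values
/// (%x, A) and (%y, B); that required-but-not-yet-created PHI is what a
/// ModelledPHI describes.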
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }
  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }
  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }
  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }
  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
          NewBlocks.end()) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }
  ArrayRef<Value *> getValues() const { return Values; }
  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }
  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }
  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }
  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//
/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
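///
/// For instance, for %a1 in the example at the top of the file the stored
/// "operands" would be %a1's single user %a2 (rather than %b and 1), combined
/// with %a1's opcode and type.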
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    // Sort the users so that equivalent expressions compare equal.
    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }
  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }
  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile);
  }
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H =
        hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }
  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }
public:
  ValueTable() = default;
  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }
  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }
  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }
  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks sharing a
  /// single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
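  ///
  /// For example, when two candidate loads are compared, each load is given
  /// the value number of the next memory-defining instruction (e.g. a store)
  /// that follows it in its block; the loads can only end up with equal
  /// numbers if those following instructions were already proven equivalent.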
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return -1;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;
  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }
private:
  ValueTable VN;

  bool isInstructionBlacklisted(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }
  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }
  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;
  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (isInstructionBlacklisted(I))
      return None;
  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }
  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
    NeededPHIs.erase(NewPHI);
    RecomputePHIContents = true;
  }
  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;
  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;
  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  for (auto *C : ActivePreds)
    Cand.Blocks.push_back(C);

  return Cand;
}
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);
  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  for (auto I = Preds.begin(); I != Preds.end();) {
    if ((*I)->getTerminator()->getNumSuccessors() != 1)
      I = Preds.erase(I);
    else
      ++I;
  }
  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();
  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }
  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << "  " << C << "\n";);
  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();
  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }
    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }
  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);
  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
////////////////////////////////////////////////////////////////////////////////
//                 Pass machinery / boilerplate
////////////////////////////////////////////////////////////////////////////////

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<GlobalsAA>();
  return PA;
}
char GVNSinkLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }