//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///                  \           /
///            [ %e = phi i32 %a2, %c2 ]
///            [ add i32 %e, 4         ]
///
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
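///
/// For the example above, a use-based numbering gives %a1 and %c1 the same
/// number: each is used only by an xor-with-1 whose result feeds the same
/// PHI, so substituting a PHI of the two for either of them leaves every
/// subsequent instruction equivalent. That is what permits first the xors
/// and then the adds to be sunk into the common successor.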
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");

namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm

namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}

/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
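///
/// A minimal usage sketch (mirroring how sinkBB drives it below; "analyze"
/// stands in for the caller's per-row processing):
///   LockstepReverseIterator LRI(Preds);
///   while (LRI.isValid()) {
///     analyze(*LRI); // ArrayRef<Instruction *>, one entry per active block.
///     --LRI;
///   }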
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (!Blocks.contains((*II)->getParent())) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};

//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;
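
  // A worked example of calculateCost below: sinking 3 instructions from 2
  // blocks that require 1 extra PHI and no edge split gives
  //   Cost = 3 * (2 - 1) - 1 * 1 - 0 = 2,
  // so the candidate is profitable. The quadratic PHI term makes large
  // numbers of new PHIs quickly cancel out the savings.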
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }

  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
#endif

//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we sunk a sequence of instructions. It provides
/// a hash function for efficient equality comparisons.
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;

  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }

  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }

  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }

  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }

  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (!NewBlocks.contains(*BI)) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }

  ArrayRef<Value *> getValues() const { return Values; }

  bool areAllIncomingValuesSame() const { return llvm::all_equal(Values); }

  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }

  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }

  // Hash functor.
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }

  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};

template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;

//===----------------------------------------------------------------------===//
// ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//

/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
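///
/// For example, given "%a1 = add i32 %b, 1" whose only user is
/// "%a2 = xor i32 %a1, 1" (as in the file header), the expression built for
/// %a1 records the use %a2 in its operand list rather than the operand %b.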
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};

using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;

class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  BasicBlocksSet ReachableBBs;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Set basic blocks reachable from entry block.
  void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
    this->ReachableBBs = ReachableBBs;
  }

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    if (!ReachableBBs.contains(I->getParent()))
      return ~0U;

    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks that share
  /// a single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
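  ///
  /// For example, when comparing the final loads of two predecessor blocks,
  /// the memory-defining instructions that follow each load have already been
  /// numbered, so the loads receive the same memory-use order exactly when
  /// their next memory-defining instructions share a value number (or both
  /// blocks have none after the load, yielding 0).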
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return 0;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;

  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  ValueTable VN;

  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be sunk,
  /// or std::nullopt otherwise.
  std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};

std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
                                      unsigned &InstNum,
                                      unsigned &MemoryInstNum,
                                      ModelledPHISet &NeededPHIs,
                                      SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n";
             for (auto *I : Insts) I->dump();
             dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return std::nullopt;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return std::nullopt;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return std::nullopt;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.erase(NewPHI))
    RecomputePHIContents = true;

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return std::nullopt;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return std::nullopt;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return std::nullopt;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return std::nullopt;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return std::nullopt;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  append_range(Cand.Blocks, ActivePreds);

  return Cand;
}

unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);

  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  llvm::erase_if(Preds, [](BasicBlock *BB) {
    return BB->getTerminator()->getNumSuccessors() != 1;
  });

  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();
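
  // Walk the predecessors bottom-up in lockstep, one instruction row per
  // iteration. Candidates are cumulative: the candidate produced on iteration
  // k models sinking the last k rows, so deeper candidates trade extra PHIs
  // for more sunk instructions and the cost function arbitrates between them.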
  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }

  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << "  " << C << "\n";);

  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }

  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}

void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN =
        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
    PN->insertBefore(BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}

} // end anonymous namespace

PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}