//===- GVNSink.cpp - sink expressions into successors --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file GVNSink.cpp
/// This pass attempts to sink instructions into successors, reducing static
/// instruction count and enabling if-conversion.
///
/// We use a variant of global value numbering to decide what can be sunk.
/// Consider:
///
/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
///              \               /
///          [ %e = phi i32 %a2, %c2 ]
///          [ add i32 %e, 4         ]
///
/// GVN would number %a1 and %c1 differently because they compute different
/// results - the VN of an instruction is a function of its opcode and the
/// transitive closure of its operands. This is the key property for hoisting
/// and CSE.
///
/// What we want when sinking however is for a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "If I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The PostValueTable class in GVN provides this
/// mapping.
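///
/// For the example above, a use-based numbering gives %a1 and %c1 (and
/// likewise %a2 and %c2) equal numbers, so both columns can be sunk into the
/// common successor, leaving roughly (an illustrative sketch with invented
/// value names):
///
/// [ ...                  ]   [ ...                  ]
///              \               /
///          [ %p = phi i32 %b, %d ]
///          [ %a = add i32 %p, 1  ]
///          [ %x = xor i32 %a, 1  ]
///          [ add i32 %x, 4       ]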
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;
#define DEBUG_TYPE "gvn-sink"

STATISTIC(NumRemoved, "Number of instructions removed");
namespace llvm {
namespace GVNExpression {

LLVM_DUMP_METHOD void Expression::dump() const {
  print(dbgs());
  dbgs() << "\n";
}

} // end namespace GVNExpression
} // end namespace llvm
namespace {

static bool isMemoryInst(const Instruction *I) {
  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
}
/// Iterates through instructions in a set of blocks in reverse order from the
/// first non-terminator. For example (assume all blocks have size n):
///   LockstepReverseIterator I([B1, B2, B3]);
///   *I-- = [B1[n], B2[n], B3[n]];
///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
///   ...
///
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
/// to determine which blocks are still going and the order they appear in the
/// list returned by operator*.
class LockstepReverseIterator {
  ArrayRef<BasicBlock *> Blocks;
  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
  SmallVector<Instruction *, 4> Insts;
  bool Fail;

public:
  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
    reset();
  }

  void reset() {
    Fail = false;
    ActiveBlocks.clear();
    for (BasicBlock *BB : Blocks)
      ActiveBlocks.insert(BB);
    Insts.clear();
    for (BasicBlock *BB : Blocks) {
      if (BB->size() <= 1) {
        // Block wasn't big enough - only contained a terminator.
        ActiveBlocks.remove(BB);
        continue;
      }
      Insts.push_back(BB->getTerminator()->getPrevNode());
    }
    if (Insts.empty())
      Fail = true;
  }

  bool isValid() const { return !Fail; }
  ArrayRef<Instruction *> operator*() const { return Insts; }

  // Note: This needs to return a SmallSetVector as the elements of
  // ActiveBlocks will be later copied to Blocks using std::copy. The
  // resultant order of elements in Blocks needs to be deterministic.
  // Using SmallPtrSet instead causes non-deterministic order while
  // copying. And we cannot simply sort Blocks as they need to match the
  // corresponding Values.
  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }

  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
    for (auto II = Insts.begin(); II != Insts.end();) {
      if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
          Blocks.end()) {
        ActiveBlocks.remove((*II)->getParent());
        II = Insts.erase(II);
      } else {
        ++II;
      }
    }
  }

  void operator--() {
    if (Fail)
      return;
    SmallVector<Instruction *, 4> NewInsts;
    for (auto *Inst : Insts) {
      if (Inst == &Inst->getParent()->front())
        ActiveBlocks.remove(Inst->getParent());
      else
        NewInsts.push_back(Inst->getPrevNode());
    }
    if (NewInsts.empty()) {
      Fail = true;
      return;
    }
    Insts = NewInsts;
  }
};
//===----------------------------------------------------------------------===//

/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
/// required.
struct SinkingInstructionCandidate {
  unsigned NumBlocks;
  unsigned NumInstructions;
  unsigned NumPHIs;
  unsigned NumMemoryInsts;
  int Cost = -1;
  SmallVector<BasicBlock *, 4> Blocks;
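  // A worked instance of the formula below: sinking 3 instructions from 2
  // blocks that need one extra PHI and no split edge gives
  // Cost = (3 * (2 - 1)) - (1 * 1) - 0 = 2.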
  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
    Cost = (NumInstructions * (NumBlocks - 1)) -
           (NumExtraPHIs *
            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
           - SplitEdgeCost;
  }
  bool operator>(const SinkingInstructionCandidate &Other) const {
    return Cost > Other.Cost;
  }
};
raw_ostream &operator<<(raw_ostream &OS,
                        const SinkingInstructionCandidate &C) {
  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
  return OS;
}
//===----------------------------------------------------------------------===//

/// Describes a PHI node that may or may not exist. These track the PHIs
/// that must be created if we were to sink a sequence of instructions. It
/// provides a hash function for efficient equality comparisons.
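/// For example, sinking "%a = add i32 %x, 1" from block A together with
/// "%b = add i32 %y, 1" from block B would require a PHI with incoming values
/// (%x, A) and (%y, B); that required-but-not-yet-created PHI is what a
/// ModelledPHI describes.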
class ModelledPHI {
  SmallVector<Value *, 4> Values;
  SmallVector<BasicBlock *, 4> Blocks;

public:
  ModelledPHI() = default;
  ModelledPHI(const PHINode *PN) {
    // BasicBlock comes first so we sort by basic block pointer order,
    // then by value pointer order.
    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
    llvm::sort(Ops);
    for (auto &P : Ops) {
      Blocks.push_back(P.first);
      Values.push_back(P.second);
    }
  }
  /// Create a dummy ModelledPHI that will compare unequal to any other
  /// ModelledPHI without the same ID.
  /// \note This is specifically for DenseMapInfo - do not use this!
  static ModelledPHI createDummy(size_t ID) {
    ModelledPHI M;
    M.Values.push_back(reinterpret_cast<Value *>(ID));
    return M;
  }
  /// Create a PHI from an array of incoming values and incoming blocks.
  template <typename VArray, typename BArray>
  ModelledPHI(const VArray &V, const BArray &B) {
    llvm::copy(V, std::back_inserter(Values));
    llvm::copy(B, std::back_inserter(Blocks));
  }
  /// Create a PHI from [I[OpNum] for I in Insts].
  template <typename BArray>
  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
    llvm::copy(B, std::back_inserter(Blocks));
    for (auto *I : Insts)
      Values.push_back(I->getOperand(OpNum));
  }
  /// Restrict the PHI's contents down to only \c NewBlocks.
  /// \c NewBlocks must be a subset of \c this->Blocks.
  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
    auto BI = Blocks.begin();
    auto VI = Values.begin();
    while (BI != Blocks.end()) {
      assert(VI != Values.end());
      if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
          NewBlocks.end()) {
        BI = Blocks.erase(BI);
        VI = Values.erase(VI);
      } else {
        ++BI;
        ++VI;
      }
    }
    assert(Blocks.size() == NewBlocks.size());
  }
  ArrayRef<Value *> getValues() const { return Values; }
  bool areAllIncomingValuesSame() const {
    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
  }
  bool areAllIncomingValuesSameType() const {
    return llvm::all_of(
        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
  }
  bool areAnyIncomingValuesConstant() const {
    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
  }
  unsigned hash() const {
    return (unsigned)hash_combine_range(Values.begin(), Values.end());
  }
  bool operator==(const ModelledPHI &Other) const {
    return Values == Other.Values && Blocks == Other.Blocks;
  }
};
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
//===----------------------------------------------------------------------===//
//                             ValueTable
//===----------------------------------------------------------------------===//
// This is a value number table where the value number is a function of the
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
// that the program would be equivalent if we replaced A with PHI(A, B).
//===----------------------------------------------------------------------===//
/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with side effects.
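///
/// For instance, for %a1 in the example at the top of the file the stored
/// "operands" would be %a1's single user %a2 (rather than %b and 1), combined
/// with %a1's opcode and type.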
class InstructionUseExpr : public GVNExpression::BasicExpression {
  unsigned MemoryUseOrder = -1;
  bool Volatile = false;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    // Sort the users so that equivalent expressions compare equal.
    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }
  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }
  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile);
  }
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H =
        hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
class ValueTable {
  DenseMap<Value *, uint32_t> ValueNumbering;
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;
  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }
  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }
public:
  ValueTable() = default;
  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      break;
    }

    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }
  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }
  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }
  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge: we only ever check for equivalence between blocks sharing a
  /// single common immediate successor, and when determining if I1 == I2 we
  /// will have already determined that next(I1) == next(I2). This inductive
  /// property allows us to simply return the value number of the next
  /// instruction that defines memory.
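  ///
  /// For example, when two candidate loads are compared, each load is given
  /// the value number of the next memory-defining instruction (e.g. a store)
  /// that follows it in its block; the loads can only end up with equal
  /// numbers if those following instructions were already proven equivalent.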
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    return -1;
  }
};

//===----------------------------------------------------------------------===//

class GVNSink {
public:
  GVNSink() = default;
  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function *> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }
private:
  ValueTable VN;

  bool isInstructionBlacklisted(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }
  /// The main heuristic function. Analyze the set of instructions pointed to
  /// by LRI and return a candidate solution if these instructions can be
  /// sunk, or None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }
  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
    LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
    ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;
  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (isInstructionBlacklisted(I))
      return None;
  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }
  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
    NeededPHIs.erase(NewPHI);
    RecomputePHIContents = true;
  }
  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  // Is this instruction required by a later PHI that doesn't match this PHI?
  // If so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;
  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;
  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }
  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  for (auto *C : ActivePreds)
    Cand.Blocks.push_back(C);

  return Cand;
}
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
  SmallVector<BasicBlock *, 4> Preds;
  for (auto *B : predecessors(BBEnd)) {
    auto *T = B->getTerminator();
    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
      Preds.push_back(B);
    else
      return 0;
  }
  if (Preds.size() < 2)
    return 0;
  llvm::sort(Preds);
  unsigned NumOrigPreds = Preds.size();
  // We can only sink instructions through unconditional branches.
  for (auto I = Preds.begin(); I != Preds.end();) {
    if ((*I)->getTerminator()->getNumSuccessors() != 1)
      I = Preds.erase(I);
    else
      ++I;
  }
  LockstepReverseIterator LRI(Preds);
  SmallVector<SinkingInstructionCandidate, 4> Candidates;
  unsigned InstNum = 0, MemoryInstNum = 0;
  ModelledPHISet NeededPHIs;
  SmallPtrSet<Value *, 4> PHIContents;
  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
  unsigned NumOrigPHIs = NeededPHIs.size();
  while (LRI.isValid()) {
    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
                                             NeededPHIs, PHIContents);
    if (!Cand)
      break;
    Cand->calculateCost(NumOrigPHIs, Preds.size());
    Candidates.emplace_back(*Cand);
    --LRI;
  }
  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n";
             for (auto &C : Candidates) dbgs() << "  " << C << "\n";);
  // Pick the top candidate, as long as it is positive!
  if (Candidates.empty() || Candidates.front().Cost <= 0)
    return 0;
  auto C = Candidates.front();

  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
  BasicBlock *InsertBB = BBEnd;
  if (C.Blocks.size() < NumOrigPreds) {
    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
    if (!InsertBB) {
      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
      // Edge couldn't be split.
      return 0;
    }
  }
  for (unsigned I = 0; I < C.NumInstructions; ++I)
    sinkLastInstruction(C.Blocks, InsertBB);

  return C.NumInstructions;
}
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();
  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }
    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }
  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  foldPointlessPHINodes(BBEnd);
  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
////////////////////////////////////////////////////////////////////////////////
//                 Pass machinery / boilerplate
////////////////////////////////////////////////////////////////////////////////

class GVNSinkLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNSinkLegacyPass() : FunctionPass(ID) {
    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    GVNSink G;
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<GlobalsAAWrapperPass>();
  }
};

} // end anonymous namespace
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
  GVNSink G;
  if (!G.run(F))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<GlobalsAA>();
  return PA;
}
char GVNSinkLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }