//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include <cassert>
#include <deque>
#include <memory>
#include <utility>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
STATISTIC(NumCSECall, "Number of call instructions CSE'd");
STATISTIC(NumDSE, "Number of trivial dead stores removed");

DEBUG_COUNTER(CSECounter, "early-cse",
              "Controls which instructions are removed");

static cl::opt<unsigned> EarlyCSEMssaOptCap(
    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  // Hash min/max/abs (cmp + select) to allow for commuted operands.
  // Min/max may also have a non-canonical compare predicate (e.g., the compare
  // for smin may use 'sgt' rather than 'slt'), and non-canonical operands in
  // the compare.
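  //
  // For example (an illustrative sketch with hypothetical values), both of
  // these selects compute smin(%a, %b), and sorting A/B by pointer gives them
  // the same hash despite the swapped predicate and operand order:
  //
  //   %c1 = icmp slt i32 %a, %b
  //   %m1 = select i1 %c1, i32 %a, i32 %b
  //   %c2 = icmp sgt i32 %b, %a
  //   %m2 = select i1 %c2, i32 %a, i32 %b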
  Value *A, *B;
  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
  // TODO: We should also detect FP min/max.
  if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
      SPF == SPF_UMIN || SPF == SPF_UMAX) {
    if (A > B)
      std::swap(A, B);
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }
  if (SPF == SPF_ABS || SPF == SPF_NABS) {
    // ABS/NABS always puts the input in A and its negation in B.
    return hash_combine(Inst->getOpcode(), SPF, A, B);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  // Min/max/abs can occur with commuted operands, non-canonical predicates,
  // and/or non-canonical operands.
  Value *LHSA, *LHSB;
  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
  // TODO: We should also detect FP min/max.
  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
      LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
      LSPF == SPF_ABS || LSPF == SPF_NABS) {
    Value *RHSA, *RHSB;
    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
    if (LSPF == RSPF) {
      // Abs results are placed in a defined order by matchSelectPattern.
      if (LSPF == SPF_ABS || LSPF == SPF_NABS)
        return LHSA == RHSA && LHSB == RHSB;
      return ((LHSA == RHSA && LHSB == RHSB) ||
              (LHSA == RHSB && LHSB == RHSA));
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {

/// Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }

  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};

} // end namespace llvm

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  const SimplifyQuery SQ;
  MemorySSA *MSSA;
  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;

  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<SimpleValue, Value *>>;
  using ScopedHTType =
      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                      AllocatorTy>;

  /// A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
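  ///
  /// For example (an illustrative sketch with hypothetical values), given
  ///
  ///   %a = add i32 %x, %y
  ///   ...
  ///   %b = add i32 %y, %x
  ///
  /// the lookup of %b finds %a (the hash and equality check commute the
  /// operands), so uses of %b are replaced with %a and %b is erased.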
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load. In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count. The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store. Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations. Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
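  ///
  /// For example (an illustrative sketch with hypothetical values):
  ///
  ///   %a = load i32, i32* %p       ; recorded at generation 0
  ///   store i32 7, i32* %q         ; CurrentGeneration becomes 1
  ///   %b = load i32, i32* %p       ; generation 1 != 0, so not CSE'd here
  ///
  /// unless MemorySSA (when enabled) can prove the store does not clobber %p.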
  struct LoadValue {
    Instruction *DefInst = nullptr;
    unsigned Generation = 0;
    int MatchingId = -1;
    bool IsAtomic = false;

    LoadValue() = default;
    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
          IsAtomic(IsAtomic) {}
  };

  using LoadMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<Value *, LoadValue>>;
  using LoadHTType =
      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                      LoadMapAllocator>;

  LoadHTType AvailableLoads;

  // A scoped hash table mapping memory locations (represented as typed
  // addresses) to generation numbers at which that memory location became
  // (henceforth indefinitely) invariant.
  using InvariantMapAllocator =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MemoryLocation, unsigned>>;
  using InvariantHTType =
      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
                      InvariantMapAllocator>;
  InvariantHTType AvailableInvariants;

  /// A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  using CallHTType =
      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
  CallHTType AvailableCalls;

  /// This is the current generation of the memory value.
  unsigned CurrentGeneration = 0;

  /// Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
           const TargetTransformInfo &TTI, DominatorTree &DT,
           AssumptionCache &AC, MemorySSA *MSSA)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}

  bool run();

private:
  unsigned ClobberCounter = 0;
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
    NodeScope(const NodeScope &) = delete;
    NodeScope &operator=(const NodeScope &) = delete;

  private:
    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    InvariantHTType::ScopeTy InvariantScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
              DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end),
          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
                 AvailableCalls)
          {}
    StackNode(const StackNode &) = delete;
    StackNode &operator=(const StackNode &) = delete;

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }

    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }

    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed = false;
  };

  /// Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info))
          IsTargetMemInst = true;
    }

    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }

    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }

    bool isAtomic() const {
      if (IsTargetMemInst)
        return Info.Ordering != AtomicOrdering::NotAtomic;
      return Inst->isAtomic();
    }

    bool isUnordered() const {
      if (IsTargetMemInst)
        return Info.isUnordered();

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }

      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst)
        return Info.IsVolatile;

      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }

      // Conservative answer
      return true;
    }

    bool isInvariantLoad() const {
      if (auto *LI = dyn_cast<LoadInst>(Inst))
        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
      return false;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }

    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure. That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }

    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      return getLoadStorePointerOperand(Inst);
    }

    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }

    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst = false;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
                             const BasicBlock *BB, const BasicBlock *Pred);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }

  /// Return true if the instruction is known to only operate on memory
  /// provably invariant in the given "generation".
  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);

  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
                           Instruction *EarlierInst, Instruction *LaterInst);

  void removeMSSA(Instruction *Inst) {
    if (!MSSA)
      return;
    if (VerifyMemorySSA)
      MSSA->verifyMemorySSA();
    // Removing a store here can leave MemorySSA in an unoptimized state by
    // creating MemoryPhis that have identical arguments and by creating
    // MemoryUses whose defining access is not an actual clobber. The phi case
    // is handled by MemorySSA when passing OptimizePhis = true to
    // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
    // by MemorySSA's getClobberingMemoryAccess.
    MSSAUpdater->removeMemoryAccess(Inst, true);
  }
};

} // end anonymous namespace

/// Determine if the memory referenced by LaterInst is from the same heap
/// version as EarlierInst.
/// This is currently called in two scenarios:
///
///   load p
///   ...
///   load p
///
/// and
///
///   x = load p
///   ...
///   store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
                                   unsigned LaterGeneration,
                                   Instruction *EarlierInst,
                                   Instruction *LaterInst) {
  // Check the simple memory generation tracking first.
  if (EarlierGeneration == LaterGeneration)
    return true;

  if (!MSSA)
    return false;

  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
  // read/write memory, then we can safely return true here.
  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
  // by also checking the MemorySSA MemoryAccess on the instruction. Initial
  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
  // with the default optimization pipeline.
  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
  if (!EarlierMA)
    return true;
  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
  if (!LaterMA)
    return true;

  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
  // EarlierInst and LaterInst and neither can any other write that potentially
  // clobbers LaterInst.
  MemoryAccess *LaterDef;
  if (ClobberCounter < EarlyCSEMssaOptCap) {
    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
    ClobberCounter++;
  } else
    LaterDef = LaterMA->getDefiningAccess();

  return MSSA->dominates(LaterDef, EarlierMA);
}

bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
  // A location loaded from with an invariant_load is assumed to *never* change
  // within the visible scope of the compilation.
  if (auto *LI = dyn_cast<LoadInst>(I))
    if (LI->getMetadata(LLVMContext::MD_invariant_load))
      return true;

  auto MemLocOpt = MemoryLocation::getOrNone(I);
  if (!MemLocOpt)
    // "target" intrinsic forms of loads aren't currently known to
    // MemoryLocation::get. TODO
    return false;
  MemoryLocation MemLoc = *MemLocOpt;
  if (!AvailableInvariants.count(MemLoc))
    return false;

  // Is the generation at which this became invariant older than the
  // current one?
  return AvailableInvariants.lookup(MemLoc) <= GenAt;
}

bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
                                     const BranchInst *BI, const BasicBlock *BB,
                                     const BasicBlock *Pred) {
  assert(BI->isConditional() && "Should be a conditional branch!");
  assert(BI->getCondition() == CondInst && "Wrong condition?");
  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
  auto *TorF = (BI->getSuccessor(0) == BB)
                   ? ConstantInt::getTrue(BB->getContext())
                   : ConstantInt::getFalse(BB->getContext());
  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
      return BOp->getOpcode() == Opcode;
    return false;
  };
  // If the condition is an AND operation, we can propagate its operands into
  // the true branch. If it is an OR operation, we can propagate them into the
  // false branch.
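  //
  // For example (an illustrative sketch with hypothetical values), given
  //
  //   %and = and i1 %a, %b
  //   br i1 %and, label %bb, label %other
  //
  // then on the edge into %bb the values %and, %a, and %b are all known true,
  // so their dominated uses can be replaced with 'true'.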
  unsigned PropagateOpcode =
      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;

  bool MadeChanges = false;
  SmallVector<Instruction *, 4> WorkList;
  SmallPtrSet<Instruction *, 4> Visited;
  WorkList.push_back(CondInst);
  while (!WorkList.empty()) {
    Instruction *Curr = WorkList.pop_back_val();

    AvailableValues.insert(Curr, TorF);
    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                      << Curr->getName() << "' as " << *TorF << " in "
                      << BB->getName() << "\n");
    if (!DebugCounter::shouldExecute(CSECounter)) {
      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
    } else {
      // Replace all dominated uses with the known value.
      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
                                                    BasicBlockEdge(Pred, BB))) {
        NumCSECVP += Count;
        MadeChanges = true;
      }
    }

    if (MatchBinOp(Curr, PropagateOpcode))
      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
        if (Instruction *OPI = dyn_cast<Instruction>(Op))
          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
            WorkList.push_back(OPI);
  }

  return MadeChanges;
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  bool Changed = false;
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block. If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent value. For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path. We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value. Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
    if (BI && BI->isConditional()) {
      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
      if (CondInst && SimpleValue::canHandle(CondInst))
        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
    }
  }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
  /// to the same location, we delete the dead store. This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;

  // See if any instructions in the block can be eliminated. If so, do it. If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
        continue;
      }
      if (!salvageDebugInfo(*Inst))
        replaceDbgUsesWithUndef(Inst);
      removeMSSA(Inst);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not bother with their removal. However, we should mark
    // their condition as true for all dominated blocks.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      auto *CondI =
          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
      if (CondI && SimpleValue::canHandle(CondI)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
                          << '\n');
        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
      } else
        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
      continue;
    }

    // We can skip all invariant.start intrinsics since they only read memory,
    // and we can forward values across it. For invariant starts without
    // invariant ends, we can use the fact that the invariantness never ends to
    // start a scope in the current generation which is true for all future
    // generations. Also, we don't need to consume the last store since the
    // semantics of invariant.start allow us to perform DSE of the last
    // store, if there was a store following invariant.start. Consider:
    //
    // store 30, i8* p
    // invariant.start(p)
    // store 40, i8* p
    // We can DSE the store to 30, since the store 40 to invariant location p
    // causes undefined behaviour.
    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
      // If there are any uses, the scope might end.
      if (!Inst->use_empty())
        continue;
      auto *CI = cast<CallInst>(Inst);
      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
      // Don't start a scope if we already have a better one pushed
      if (!AvailableInvariants.count(MemLoc))
        AvailableInvariants.insert(MemLoc, CurrentGeneration);
      continue;
    }

    if (isGuard(Inst)) {
      if (auto *CondI =
              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
        if (SimpleValue::canHandle(CondI)) {
          // Do we already know the actual value of this condition?
          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
            // Is the condition known to be true?
            if (isa<ConstantInt>(KnownCond) &&
                cast<ConstantInt>(KnownCond)->isOne()) {
              LLVM_DEBUG(dbgs()
                         << "EarlyCSE removing guard: " << *Inst << '\n');
              removeMSSA(Inst);
              Inst->eraseFromParent();
              Changed = true;
              continue;
            } else
              // Use the known value if it wasn't true.
              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
          }
          // The condition we're guarding on here is true for all dominated
          // locations.
          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
        }
      }

      // Guard intrinsics read all memory, but don't write any memory.
      // Accordingly, don't update the generation but consume the last store (to
      // avoid an incorrect DSE).
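      //
      // For example (an illustrative sketch with hypothetical values), in
      //
      //   store i32 1, i32* %p
      //   call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
      //   store i32 1, i32* %p
      //
      // the first store must not be treated as dead: if the guard fails, the
      // deoptimization exit may still read %p. Clearing LastStore models that
      // read.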
      LastStore = nullptr;
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, SQ)) {
      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V
                        << '\n');
      if (!DebugCounter::shouldExecute(CSECounter)) {
        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
      } else {
        bool Killed = false;
        if (!Inst->use_empty()) {
          Inst->replaceAllUsesWith(V);
          Changed = true;
        }
        if (isInstructionTriviallyDead(Inst, &TLI)) {
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          Killed = true;
        }
        if (Changed)
          ++NumSimplify;
        if (Killed)
          continue;
      }
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value. If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V
                          << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (Conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      if (MemInst.isInvariantLoad()) {
        // If we pass an invariant load, we know that the memory location is
        // indefinitely constant from the moment of first dereferenceability.
        // We conservatively treat the invariant_load as that moment. If we
        // pass an invariant load after already establishing a scope, don't
        // restart it since we want to preserve the earliest point seen.
        auto MemLoc = MemoryLocation::get(Inst);
        if (!AvailableInvariants.count(MemLoc))
          AvailableInvariants.insert(MemLoc, CurrentGeneration);
      }

      // If we have an available version of this load, and if it is the right
      // generation or the load is known to be from an invariant location,
      // replace this instruction.
      //
      // If either the dominating load or the current load are invariant, then
      // we can assume the current load loads the same value as the dominating
      // load.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst != nullptr &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
        if (Op != nullptr) {
          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                            << " to: " << *InVal.DefInst << '\n');
          if (!DebugCounter::shouldExecute(CSECounter)) {
            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            continue;
          }
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          removeMSSA(Inst);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory or throw (and potentially read
    // from memory in the exception handler), forget LastStore. Load/store
    // intrinsics will indicate both a read and a write to memory. The target
    // may override this (e.g. so that a store intrinsic does not read from
    // memory, and thus will be treated the same as a regular store for
    // commoning purposes).
    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr &&
          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
                              Inst)) {
        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                          << " to: " << *InVal.first << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence. As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation. We do need to prevent DSE across the fence,
    // but that's handled above.
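    //
    // For example (an illustrative sketch with hypothetical values), in
    //
    //   %a = load i32, i32* %p
    //   fence release
    //   %b = load i32, i32* %p
    //
    // %b can still be CSE'd to %a, since the generation is not bumped here,
    // while the read-from-memory check above already cleared LastStore.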
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // Write-back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write. The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
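    //
    // For example (an illustrative sketch with hypothetical values):
    //
    //   %v = load i32, i32* %p
    //   store i32 %v, i32* %p   ; removable: writes back the value just loaded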
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.DefInst &&
          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
           isSameMemGeneration(InVal.Generation, CurrentGeneration,
                               InVal.DefInst, Inst))) {
        // It is okay to have a LastStore to a different pointer here if MemorySSA
        // tells us that the load and store are from the same memory generation.
        // In that case, LastStore should keep its present value since we're
        // removing the current store.
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                    MemInst.getPointerOperand() ||
                MSSA) &&
               "can't have an intervening store if not using MemorySSA!");
        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        if (!DebugCounter::shouldExecute(CSECounter)) {
          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
          continue;
        }
        removeMSSA(Inst);
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all. Check to see if it is
    // something that could modify memory. If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores. There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
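        //
        // For example (an illustrative sketch with hypothetical values):
        //
        //   store i32 1, i32* %p   ; dead, removed here
        //   store i32 2, i32* %p   ; kept, becomes the new LastStore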
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                              << " due to: " << *Inst << '\n');
            if (!DebugCounter::shouldExecute(CSECounter)) {
              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
            } else {
              removeMSSA(LastStore);
              LastStore->eraseFromParent();
              Changed = true;
              ++NumDSE;
              LastStore = nullptr;
            }
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values. Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer. It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
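        //
        // For example (an illustrative sketch with hypothetical values):
        //
        //   store volatile i32 %x, i32* %p
        //   %v = load i32, i32* %p   ; forwardable: %v is replaced with %x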
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed. We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
      CurrentGeneration, DT.getRootNode(),
      DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
                        AvailableCalls, NodeToProcess->childGeneration(),
                        child, child->begin(), child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSA =
      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;

  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

  if (!CSE.run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<GlobalsAA>();
  if (UseMemorySSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

/// A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
    if (UseMemorySSA)
      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
    else
      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    auto *MSSA =
        UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;

    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    if (UseMemorySSA) {
      AU.addRequired<MemorySSAWrapperPass>();
      AU.addPreserved<MemorySSAWrapperPass>();
    }
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;

template<>
char EarlyCSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)

using EarlyCSEMemSSALegacyPass =
    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;

template<>
char EarlyCSEMemSSALegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
  if (UseMemorySSA)
    return new EarlyCSEMemSSALegacyPass();
  else
    return new EarlyCSELegacyPass();
}

INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                      "Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
                    "Early CSE w/ MemorySSA", false, false)