lib/Transforms/Scalar/SpeculateAroundPHIs.cpp

   1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
  10 #include "llvm/ADT/PostOrderIterator.h"
  11 #include "llvm/ADT/Sequence.h"
  12 #include "llvm/ADT/SetVector.h"
  13 #include "llvm/ADT/Statistic.h"
  14 #include "llvm/Analysis/TargetTransformInfo.h"
  15 #include "llvm/Analysis/ValueTracking.h"
  16 #include "llvm/IR/BasicBlock.h"
  17 #include "llvm/IR/IRBuilder.h"
  18 #include "llvm/IR/Instructions.h"
  19 #include "llvm/IR/IntrinsicInst.h"
  20 #include "llvm/Support/Debug.h"
  21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  22
  23 using namespace llvm;
  24
  25 #define DEBUG_TYPE "spec-phis"
  26
  27 STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around");
  28 STATISTIC(NumEdgesSplit,
  29           "Number of critical edges which were split for speculation");
  30 STATISTIC(NumSpeculatedInstructions,
  31           "Number of instructions we speculated around the PHI nodes");
  32 STATISTIC(NumNewRedundantInstructions,
  33           "Number of new, redundant instructions inserted");
  34
  35 /// Check whether speculating the users of a PHI node around the PHI
  36 /// will be safe.
  37 ///
  38 /// This checks both that all of the users are safe and also that all of their
  39 /// operands are either recursively safe or already available along an incoming
  40 /// edge to the PHI.
  41 ///
  42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
  43 /// and the chain of nodes that definitively reach any unsafe node in
  44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
  45 /// PHIs in the same basic block, the exploration here can be reused. However,
  46 /// these caches must no be reused for PHIs in a different basic block as they
  47 /// reflect what is available along incoming edges.
  48 static bool
  49 isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
  50                           SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
  51                           SmallPtrSetImpl<Instruction *> &UnsafeSet) {
  52   auto *PhiBB = PN.getParent();
  53   SmallPtrSet<Instruction *, 4> Visited;
  54   SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
  55
  56   // Walk each user of the PHI node.
  57   for (Use &U : PN.uses()) {
  58     auto *UI = cast<Instruction>(U.getUser());
  59
  60     // Ensure the use post-dominates the PHI node. This ensures that, in the
  61     // absence of unwinding, the use will actually be reached.
  62     // FIXME: We use a blunt hammer of requiring them to be in the same basic
  63     // block. We should consider using actual post-dominance here in the
  64     // future.
  65     if (UI->getParent() != PhiBB) {
  66       LLVM_DEBUG(dbgs() << "  Unsafe: use in a different BB: " << *UI << "\n");
  67       return false;
  68     }
  69
  70     // FIXME: This check is much too conservative. We're not going to move these
  71     // instructions onto new dynamic paths through the program unless there is
  72     // a call instruction between the use and the PHI node. And memory isn't
  73     // changing unless there is a store in that same sequence. We should
  74     // probably change this to do at least a limited scan of the intervening
  75     // instructions and allow handling stores in easily proven safe cases.
  76     if (mayBeMemoryDependent(*UI)) {
  77       LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate use: " << *UI << "\n");
  78       return false;
  79     }
  80
  81     // Now do a depth-first search of everything these users depend on to make
  82     // sure they are transitively safe. This is a depth-first search, but we
  83     // check nodes in preorder to minimize the amount of checking.
  84     Visited.insert(UI);
  85     DFSStack.push_back({UI, UI->value_op_begin()});
  86     do {
  87       User::value_op_iterator OpIt;
  88       std::tie(UI, OpIt) = DFSStack.pop_back_val();
  89
  90       while (OpIt != UI->value_op_end()) {
  91         auto *OpI = dyn_cast<Instruction>(*OpIt);
  92         // Increment to the next operand for whenever we continue.
  93         ++OpIt;
  94         // No need to visit non-instructions, which can't form dependencies.
  95         if (!OpI)
  96           continue;
  97
  98         // Now do the main pre-order checks that this operand is a viable
  99         // dependency of something we want to speculate.
 100
 101         // First do a few checks for instructions that won't require
 102         // speculation at all because they are trivially available on the
 103         // incoming edge (either through dominance or through an incoming value
 104         // to a PHI).
 105         //
 106         // The cases in the current block will be trivially dominated by the
 107         // edge.
 108         auto *ParentBB = OpI->getParent();
 109         if (ParentBB == PhiBB) {
 110           if (isa<PHINode>(OpI)) {
 111             // We can trivially map through phi nodes in the same block.
 112             continue;
 113           }
 114         } else if (DT.dominates(ParentBB, PhiBB)) {
 115           // Instructions from dominating blocks are already available.
 116           continue;
 117         }
 118
 119         // Once we know that we're considering speculating the operand, check
 120         // if we've already explored this subgraph and found it to be safe.
 121         if (PotentialSpecSet.count(OpI))
 122           continue;
 123
 124         // If we've already explored this subgraph and found it unsafe, bail.
 125         // If when we directly test whether this is safe it fails, bail.
 126         if (UnsafeSet.count(OpI) || ParentBB != PhiBB ||
 127             mayBeMemoryDependent(*OpI)) {
 128           LLVM_DEBUG(dbgs() << "  Unsafe: can't speculate transitive use: "
 129                             << *OpI << "\n");
 130           // Record the stack of instructions which reach this node as unsafe
 131           // so we prune subsequent searches.
 132           UnsafeSet.insert(OpI);
 133           for (auto &StackPair : DFSStack) {
 134             Instruction *I = StackPair.first;
 135             UnsafeSet.insert(I);
 136           }
 137           return false;
 138         }
 139
 140         // Skip any operands we're already recursively checking.
 141         if (!Visited.insert(OpI).second)
 142           continue;
 143
 144         // Push onto the stack and descend. We can directly continue this
 145         // loop when ascending.
 146         DFSStack.push_back({UI, OpIt});
 147         UI = OpI;
 148         OpIt = OpI->value_op_begin();
 149       }
 150
 151       // This node and all its operands are safe. Go ahead and cache that for
 152       // reuse later.
 153       PotentialSpecSet.insert(UI);
 154
 155       // Continue with the next node on the stack.
 156     } while (!DFSStack.empty());
 157   }
 158
 159 #ifndef NDEBUG
 160   // Every visited operand should have been marked as safe for speculation at
 161   // this point. Verify this and return success.
 162   for (auto *I : Visited)
 163     assert(PotentialSpecSet.count(I) &&
 164            "Failed to mark a visited instruction as safe!");
 165 #endif
 166   return true;
 167 }
 168
 169 /// Check whether, in isolation, a given PHI node is both safe and profitable
 170 /// to speculate users around.
 171 ///
 172 /// This handles checking whether there are any constant operands to a PHI
 173 /// which could represent a useful speculation candidate, whether the users of
 174 /// the PHI are safe to speculate including all their transitive dependencies,
 175 /// and whether after speculation there will be some cost savings (profit) to
 176 /// folding the operands into the users of the PHI node. Returns true if both
 177 /// safe and profitable with relevant cost savings updated in the map and with
 178 /// an update to the `PotentialSpecSet`. Returns false if either safety or
 179 /// profitability are absent. Some new entries may be made to the
 180 /// `PotentialSpecSet` even when this routine returns false, but they remain
 181 /// conservatively correct.
 182 ///
 183 /// The profitability check here is a local one, but it checks this in an
 184 /// interesting way. Beyond checking that the total cost of materializing the
 185 /// constants will be less than the cost of folding them into their users, it
 186 /// also checks that no one incoming constant will have a higher cost when
 187 /// folded into its users rather than materialized. This higher cost could
 188 /// result in a dynamic *path* that is more expensive even when the total cost
 189 /// is lower. Currently, all of the interesting cases where this optimization
 190 /// should fire are ones where it is a no-loss operation in this sense. If we
 191 /// ever want to be more aggressive here, we would need to balance the
 192 /// different incoming edges' cost by looking at their respective
 193 /// probabilities.
 194 static bool isSafeAndProfitableToSpeculateAroundPHI(
 195     PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
 196     SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
 197     SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT,
 198     TargetTransformInfo &TTI) {
 199   // First see whether there is any cost savings to speculating around this
 200   // PHI, and build up a map of the constant inputs to how many times they
 201   // occur.
 202   bool NonFreeMat = false;
 203   struct CostsAndCount {
 204     int MatCost = TargetTransformInfo::TCC_Free;
 205     int FoldedCost = TargetTransformInfo::TCC_Free;
 206     int Count = 0;
 207   };
 208   SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts;
 209   SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks;
 210   for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) {
 211     auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i));
 212     if (!IncomingC)
 213       continue;
 214
 215     // Only visit each incoming edge with a constant input once.
 216     if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second)
 217       continue;
 218
 219     auto InsertResult = CostsAndCounts.insert({IncomingC, {}});
 220     // Count how many edges share a given incoming costant.
 221     ++InsertResult.first->second.Count;
 222     // Only compute the cost the first time we see a particular constant.
 223     if (!InsertResult.second)
 224       continue;
 225
 226     int &MatCost = InsertResult.first->second.MatCost;
 227     MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType());
 228     NonFreeMat |= MatCost != TTI.TCC_Free;
 229   }
 230   if (!NonFreeMat) {
 231     LLVM_DEBUG(dbgs() << "    Free: " << PN << "\n");
 232     // No profit in free materialization.
 233     return false;
 234   }
 235
 236   // Now check that the uses of this PHI can actually be speculated,
 237   // otherwise we'll still have to materialize the PHI value.
 238   if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) {
 239     LLVM_DEBUG(dbgs() << "    Unsafe PHI: " << PN << "\n");
 240     return false;
 241   }
 242
 243   // Compute how much (if any) savings are available by speculating around this
 244   // PHI.
 245   for (Use &U : PN.uses()) {
 246     auto *UserI = cast<Instruction>(U.getUser());
 247     // Now check whether there is any savings to folding the incoming constants
 248     // into this use.
 249     unsigned Idx = U.getOperandNo();
 250
 251     // If we have a binary operator that is commutative, an actual constant
 252     // operand would end up on the RHS, so pretend the use of the PHI is on the
 253     // RHS.
 254     //
 255     // Technically, this is a bit weird if *both* operands are PHIs we're
 256     // speculating. But if that is the case, giving an "optimistic" cost isn't
 257     // a bad thing because after speculation it will constant fold. And
 258     // moreover, such cases should likely have been constant folded already by
 259     // some other pass, so we shouldn't worry about "modeling" them terribly
 260     // accurately here. Similarly, if the other operand is a constant, it still
 261     // seems fine to be "optimistic" in our cost modeling, because when the
 262     // incoming operand from the PHI node is also a constant, we will end up
 263     // constant folding.
 264     if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1)
 265       // Assume we will commute the constant to the RHS to be canonical.
 266       Idx = 1;
 267
 268     // Get the intrinsic ID if this user is an intrinsic.
 269     Intrinsic::ID IID = Intrinsic::not_intrinsic;
 270     if (auto *UserII = dyn_cast<IntrinsicInst>(UserI))
 271       IID = UserII->getIntrinsicID();
 272
 273     for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) {
 274       ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first;
 275       int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
 276       int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
 277       if (IID)
 278         FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(),
 279                                         IncomingC->getType());
 280       else
 281         FoldedCost +=
 282             TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(),
 283                               IncomingC->getType());
 284
 285       // If we accumulate more folded cost for this incoming constant than
 286       // materialized cost, then we'll regress any edge with this constant so
 287       // just bail. We're only interested in cases where folding the incoming
 288       // constants is at least break-even on all paths.
 289       if (FoldedCost > MatCost) {
 290         LLVM_DEBUG(dbgs() << "  Not profitable to fold imm: " << *IncomingC
 291                           << "\n"
 292                              "    Materializing cost:    "
 293                           << MatCost
 294                           << "\n"
 295                              "    Accumulated folded cost: "
 296                           << FoldedCost << "\n");
 297         return false;
 298       }
 299     }
 300   }
 301
 302   // Compute the total cost savings afforded by this PHI node.
 303   int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free;
 304   for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) {
 305     int MatCost = IncomingConstantAndCostsAndCount.second.MatCost;
 306     int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost;
 307     int Count = IncomingConstantAndCostsAndCount.second.Count;
 308
 309     TotalMatCost += MatCost * Count;
 310     TotalFoldedCost += FoldedCost * Count;
 311   }
 312   assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is "
 313                                             "less that its materialized cost, "
 314                                             "the sum must be as well.");
 315
 316   LLVM_DEBUG(dbgs() << "    Cost savings " << (TotalMatCost - TotalFoldedCost)
 317                     << ": " << PN << "\n");
 318   CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost;
 319   return true;
 320 }
 321
 322 /// Simple helper to walk all the users of a list of phis depth first, and call
 323 /// a visit function on each one in post-order.
 324 ///
 325 /// All of the PHIs should be in the same basic block, and this is primarily
 326 /// used to make a single depth-first walk across their collective users
 327 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
 328 /// callable to test whether a node has been visited and the more important
 329 /// callable to actually visit a particular node.
 330 ///
 331 /// Depth-first and postorder here refer to the *operand* graph -- we start
 332 /// from a collection of users of PHI nodes and walk "up" the operands
 333 /// depth-first.
 334 template <typename IsVisitedT, typename VisitT>
 335 static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs,
 336                                             IsVisitedT IsVisited,
 337                                             VisitT Visit) {
 338   SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack;
 339   for (auto *PN : PNs)
 340     for (Use &U : PN->uses()) {
 341       auto *UI = cast<Instruction>(U.getUser());
 342       if (IsVisited(UI))
 343         // Already visited this user, continue across the roots.
 344         continue;
 345
 346       // Otherwise, walk the operand graph depth-first and visit each
 347       // dependency in postorder.
 348       DFSStack.push_back({UI, UI->value_op_begin()});
 349       do {
 350         User::value_op_iterator OpIt;
 351         std::tie(UI, OpIt) = DFSStack.pop_back_val();
 352         while (OpIt != UI->value_op_end()) {
 353           auto *OpI = dyn_cast<Instruction>(*OpIt);
 354           // Increment to the next operand for whenever we continue.
 355           ++OpIt;
 356           // No need to visit non-instructions, which can't form dependencies,
 357           // or instructions outside of our potential dependency set that we
 358           // were given. Finally, if we've already visited the node, continue
 359           // to the next.
 360           if (!OpI || IsVisited(OpI))
 361             continue;
 362
 363           // Push onto the stack and descend. We can directly continue this
 364           // loop when ascending.
 365           DFSStack.push_back({UI, OpIt});
 366           UI = OpI;
 367           OpIt = OpI->value_op_begin();
 368         }
 369
 370         // Finished visiting children, visit this node.
 371         assert(!IsVisited(UI) && "Should not have already visited a node!");
 372         Visit(UI);
 373       } while (!DFSStack.empty());
 374     }
 375 }
 376
 377 /// Find profitable PHIs to speculate.
 378 ///
 379 /// For a PHI node to be profitable, we need the cost of speculating its users
 380 /// (and their dependencies) to not exceed the savings of folding the PHI's
 381 /// constant operands into the speculated users.
 382 ///
 383 /// Computing this is surprisingly challenging. Because users of two different
 384 /// PHI nodes can depend on each other or on common other instructions, it may
 385 /// be profitable to speculate two PHI nodes together even though neither one
 386 /// in isolation is profitable. The straightforward way to find all the
 387 /// profitable PHIs would be to check each combination of PHIs' cost, but this
 388 /// is exponential in complexity.
 389 ///
 390 /// Even if we assume that we only care about cases where we can consider each
 391 /// PHI node in isolation (rather than considering cases where none are
 392 /// profitable in isolation but some subset are profitable as a set), we still
 393 /// have a challenge. The obvious way to find all individually profitable PHIs
 394 /// is to iterate until reaching a fixed point, but this will be quadratic in
 395 /// complexity. =/
 396 ///
 397 /// This code currently uses a linear-to-compute order for a greedy approach.
 398 /// It won't find cases where a set of PHIs must be considered together, but it
 399 /// handles most cases of order dependence without quadratic iteration. The
 400 /// specific order used is the post-order across the operand DAG. When the last
 401 /// user of a PHI is visited in this postorder walk, we check it for
 402 /// profitability.
 403 ///
 404 /// There is an orthogonal extra complexity to all of this: computing the cost
 405 /// itself can easily become a linear computation making everything again (at
 406 /// best) quadratic. Using a postorder over the operand graph makes it
 407 /// particularly easy to avoid this through dynamic programming. As we do the
 408 /// postorder walk, we build the transitive cost of that subgraph. It is also
 409 /// straightforward to then update these costs when we mark a PHI for
 410 /// speculation so that subsequent PHIs don't re-pay the cost of already
 411 /// speculated instructions.
 412 static SmallVector<PHINode *, 16>
 413 findProfitablePHIs(ArrayRef<PHINode *> PNs,
 414                    const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap,
 415                    const SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
 416                    int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) {
 417   SmallVector<PHINode *, 16> SpecPNs;
 418
 419   // First, establish a reverse mapping from immediate users of the PHI nodes
 420   // to the nodes themselves, and count how many users each PHI node has in
 421   // a way we can update while processing them.
 422   SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap;
 423   SmallDenseMap<PHINode *, int, 16> PNUserCountMap;
 424   SmallPtrSet<Instruction *, 16> UserSet;
 425   for (auto *PN : PNs) {
 426     assert(UserSet.empty() && "Must start with an empty user set!");
 427     for (Use &U : PN->uses())
 428       UserSet.insert(cast<Instruction>(U.getUser()));
 429     PNUserCountMap[PN] = UserSet.size();
 430     for (auto *UI : UserSet)
 431       UserToPNMap.insert({UI, {}}).first->second.push_back(PN);
 432     UserSet.clear();
 433   }
 434
 435   // Now do a DFS across the operand graph of the users, computing cost as we
 436   // go and when all costs for a given PHI are known, checking that PHI for
 437   // profitability.
 438   SmallDenseMap<Instruction *, int, 16> SpecCostMap;
 439   visitPHIUsersAndDepsInPostOrder(
 440       PNs,
 441       /*IsVisited*/
 442       [&](Instruction *I) {
 443         // We consider anything that isn't potentially speculated to be
 444         // "visited" as it is already handled. Similarly, anything that *is*
 445         // potentially speculated but for which we have an entry in our cost
 446         // map, we're done.
 447         return !PotentialSpecSet.count(I) || SpecCostMap.count(I);
 448       },
 449       /*Visit*/
 450       [&](Instruction *I) {
 451         // We've fully visited the operands, so sum their cost with this node
 452         // and update the cost map.
 453         int Cost = TTI.TCC_Free;
 454         for (Value *OpV : I->operand_values())
 455           if (auto *OpI = dyn_cast<Instruction>(OpV)) {
 456             auto CostMapIt = SpecCostMap.find(OpI);
 457             if (CostMapIt != SpecCostMap.end())
 458               Cost += CostMapIt->second;
 459           }
 460         Cost += TTI.getUserCost(I);
 461         bool Inserted = SpecCostMap.insert({I, Cost}).second;
 462         (void)Inserted;
 463         assert(Inserted && "Must not re-insert a cost during the DFS!");
 464
 465         // Now check if this node had a corresponding PHI node using it. If so,
 466         // we need to decrement the outstanding user count for it.
 467         auto UserPNsIt = UserToPNMap.find(I);
 468         if (UserPNsIt == UserToPNMap.end())
 469           return;
 470         auto &UserPNs = UserPNsIt->second;
 471         auto UserPNsSplitIt = std::stable_partition(
 472             UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) {
 473               int &PNUserCount = PNUserCountMap.find(UserPN)->second;
 474               assert(
 475                   PNUserCount > 0 &&
 476                   "Should never re-visit a PN after its user count hits zero!");
 477               --PNUserCount;
 478               return PNUserCount != 0;
 479             });
 480
 481         // FIXME: Rather than one at a time, we should sum the savings as the
 482         // cost will be completely shared.
 483         SmallVector<Instruction *, 16> SpecWorklist;
 484         for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) {
 485           int SpecCost = TTI.TCC_Free;
 486           for (Use &U : PN->uses())
 487             SpecCost +=
 488                 SpecCostMap.find(cast<Instruction>(U.getUser()))->second;
 489           SpecCost *= (NumPreds - 1);
 490           // When the user count of a PHI node hits zero, we should check its
 491           // profitability. If profitable, we should mark it for speculation
 492           // and zero out the cost of everything it depends on.
 493           int CostSavings = CostSavingsMap.find(PN)->second;
 494           if (SpecCost > CostSavings) {
 495             LLVM_DEBUG(dbgs() << "  Not profitable, speculation cost: " << *PN
 496                               << "\n"
 497                                  "    Cost savings:     "
 498                               << CostSavings
 499                               << "\n"
 500                                  "    Speculation cost: "
 501                               << SpecCost << "\n");
 502             continue;
 503           }
 504
 505           // We're going to speculate this user-associated PHI. Copy it out and
 506           // add its users to the worklist to update their cost.
 507           SpecPNs.push_back(PN);
 508           for (Use &U : PN->uses()) {
 509             auto *UI = cast<Instruction>(U.getUser());
 510             auto CostMapIt = SpecCostMap.find(UI);
 511             if (CostMapIt->second == 0)
 512               continue;
 513             // Zero out this cost entry to avoid duplicates.
 514             CostMapIt->second = 0;
 515             SpecWorklist.push_back(UI);
 516           }
 517         }
 518
 519         // Now walk all the operands of the users in the worklist transitively
 520         // to zero out all the memoized costs.
 521         while (!SpecWorklist.empty()) {
 522           Instruction *SpecI = SpecWorklist.pop_back_val();
 523           assert(SpecCostMap.find(SpecI)->second == 0 &&
 524                  "Didn't zero out a cost!");
 525
 526           // Walk the operands recursively to zero out their cost as well.
 527           for (auto *OpV : SpecI->operand_values()) {
 528             auto *OpI = dyn_cast<Instruction>(OpV);
 529             if (!OpI)
 530               continue;
 531             auto CostMapIt = SpecCostMap.find(OpI);
 532             if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0)
 533               continue;
 534             CostMapIt->second = 0;
 535             SpecWorklist.push_back(OpI);
 536           }
 537         }
 538       });
 539
 540   return SpecPNs;
 541 }
 542
 543 /// Speculate users around a set of PHI nodes.
 544 ///
 545 /// This routine does the actual speculation around a set of PHI nodes where we
 546 /// have determined this to be both safe and profitable.
 547 ///
 548 /// This routine handles any spliting of critical edges necessary to create
 549 /// a safe block to speculate into as well as cloning the instructions and
 550 /// rewriting all uses.
 551 static void speculatePHIs(ArrayRef<PHINode *> SpecPNs,
 552                           SmallPtrSetImpl<Instruction *> &PotentialSpecSet,
 553                           SmallSetVector<BasicBlock *, 16> &PredSet,
 554                           DominatorTree &DT) {
 555   LLVM_DEBUG(dbgs() << "  Speculating around " << SpecPNs.size() << " PHIs!\n");
 556   NumPHIsSpeculated += SpecPNs.size();
 557
 558   // Split any critical edges so that we have a block to hoist into.
 559   auto *ParentBB = SpecPNs[0]->getParent();
 560   SmallVector<BasicBlock *, 16> SpecPreds;
 561   SpecPreds.reserve(PredSet.size());
 562   for (auto *PredBB : PredSet) {
 563     auto *NewPredBB = SplitCriticalEdge(
 564         PredBB, ParentBB,
 565         CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges());
 566     if (NewPredBB) {
 567       ++NumEdgesSplit;
 568       LLVM_DEBUG(dbgs() << "  Split critical edge from: " << PredBB->getName()
 569                         << "\n");
 570       SpecPreds.push_back(NewPredBB);
 571     } else {
 572       assert(PredBB->getSingleSuccessor() == ParentBB &&
 573              "We need a non-critical predecessor to speculate into.");
 574       assert(!isa<InvokeInst>(PredBB->getTerminator()) &&
 575              "Cannot have a non-critical invoke!");
 576
 577       // Already non-critical, use existing pred.
 578       SpecPreds.push_back(PredBB);
 579     }
 580   }
 581
 582   SmallPtrSet<Instruction *, 16> SpecSet;
 583   SmallVector<Instruction *, 16> SpecList;
 584   visitPHIUsersAndDepsInPostOrder(SpecPNs,
 585                                   /*IsVisited*/
 586                                   [&](Instruction *I) {
 587                                     // This is visited if we don't need to
 588                                     // speculate it or we already have
 589                                     // speculated it.
 590                                     return !PotentialSpecSet.count(I) ||
 591                                            SpecSet.count(I);
 592                                   },
 593                                   /*Visit*/
 594                                   [&](Instruction *I) {
 595                                     // All operands scheduled, schedule this
 596                                     // node.
 597                                     SpecSet.insert(I);
 598                                     SpecList.push_back(I);
 599                                   });
 600
 601   int NumSpecInsts = SpecList.size() * SpecPreds.size();
 602   int NumRedundantInsts = NumSpecInsts - SpecList.size();
 603   LLVM_DEBUG(dbgs() << "  Inserting " << NumSpecInsts
 604                     << " speculated instructions, " << NumRedundantInsts
 605                     << " redundancies\n");
 606   NumSpeculatedInstructions += NumSpecInsts;
 607   NumNewRedundantInstructions += NumRedundantInsts;
 608
 609   // Each predecessor is numbered by its index in `SpecPreds`, so for each
 610   // instruction we speculate, the speculated instruction is stored in that
 611   // index of the vector associated with the original instruction. We also
 612   // store the incoming values for each predecessor from any PHIs used.
 613   SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap;
 614
 615   // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
 616   // value. This handles both the PHIs we are speculating around and any other
 617   // PHIs that happen to be used.
 618   for (auto *OrigI : SpecList)
 619     for (auto *OpV : OrigI->operand_values()) {
 620       auto *OpPN = dyn_cast<PHINode>(OpV);
 621       if (!OpPN || OpPN->getParent() != ParentBB)
 622         continue;
 623
 624       auto InsertResult = SpeculatedValueMap.insert({OpPN, {}});
 625       if (!InsertResult.second)
 626         continue;
 627
 628       auto &SpeculatedVals = InsertResult.first->second;
 629
 630       // Populating our structure for mapping is particularly annoying because
 631       // finding an incoming value for a particular predecessor block in a PHI
 632       // node is a linear time operation! To avoid quadratic behavior, we build
 633       // a map for this PHI node's incoming values and then translate it into
 634       // the more compact representation used below.
 635       SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap;
 636       for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues()))
 637         IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i);
 638
 639       for (auto *PredBB : SpecPreds)
 640         SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second);
 641     }
 642
 643   // Speculate into each predecessor.
 644   for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) {
 645     auto *PredBB = SpecPreds[PredIdx];
 646     assert(PredBB->getSingleSuccessor() == ParentBB &&
 647            "We need a non-critical predecessor to speculate into.");
 648
 649     for (auto *OrigI : SpecList) {
 650       auto *NewI = OrigI->clone();
 651       NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx));
 652       NewI->insertBefore(PredBB->getTerminator());
 653
 654       // Rewrite all the operands to the previously speculated instructions.
 655       // Because we're walking in-order, the defs must precede the uses and we
 656       // should already have these mappings.
 657       for (Use &U : NewI->operands()) {
 658         auto *OpI = dyn_cast<Instruction>(U.get());
 659         if (!OpI)
 660           continue;
 661         auto MapIt = SpeculatedValueMap.find(OpI);
 662         if (MapIt == SpeculatedValueMap.end())
 663           continue;
 664         const auto &SpeculatedVals = MapIt->second;
 665         assert(SpeculatedVals[PredIdx] &&
 666                "Must have a speculated value for this predecessor!");
 667         assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() &&
 668                "Speculated value has the wrong type!");
 669
 670         // Rewrite the use to this predecessor's speculated instruction.
 671         U.set(SpeculatedVals[PredIdx]);
 672       }
 673
 674       // Commute instructions which now have a constant in the LHS but not the
 675       // RHS.
 676       if (NewI->isBinaryOp() && NewI->isCommutative() &&
 677           isa<Constant>(NewI->getOperand(0)) &&
 678           !isa<Constant>(NewI->getOperand(1)))
 679         NewI->getOperandUse(0).swap(NewI->getOperandUse(1));
 680
 681       SpeculatedValueMap[OrigI].push_back(NewI);
 682       assert(SpeculatedValueMap[OrigI][PredIdx] == NewI &&
 683              "Mismatched speculated instruction index!");
 684     }
 685   }
 686
 687   // Walk the speculated instruction list and if they have uses, insert a PHI
 688   // for them from the speculated versions, and replace the uses with the PHI.
 689   // Then erase the instructions as they have been fully speculated. The walk
 690   // needs to be in reverse so that we don't think there are users when we'll
 691   // actually eventually remove them later.
 692   IRBuilder<> IRB(SpecPNs[0]);
 693   for (auto *OrigI : llvm::reverse(SpecList)) {
 694     // Check if we need a PHI for any remaining users and if so, insert it.
 695     if (!OrigI->use_empty()) {
 696       auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(),
 697                                     Twine(OrigI->getName()) + ".phi");
 698       // Add the incoming values we speculated.
 699       auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second;
 700       for (int PredIdx : llvm::seq<int>(0, SpecPreds.size()))
 701         SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]);
 702
 703       // And replace the uses with the PHI node.
 704       OrigI->replaceAllUsesWith(SpecIPN);
 705     }
 706
 707     // It is important to immediately erase this so that it stops using other
 708     // instructions. This avoids inserting needless PHIs of them.
 709     OrigI->eraseFromParent();
 710   }
 711
 712   // All of the uses of the speculated phi nodes should be removed at this
 713   // point, so erase them.
 714   for (auto *SpecPN : SpecPNs) {
 715     assert(SpecPN->use_empty() && "All users should have been speculated!");
 716     SpecPN->eraseFromParent();
 717   }
 718 }
 719
 720 /// Try to speculate around a series of PHIs from a single basic block.
 721 ///
 722 /// This routine checks whether any of these PHIs are profitable to speculate
 723 /// users around. If safe and profitable, it does the speculation. It returns
 724 /// true when at least some speculation occurs.
 725 static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
 726                                DominatorTree &DT, TargetTransformInfo &TTI) {
 727   LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
 728
 729   // Savings in cost from speculating around a PHI node.
 730   SmallDenseMap<PHINode *, int, 16> CostSavingsMap;
 731
 732   // Remember the set of instructions that are candidates for speculation so
 733   // that we can quickly walk things within that space. This prunes out
 734   // instructions already available along edges, etc.
 735   SmallPtrSet<Instruction *, 16> PotentialSpecSet;
 736
 737   // Remember the set of instructions that are (transitively) unsafe to
 738   // speculate into the incoming edges of this basic block. This avoids
 739   // recomputing them for each PHI node we check. This set is specific to this
 740   // block though as things are pruned out of it based on what is available
 741   // along incoming edges.
 742   SmallPtrSet<Instruction *, 16> UnsafeSet;
 743
 744   // For each PHI node in this block, check whether there are immediate folding
 745   // opportunities from speculation, and whether that speculation will be
 746   // valid. This determise the set of safe PHIs to speculate.
 747   PNs.erase(llvm::remove_if(PNs,
 748                             [&](PHINode *PN) {
 749                               return !isSafeAndProfitableToSpeculateAroundPHI(
 750                                   *PN, CostSavingsMap, PotentialSpecSet,
 751                                   UnsafeSet, DT, TTI);
 752                             }),
 753             PNs.end());
 754   // If no PHIs were profitable, skip.
 755   if (PNs.empty()) {
 756     LLVM_DEBUG(dbgs() << "  No safe and profitable PHIs found!\n");
 757     return false;
 758   }
 759
 760   // We need to know how much speculation will cost which is determined by how
 761   // many incoming edges will need a copy of each speculated instruction.
 762   SmallSetVector<BasicBlock *, 16> PredSet;
 763   for (auto *PredBB : PNs[0]->blocks()) {
 764     if (!PredSet.insert(PredBB))
 765       continue;
 766
 767     // We cannot speculate when a predecessor is an indirect branch.
 768     // FIXME: We also can't reliably create a non-critical edge block for
 769     // speculation if the predecessor is an invoke. This doesn't seem
 770     // fundamental and we should probably be splitting critical edges
 771     // differently.
 772     if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
 773         isa<InvokeInst>(PredBB->getTerminator())) {
 774       LLVM_DEBUG(dbgs() << "  Invalid: predecessor terminator: "
 775                         << PredBB->getName() << "\n");
 776       return false;
 777     }
 778   }
 779   if (PredSet.size() < 2) {
 780     LLVM_DEBUG(dbgs() << "  Unimportant: phi with only one predecessor\n");
 781     return false;
 782   }
 783
 784   SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs(
 785       PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI);
 786   if (SpecPNs.empty())
 787     // Nothing to do.
 788     return false;
 789
 790   speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT);
 791   return true;
 792 }
 793
 794 PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F,
 795                                                FunctionAnalysisManager &AM) {
 796   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
 797   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
 798
 799   bool Changed = false;
 800   for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) {
 801     SmallVector<PHINode *, 16> PNs;
 802     auto BBI = BB->begin();
 803     while (auto *PN = dyn_cast<PHINode>(&*BBI)) {
 804       PNs.push_back(PN);
 805       ++BBI;
 806     }
 807
 808     if (PNs.empty())
 809       continue;
 810
 811     Changed |= tryToSpeculatePHIs(PNs, DT, TTI);
 812   }
 813
 814   if (!Changed)
 815     return PreservedAnalyses::all();
 816
 817   PreservedAnalyses PA;
 818   return PA;
 819 }