1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
10 #include "llvm/ADT/PostOrderIterator.h"
11 #include "llvm/ADT/Sequence.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/Analysis/TargetTransformInfo.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25 #define DEBUG_TYPE "spec-phis"
27 STATISTIC(NumPHIsSpeculated
, "Number of PHI nodes we speculated around");
28 STATISTIC(NumEdgesSplit
,
29 "Number of critical edges which were split for speculation");
30 STATISTIC(NumSpeculatedInstructions
,
31 "Number of instructions we speculated around the PHI nodes");
32 STATISTIC(NumNewRedundantInstructions
,
33 "Number of new, redundant instructions inserted");
35 /// Check whether speculating the users of a PHI node around the PHI
38 /// This checks both that all of the users are safe and also that all of their
39 /// operands are either recursively safe or already available along an incoming
42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43 /// and the chain of nodes that definitively reach any unsafe node in
44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
45 /// PHIs in the same basic block, the exploration here can be reused. However,
46 /// these caches must no be reused for PHIs in a different basic block as they
47 /// reflect what is available along incoming edges.
49 isSafeToSpeculatePHIUsers(PHINode
&PN
, DominatorTree
&DT
,
50 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
51 SmallPtrSetImpl
<Instruction
*> &UnsafeSet
) {
52 auto *PhiBB
= PN
.getParent();
53 SmallPtrSet
<Instruction
*, 4> Visited
;
54 SmallVector
<std::pair
<Instruction
*, User::value_op_iterator
>, 16> DFSStack
;
56 // Walk each user of the PHI node.
57 for (Use
&U
: PN
.uses()) {
58 auto *UI
= cast
<Instruction
>(U
.getUser());
60 // Ensure the use post-dominates the PHI node. This ensures that, in the
61 // absence of unwinding, the use will actually be reached.
62 // FIXME: We use a blunt hammer of requiring them to be in the same basic
63 // block. We should consider using actual post-dominance here in the
65 if (UI
->getParent() != PhiBB
) {
66 LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI
<< "\n");
70 // FIXME: This check is much too conservative. We're not going to move these
71 // instructions onto new dynamic paths through the program unless there is
72 // a call instruction between the use and the PHI node. And memory isn't
73 // changing unless there is a store in that same sequence. We should
74 // probably change this to do at least a limited scan of the intervening
75 // instructions and allow handling stores in easily proven safe cases.
76 if (mayBeMemoryDependent(*UI
)) {
77 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI
<< "\n");
81 // Now do a depth-first search of everything these users depend on to make
82 // sure they are transitively safe. This is a depth-first search, but we
83 // check nodes in preorder to minimize the amount of checking.
85 DFSStack
.push_back({UI
, UI
->value_op_begin()});
87 User::value_op_iterator OpIt
;
88 std::tie(UI
, OpIt
) = DFSStack
.pop_back_val();
90 while (OpIt
!= UI
->value_op_end()) {
91 auto *OpI
= dyn_cast
<Instruction
>(*OpIt
);
92 // Increment to the next operand for whenever we continue.
94 // No need to visit non-instructions, which can't form dependencies.
98 // Now do the main pre-order checks that this operand is a viable
99 // dependency of something we want to speculate.
101 // First do a few checks for instructions that won't require
102 // speculation at all because they are trivially available on the
103 // incoming edge (either through dominance or through an incoming value
106 // The cases in the current block will be trivially dominated by the
108 auto *ParentBB
= OpI
->getParent();
109 if (ParentBB
== PhiBB
) {
110 if (isa
<PHINode
>(OpI
)) {
111 // We can trivially map through phi nodes in the same block.
114 } else if (DT
.dominates(ParentBB
, PhiBB
)) {
115 // Instructions from dominating blocks are already available.
119 // Once we know that we're considering speculating the operand, check
120 // if we've already explored this subgraph and found it to be safe.
121 if (PotentialSpecSet
.count(OpI
))
124 // If we've already explored this subgraph and found it unsafe, bail.
125 // If when we directly test whether this is safe it fails, bail.
126 if (UnsafeSet
.count(OpI
) || ParentBB
!= PhiBB
||
127 mayBeMemoryDependent(*OpI
)) {
128 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
130 // Record the stack of instructions which reach this node as unsafe
131 // so we prune subsequent searches.
132 UnsafeSet
.insert(OpI
);
133 for (auto &StackPair
: DFSStack
) {
134 Instruction
*I
= StackPair
.first
;
140 // Skip any operands we're already recursively checking.
141 if (!Visited
.insert(OpI
).second
)
144 // Push onto the stack and descend. We can directly continue this
145 // loop when ascending.
146 DFSStack
.push_back({UI
, OpIt
});
148 OpIt
= OpI
->value_op_begin();
151 // This node and all its operands are safe. Go ahead and cache that for
153 PotentialSpecSet
.insert(UI
);
155 // Continue with the next node on the stack.
156 } while (!DFSStack
.empty());
160 // Every visited operand should have been marked as safe for speculation at
161 // this point. Verify this and return success.
162 for (auto *I
: Visited
)
163 assert(PotentialSpecSet
.count(I
) &&
164 "Failed to mark a visited instruction as safe!");
169 /// Check whether, in isolation, a given PHI node is both safe and profitable
170 /// to speculate users around.
172 /// This handles checking whether there are any constant operands to a PHI
173 /// which could represent a useful speculation candidate, whether the users of
174 /// the PHI are safe to speculate including all their transitive dependencies,
175 /// and whether after speculation there will be some cost savings (profit) to
176 /// folding the operands into the users of the PHI node. Returns true if both
177 /// safe and profitable with relevant cost savings updated in the map and with
178 /// an update to the `PotentialSpecSet`. Returns false if either safety or
179 /// profitability are absent. Some new entries may be made to the
180 /// `PotentialSpecSet` even when this routine returns false, but they remain
181 /// conservatively correct.
183 /// The profitability check here is a local one, but it checks this in an
184 /// interesting way. Beyond checking that the total cost of materializing the
185 /// constants will be less than the cost of folding them into their users, it
186 /// also checks that no one incoming constant will have a higher cost when
187 /// folded into its users rather than materialized. This higher cost could
188 /// result in a dynamic *path* that is more expensive even when the total cost
189 /// is lower. Currently, all of the interesting cases where this optimization
190 /// should fire are ones where it is a no-loss operation in this sense. If we
191 /// ever want to be more aggressive here, we would need to balance the
192 /// different incoming edges' cost by looking at their respective
194 static bool isSafeAndProfitableToSpeculateAroundPHI(
195 PHINode
&PN
, SmallDenseMap
<PHINode
*, int, 16> &CostSavingsMap
,
196 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
197 SmallPtrSetImpl
<Instruction
*> &UnsafeSet
, DominatorTree
&DT
,
198 TargetTransformInfo
&TTI
) {
199 // First see whether there is any cost savings to speculating around this
200 // PHI, and build up a map of the constant inputs to how many times they
202 bool NonFreeMat
= false;
203 struct CostsAndCount
{
204 int MatCost
= TargetTransformInfo::TCC_Free
;
205 int FoldedCost
= TargetTransformInfo::TCC_Free
;
208 SmallDenseMap
<ConstantInt
*, CostsAndCount
, 16> CostsAndCounts
;
209 SmallPtrSet
<BasicBlock
*, 16> IncomingConstantBlocks
;
210 for (int i
: llvm::seq
<int>(0, PN
.getNumIncomingValues())) {
211 auto *IncomingC
= dyn_cast
<ConstantInt
>(PN
.getIncomingValue(i
));
215 // Only visit each incoming edge with a constant input once.
216 if (!IncomingConstantBlocks
.insert(PN
.getIncomingBlock(i
)).second
)
219 auto InsertResult
= CostsAndCounts
.insert({IncomingC
, {}});
220 // Count how many edges share a given incoming costant.
221 ++InsertResult
.first
->second
.Count
;
222 // Only compute the cost the first time we see a particular constant.
223 if (!InsertResult
.second
)
226 int &MatCost
= InsertResult
.first
->second
.MatCost
;
227 MatCost
= TTI
.getIntImmCost(IncomingC
->getValue(), IncomingC
->getType());
228 NonFreeMat
|= MatCost
!= TTI
.TCC_Free
;
231 LLVM_DEBUG(dbgs() << " Free: " << PN
<< "\n");
232 // No profit in free materialization.
236 // Now check that the uses of this PHI can actually be speculated,
237 // otherwise we'll still have to materialize the PHI value.
238 if (!isSafeToSpeculatePHIUsers(PN
, DT
, PotentialSpecSet
, UnsafeSet
)) {
239 LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN
<< "\n");
243 // Compute how much (if any) savings are available by speculating around this
245 for (Use
&U
: PN
.uses()) {
246 auto *UserI
= cast
<Instruction
>(U
.getUser());
247 // Now check whether there is any savings to folding the incoming constants
249 unsigned Idx
= U
.getOperandNo();
251 // If we have a binary operator that is commutative, an actual constant
252 // operand would end up on the RHS, so pretend the use of the PHI is on the
255 // Technically, this is a bit weird if *both* operands are PHIs we're
256 // speculating. But if that is the case, giving an "optimistic" cost isn't
257 // a bad thing because after speculation it will constant fold. And
258 // moreover, such cases should likely have been constant folded already by
259 // some other pass, so we shouldn't worry about "modeling" them terribly
260 // accurately here. Similarly, if the other operand is a constant, it still
261 // seems fine to be "optimistic" in our cost modeling, because when the
262 // incoming operand from the PHI node is also a constant, we will end up
264 if (UserI
->isBinaryOp() && UserI
->isCommutative() && Idx
!= 1)
265 // Assume we will commute the constant to the RHS to be canonical.
268 // Get the intrinsic ID if this user is an intrinsic.
269 Intrinsic::ID IID
= Intrinsic::not_intrinsic
;
270 if (auto *UserII
= dyn_cast
<IntrinsicInst
>(UserI
))
271 IID
= UserII
->getIntrinsicID();
273 for (auto &IncomingConstantAndCostsAndCount
: CostsAndCounts
) {
274 ConstantInt
*IncomingC
= IncomingConstantAndCostsAndCount
.first
;
275 int MatCost
= IncomingConstantAndCostsAndCount
.second
.MatCost
;
276 int &FoldedCost
= IncomingConstantAndCostsAndCount
.second
.FoldedCost
;
278 FoldedCost
+= TTI
.getIntImmCost(IID
, Idx
, IncomingC
->getValue(),
279 IncomingC
->getType());
282 TTI
.getIntImmCost(UserI
->getOpcode(), Idx
, IncomingC
->getValue(),
283 IncomingC
->getType());
285 // If we accumulate more folded cost for this incoming constant than
286 // materialized cost, then we'll regress any edge with this constant so
287 // just bail. We're only interested in cases where folding the incoming
288 // constants is at least break-even on all paths.
289 if (FoldedCost
> MatCost
) {
290 LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
292 " Materializing cost: "
295 " Accumulated folded cost: "
296 << FoldedCost
<< "\n");
302 // Compute the total cost savings afforded by this PHI node.
303 int TotalMatCost
= TTI
.TCC_Free
, TotalFoldedCost
= TTI
.TCC_Free
;
304 for (auto IncomingConstantAndCostsAndCount
: CostsAndCounts
) {
305 int MatCost
= IncomingConstantAndCostsAndCount
.second
.MatCost
;
306 int FoldedCost
= IncomingConstantAndCostsAndCount
.second
.FoldedCost
;
307 int Count
= IncomingConstantAndCostsAndCount
.second
.Count
;
309 TotalMatCost
+= MatCost
* Count
;
310 TotalFoldedCost
+= FoldedCost
* Count
;
312 assert(TotalFoldedCost
<= TotalMatCost
&& "If each constant's folded cost is "
313 "less that its materialized cost, "
314 "the sum must be as well.");
316 LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost
- TotalFoldedCost
)
317 << ": " << PN
<< "\n");
318 CostSavingsMap
[&PN
] = TotalMatCost
- TotalFoldedCost
;
322 /// Simple helper to walk all the users of a list of phis depth first, and call
323 /// a visit function on each one in post-order.
325 /// All of the PHIs should be in the same basic block, and this is primarily
326 /// used to make a single depth-first walk across their collective users
327 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
328 /// callable to test whether a node has been visited and the more important
329 /// callable to actually visit a particular node.
331 /// Depth-first and postorder here refer to the *operand* graph -- we start
332 /// from a collection of users of PHI nodes and walk "up" the operands
334 template <typename IsVisitedT
, typename VisitT
>
335 static void visitPHIUsersAndDepsInPostOrder(ArrayRef
<PHINode
*> PNs
,
336 IsVisitedT IsVisited
,
338 SmallVector
<std::pair
<Instruction
*, User::value_op_iterator
>, 16> DFSStack
;
340 for (Use
&U
: PN
->uses()) {
341 auto *UI
= cast
<Instruction
>(U
.getUser());
343 // Already visited this user, continue across the roots.
346 // Otherwise, walk the operand graph depth-first and visit each
347 // dependency in postorder.
348 DFSStack
.push_back({UI
, UI
->value_op_begin()});
350 User::value_op_iterator OpIt
;
351 std::tie(UI
, OpIt
) = DFSStack
.pop_back_val();
352 while (OpIt
!= UI
->value_op_end()) {
353 auto *OpI
= dyn_cast
<Instruction
>(*OpIt
);
354 // Increment to the next operand for whenever we continue.
356 // No need to visit non-instructions, which can't form dependencies,
357 // or instructions outside of our potential dependency set that we
358 // were given. Finally, if we've already visited the node, continue
360 if (!OpI
|| IsVisited(OpI
))
363 // Push onto the stack and descend. We can directly continue this
364 // loop when ascending.
365 DFSStack
.push_back({UI
, OpIt
});
367 OpIt
= OpI
->value_op_begin();
370 // Finished visiting children, visit this node.
371 assert(!IsVisited(UI
) && "Should not have already visited a node!");
373 } while (!DFSStack
.empty());
377 /// Find profitable PHIs to speculate.
379 /// For a PHI node to be profitable, we need the cost of speculating its users
380 /// (and their dependencies) to not exceed the savings of folding the PHI's
381 /// constant operands into the speculated users.
383 /// Computing this is surprisingly challenging. Because users of two different
384 /// PHI nodes can depend on each other or on common other instructions, it may
385 /// be profitable to speculate two PHI nodes together even though neither one
386 /// in isolation is profitable. The straightforward way to find all the
387 /// profitable PHIs would be to check each combination of PHIs' cost, but this
388 /// is exponential in complexity.
390 /// Even if we assume that we only care about cases where we can consider each
391 /// PHI node in isolation (rather than considering cases where none are
392 /// profitable in isolation but some subset are profitable as a set), we still
393 /// have a challenge. The obvious way to find all individually profitable PHIs
394 /// is to iterate until reaching a fixed point, but this will be quadratic in
397 /// This code currently uses a linear-to-compute order for a greedy approach.
398 /// It won't find cases where a set of PHIs must be considered together, but it
399 /// handles most cases of order dependence without quadratic iteration. The
400 /// specific order used is the post-order across the operand DAG. When the last
401 /// user of a PHI is visited in this postorder walk, we check it for
404 /// There is an orthogonal extra complexity to all of this: computing the cost
405 /// itself can easily become a linear computation making everything again (at
406 /// best) quadratic. Using a postorder over the operand graph makes it
407 /// particularly easy to avoid this through dynamic programming. As we do the
408 /// postorder walk, we build the transitive cost of that subgraph. It is also
409 /// straightforward to then update these costs when we mark a PHI for
410 /// speculation so that subsequent PHIs don't re-pay the cost of already
411 /// speculated instructions.
412 static SmallVector
<PHINode
*, 16>
413 findProfitablePHIs(ArrayRef
<PHINode
*> PNs
,
414 const SmallDenseMap
<PHINode
*, int, 16> &CostSavingsMap
,
415 const SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
416 int NumPreds
, DominatorTree
&DT
, TargetTransformInfo
&TTI
) {
417 SmallVector
<PHINode
*, 16> SpecPNs
;
419 // First, establish a reverse mapping from immediate users of the PHI nodes
420 // to the nodes themselves, and count how many users each PHI node has in
421 // a way we can update while processing them.
422 SmallDenseMap
<Instruction
*, TinyPtrVector
<PHINode
*>, 16> UserToPNMap
;
423 SmallDenseMap
<PHINode
*, int, 16> PNUserCountMap
;
424 SmallPtrSet
<Instruction
*, 16> UserSet
;
425 for (auto *PN
: PNs
) {
426 assert(UserSet
.empty() && "Must start with an empty user set!");
427 for (Use
&U
: PN
->uses())
428 UserSet
.insert(cast
<Instruction
>(U
.getUser()));
429 PNUserCountMap
[PN
] = UserSet
.size();
430 for (auto *UI
: UserSet
)
431 UserToPNMap
.insert({UI
, {}}).first
->second
.push_back(PN
);
435 // Now do a DFS across the operand graph of the users, computing cost as we
436 // go and when all costs for a given PHI are known, checking that PHI for
438 SmallDenseMap
<Instruction
*, int, 16> SpecCostMap
;
439 visitPHIUsersAndDepsInPostOrder(
442 [&](Instruction
*I
) {
443 // We consider anything that isn't potentially speculated to be
444 // "visited" as it is already handled. Similarly, anything that *is*
445 // potentially speculated but for which we have an entry in our cost
447 return !PotentialSpecSet
.count(I
) || SpecCostMap
.count(I
);
450 [&](Instruction
*I
) {
451 // We've fully visited the operands, so sum their cost with this node
452 // and update the cost map.
453 int Cost
= TTI
.TCC_Free
;
454 for (Value
*OpV
: I
->operand_values())
455 if (auto *OpI
= dyn_cast
<Instruction
>(OpV
)) {
456 auto CostMapIt
= SpecCostMap
.find(OpI
);
457 if (CostMapIt
!= SpecCostMap
.end())
458 Cost
+= CostMapIt
->second
;
460 Cost
+= TTI
.getUserCost(I
);
461 bool Inserted
= SpecCostMap
.insert({I
, Cost
}).second
;
463 assert(Inserted
&& "Must not re-insert a cost during the DFS!");
465 // Now check if this node had a corresponding PHI node using it. If so,
466 // we need to decrement the outstanding user count for it.
467 auto UserPNsIt
= UserToPNMap
.find(I
);
468 if (UserPNsIt
== UserToPNMap
.end())
470 auto &UserPNs
= UserPNsIt
->second
;
471 auto UserPNsSplitIt
= std::stable_partition(
472 UserPNs
.begin(), UserPNs
.end(), [&](PHINode
*UserPN
) {
473 int &PNUserCount
= PNUserCountMap
.find(UserPN
)->second
;
476 "Should never re-visit a PN after its user count hits zero!");
478 return PNUserCount
!= 0;
481 // FIXME: Rather than one at a time, we should sum the savings as the
482 // cost will be completely shared.
483 SmallVector
<Instruction
*, 16> SpecWorklist
;
484 for (auto *PN
: llvm::make_range(UserPNsSplitIt
, UserPNs
.end())) {
485 int SpecCost
= TTI
.TCC_Free
;
486 for (Use
&U
: PN
->uses())
488 SpecCostMap
.find(cast
<Instruction
>(U
.getUser()))->second
;
489 SpecCost
*= (NumPreds
- 1);
490 // When the user count of a PHI node hits zero, we should check its
491 // profitability. If profitable, we should mark it for speculation
492 // and zero out the cost of everything it depends on.
493 int CostSavings
= CostSavingsMap
.find(PN
)->second
;
494 if (SpecCost
> CostSavings
) {
495 LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
500 " Speculation cost: "
501 << SpecCost
<< "\n");
505 // We're going to speculate this user-associated PHI. Copy it out and
506 // add its users to the worklist to update their cost.
507 SpecPNs
.push_back(PN
);
508 for (Use
&U
: PN
->uses()) {
509 auto *UI
= cast
<Instruction
>(U
.getUser());
510 auto CostMapIt
= SpecCostMap
.find(UI
);
511 if (CostMapIt
->second
== 0)
513 // Zero out this cost entry to avoid duplicates.
514 CostMapIt
->second
= 0;
515 SpecWorklist
.push_back(UI
);
519 // Now walk all the operands of the users in the worklist transitively
520 // to zero out all the memoized costs.
521 while (!SpecWorklist
.empty()) {
522 Instruction
*SpecI
= SpecWorklist
.pop_back_val();
523 assert(SpecCostMap
.find(SpecI
)->second
== 0 &&
524 "Didn't zero out a cost!");
526 // Walk the operands recursively to zero out their cost as well.
527 for (auto *OpV
: SpecI
->operand_values()) {
528 auto *OpI
= dyn_cast
<Instruction
>(OpV
);
531 auto CostMapIt
= SpecCostMap
.find(OpI
);
532 if (CostMapIt
== SpecCostMap
.end() || CostMapIt
->second
== 0)
534 CostMapIt
->second
= 0;
535 SpecWorklist
.push_back(OpI
);
543 /// Speculate users around a set of PHI nodes.
545 /// This routine does the actual speculation around a set of PHI nodes where we
546 /// have determined this to be both safe and profitable.
548 /// This routine handles any spliting of critical edges necessary to create
549 /// a safe block to speculate into as well as cloning the instructions and
550 /// rewriting all uses.
551 static void speculatePHIs(ArrayRef
<PHINode
*> SpecPNs
,
552 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
553 SmallSetVector
<BasicBlock
*, 16> &PredSet
,
555 LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs
.size() << " PHIs!\n");
556 NumPHIsSpeculated
+= SpecPNs
.size();
558 // Split any critical edges so that we have a block to hoist into.
559 auto *ParentBB
= SpecPNs
[0]->getParent();
560 SmallVector
<BasicBlock
*, 16> SpecPreds
;
561 SpecPreds
.reserve(PredSet
.size());
562 for (auto *PredBB
: PredSet
) {
563 auto *NewPredBB
= SplitCriticalEdge(
565 CriticalEdgeSplittingOptions(&DT
).setMergeIdenticalEdges());
568 LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB
->getName()
570 SpecPreds
.push_back(NewPredBB
);
572 assert(PredBB
->getSingleSuccessor() == ParentBB
&&
573 "We need a non-critical predecessor to speculate into.");
574 assert(!isa
<InvokeInst
>(PredBB
->getTerminator()) &&
575 "Cannot have a non-critical invoke!");
577 // Already non-critical, use existing pred.
578 SpecPreds
.push_back(PredBB
);
582 SmallPtrSet
<Instruction
*, 16> SpecSet
;
583 SmallVector
<Instruction
*, 16> SpecList
;
584 visitPHIUsersAndDepsInPostOrder(SpecPNs
,
586 [&](Instruction
*I
) {
587 // This is visited if we don't need to
588 // speculate it or we already have
590 return !PotentialSpecSet
.count(I
) ||
594 [&](Instruction
*I
) {
595 // All operands scheduled, schedule this
598 SpecList
.push_back(I
);
601 int NumSpecInsts
= SpecList
.size() * SpecPreds
.size();
602 int NumRedundantInsts
= NumSpecInsts
- SpecList
.size();
603 LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
604 << " speculated instructions, " << NumRedundantInsts
605 << " redundancies\n");
606 NumSpeculatedInstructions
+= NumSpecInsts
;
607 NumNewRedundantInstructions
+= NumRedundantInsts
;
609 // Each predecessor is numbered by its index in `SpecPreds`, so for each
610 // instruction we speculate, the speculated instruction is stored in that
611 // index of the vector associated with the original instruction. We also
612 // store the incoming values for each predecessor from any PHIs used.
613 SmallDenseMap
<Instruction
*, SmallVector
<Value
*, 2>, 16> SpeculatedValueMap
;
615 // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
616 // value. This handles both the PHIs we are speculating around and any other
617 // PHIs that happen to be used.
618 for (auto *OrigI
: SpecList
)
619 for (auto *OpV
: OrigI
->operand_values()) {
620 auto *OpPN
= dyn_cast
<PHINode
>(OpV
);
621 if (!OpPN
|| OpPN
->getParent() != ParentBB
)
624 auto InsertResult
= SpeculatedValueMap
.insert({OpPN
, {}});
625 if (!InsertResult
.second
)
628 auto &SpeculatedVals
= InsertResult
.first
->second
;
630 // Populating our structure for mapping is particularly annoying because
631 // finding an incoming value for a particular predecessor block in a PHI
632 // node is a linear time operation! To avoid quadratic behavior, we build
633 // a map for this PHI node's incoming values and then translate it into
634 // the more compact representation used below.
635 SmallDenseMap
<BasicBlock
*, Value
*, 16> IncomingValueMap
;
636 for (int i
: llvm::seq
<int>(0, OpPN
->getNumIncomingValues()))
637 IncomingValueMap
[OpPN
->getIncomingBlock(i
)] = OpPN
->getIncomingValue(i
);
639 for (auto *PredBB
: SpecPreds
)
640 SpeculatedVals
.push_back(IncomingValueMap
.find(PredBB
)->second
);
643 // Speculate into each predecessor.
644 for (int PredIdx
: llvm::seq
<int>(0, SpecPreds
.size())) {
645 auto *PredBB
= SpecPreds
[PredIdx
];
646 assert(PredBB
->getSingleSuccessor() == ParentBB
&&
647 "We need a non-critical predecessor to speculate into.");
649 for (auto *OrigI
: SpecList
) {
650 auto *NewI
= OrigI
->clone();
651 NewI
->setName(Twine(OrigI
->getName()) + "." + Twine(PredIdx
));
652 NewI
->insertBefore(PredBB
->getTerminator());
654 // Rewrite all the operands to the previously speculated instructions.
655 // Because we're walking in-order, the defs must precede the uses and we
656 // should already have these mappings.
657 for (Use
&U
: NewI
->operands()) {
658 auto *OpI
= dyn_cast
<Instruction
>(U
.get());
661 auto MapIt
= SpeculatedValueMap
.find(OpI
);
662 if (MapIt
== SpeculatedValueMap
.end())
664 const auto &SpeculatedVals
= MapIt
->second
;
665 assert(SpeculatedVals
[PredIdx
] &&
666 "Must have a speculated value for this predecessor!");
667 assert(SpeculatedVals
[PredIdx
]->getType() == OpI
->getType() &&
668 "Speculated value has the wrong type!");
670 // Rewrite the use to this predecessor's speculated instruction.
671 U
.set(SpeculatedVals
[PredIdx
]);
674 // Commute instructions which now have a constant in the LHS but not the
676 if (NewI
->isBinaryOp() && NewI
->isCommutative() &&
677 isa
<Constant
>(NewI
->getOperand(0)) &&
678 !isa
<Constant
>(NewI
->getOperand(1)))
679 NewI
->getOperandUse(0).swap(NewI
->getOperandUse(1));
681 SpeculatedValueMap
[OrigI
].push_back(NewI
);
682 assert(SpeculatedValueMap
[OrigI
][PredIdx
] == NewI
&&
683 "Mismatched speculated instruction index!");
687 // Walk the speculated instruction list and if they have uses, insert a PHI
688 // for them from the speculated versions, and replace the uses with the PHI.
689 // Then erase the instructions as they have been fully speculated. The walk
690 // needs to be in reverse so that we don't think there are users when we'll
691 // actually eventually remove them later.
692 IRBuilder
<> IRB(SpecPNs
[0]);
693 for (auto *OrigI
: llvm::reverse(SpecList
)) {
694 // Check if we need a PHI for any remaining users and if so, insert it.
695 if (!OrigI
->use_empty()) {
696 auto *SpecIPN
= IRB
.CreatePHI(OrigI
->getType(), SpecPreds
.size(),
697 Twine(OrigI
->getName()) + ".phi");
698 // Add the incoming values we speculated.
699 auto &SpeculatedVals
= SpeculatedValueMap
.find(OrigI
)->second
;
700 for (int PredIdx
: llvm::seq
<int>(0, SpecPreds
.size()))
701 SpecIPN
->addIncoming(SpeculatedVals
[PredIdx
], SpecPreds
[PredIdx
]);
703 // And replace the uses with the PHI node.
704 OrigI
->replaceAllUsesWith(SpecIPN
);
707 // It is important to immediately erase this so that it stops using other
708 // instructions. This avoids inserting needless PHIs of them.
709 OrigI
->eraseFromParent();
712 // All of the uses of the speculated phi nodes should be removed at this
713 // point, so erase them.
714 for (auto *SpecPN
: SpecPNs
) {
715 assert(SpecPN
->use_empty() && "All users should have been speculated!");
716 SpecPN
->eraseFromParent();
720 /// Try to speculate around a series of PHIs from a single basic block.
722 /// This routine checks whether any of these PHIs are profitable to speculate
723 /// users around. If safe and profitable, it does the speculation. It returns
724 /// true when at least some speculation occurs.
725 static bool tryToSpeculatePHIs(SmallVectorImpl
<PHINode
*> &PNs
,
726 DominatorTree
&DT
, TargetTransformInfo
&TTI
) {
727 LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
729 // Savings in cost from speculating around a PHI node.
730 SmallDenseMap
<PHINode
*, int, 16> CostSavingsMap
;
732 // Remember the set of instructions that are candidates for speculation so
733 // that we can quickly walk things within that space. This prunes out
734 // instructions already available along edges, etc.
735 SmallPtrSet
<Instruction
*, 16> PotentialSpecSet
;
737 // Remember the set of instructions that are (transitively) unsafe to
738 // speculate into the incoming edges of this basic block. This avoids
739 // recomputing them for each PHI node we check. This set is specific to this
740 // block though as things are pruned out of it based on what is available
741 // along incoming edges.
742 SmallPtrSet
<Instruction
*, 16> UnsafeSet
;
744 // For each PHI node in this block, check whether there are immediate folding
745 // opportunities from speculation, and whether that speculation will be
746 // valid. This determise the set of safe PHIs to speculate.
747 PNs
.erase(llvm::remove_if(PNs
,
749 return !isSafeAndProfitableToSpeculateAroundPHI(
750 *PN
, CostSavingsMap
, PotentialSpecSet
,
754 // If no PHIs were profitable, skip.
756 LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
760 // We need to know how much speculation will cost which is determined by how
761 // many incoming edges will need a copy of each speculated instruction.
762 SmallSetVector
<BasicBlock
*, 16> PredSet
;
763 for (auto *PredBB
: PNs
[0]->blocks()) {
764 if (!PredSet
.insert(PredBB
))
767 // We cannot speculate when a predecessor is an indirect branch.
768 // FIXME: We also can't reliably create a non-critical edge block for
769 // speculation if the predecessor is an invoke. This doesn't seem
770 // fundamental and we should probably be splitting critical edges
772 if (isa
<IndirectBrInst
>(PredBB
->getTerminator()) ||
773 isa
<InvokeInst
>(PredBB
->getTerminator())) {
774 LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
775 << PredBB
->getName() << "\n");
779 if (PredSet
.size() < 2) {
780 LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
784 SmallVector
<PHINode
*, 16> SpecPNs
= findProfitablePHIs(
785 PNs
, CostSavingsMap
, PotentialSpecSet
, PredSet
.size(), DT
, TTI
);
790 speculatePHIs(SpecPNs
, PotentialSpecSet
, PredSet
, DT
);
794 PreservedAnalyses
SpeculateAroundPHIsPass::run(Function
&F
,
795 FunctionAnalysisManager
&AM
) {
796 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
797 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
799 bool Changed
= false;
800 for (auto *BB
: ReversePostOrderTraversal
<Function
*>(&F
)) {
801 SmallVector
<PHINode
*, 16> PNs
;
802 auto BBI
= BB
->begin();
803 while (auto *PN
= dyn_cast
<PHINode
>(&*BBI
)) {
811 Changed
|= tryToSpeculatePHIs(PNs
, DT
, TTI
);
815 return PreservedAnalyses::all();
817 PreservedAnalyses PA
;