1 //===- SpeculateAroundPHIs.cpp --------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
10 #include "llvm/ADT/PostOrderIterator.h"
11 #include "llvm/ADT/Sequence.h"
12 #include "llvm/ADT/SetVector.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/Analysis/TargetTransformInfo.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/IR/BasicBlock.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25 #define DEBUG_TYPE "spec-phis"
27 STATISTIC(NumPHIsSpeculated
, "Number of PHI nodes we speculated around");
28 STATISTIC(NumEdgesSplit
,
29 "Number of critical edges which were split for speculation");
30 STATISTIC(NumSpeculatedInstructions
,
31 "Number of instructions we speculated around the PHI nodes");
32 STATISTIC(NumNewRedundantInstructions
,
33 "Number of new, redundant instructions inserted");
35 /// Check whether speculating the users of a PHI node around the PHI
38 /// This checks both that all of the users are safe and also that all of their
39 /// operands are either recursively safe or already available along an incoming
42 /// This routine caches both all the safe nodes explored in `PotentialSpecSet`
43 /// and the chain of nodes that definitively reach any unsafe node in
44 /// `UnsafeSet`. By preserving these between repeated calls to this routine for
45 /// PHIs in the same basic block, the exploration here can be reused. However,
46 /// these caches must no be reused for PHIs in a different basic block as they
47 /// reflect what is available along incoming edges.
49 isSafeToSpeculatePHIUsers(PHINode
&PN
, DominatorTree
&DT
,
50 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
51 SmallPtrSetImpl
<Instruction
*> &UnsafeSet
) {
52 auto *PhiBB
= PN
.getParent();
53 SmallPtrSet
<Instruction
*, 4> Visited
;
54 SmallVector
<std::pair
<Instruction
*, User::value_op_iterator
>, 16> DFSStack
;
56 // Walk each user of the PHI node.
57 for (Use
&U
: PN
.uses()) {
58 auto *UI
= cast
<Instruction
>(U
.getUser());
60 // Ensure the use post-dominates the PHI node. This ensures that, in the
61 // absence of unwinding, the use will actually be reached.
62 // FIXME: We use a blunt hammer of requiring them to be in the same basic
63 // block. We should consider using actual post-dominance here in the
65 if (UI
->getParent() != PhiBB
) {
66 LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI
<< "\n");
70 if (auto CS
= ImmutableCallSite(UI
)) {
71 if (CS
.isConvergent() || CS
.cannotDuplicate()) {
72 LLVM_DEBUG(dbgs() << " Unsafe: convergent "
73 "callsite cannot de duplicated: " << *UI
<< '\n');
78 // FIXME: This check is much too conservative. We're not going to move these
79 // instructions onto new dynamic paths through the program unless there is
80 // a call instruction between the use and the PHI node. And memory isn't
81 // changing unless there is a store in that same sequence. We should
82 // probably change this to do at least a limited scan of the intervening
83 // instructions and allow handling stores in easily proven safe cases.
84 if (mayBeMemoryDependent(*UI
)) {
85 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI
<< "\n");
89 // Now do a depth-first search of everything these users depend on to make
90 // sure they are transitively safe. This is a depth-first search, but we
91 // check nodes in preorder to minimize the amount of checking.
93 DFSStack
.push_back({UI
, UI
->value_op_begin()});
95 User::value_op_iterator OpIt
;
96 std::tie(UI
, OpIt
) = DFSStack
.pop_back_val();
98 while (OpIt
!= UI
->value_op_end()) {
99 auto *OpI
= dyn_cast
<Instruction
>(*OpIt
);
100 // Increment to the next operand for whenever we continue.
102 // No need to visit non-instructions, which can't form dependencies.
106 // Now do the main pre-order checks that this operand is a viable
107 // dependency of something we want to speculate.
109 // First do a few checks for instructions that won't require
110 // speculation at all because they are trivially available on the
111 // incoming edge (either through dominance or through an incoming value
114 // The cases in the current block will be trivially dominated by the
116 auto *ParentBB
= OpI
->getParent();
117 if (ParentBB
== PhiBB
) {
118 if (isa
<PHINode
>(OpI
)) {
119 // We can trivially map through phi nodes in the same block.
122 } else if (DT
.dominates(ParentBB
, PhiBB
)) {
123 // Instructions from dominating blocks are already available.
127 // Once we know that we're considering speculating the operand, check
128 // if we've already explored this subgraph and found it to be safe.
129 if (PotentialSpecSet
.count(OpI
))
132 // If we've already explored this subgraph and found it unsafe, bail.
133 // If when we directly test whether this is safe it fails, bail.
134 if (UnsafeSet
.count(OpI
) || ParentBB
!= PhiBB
||
135 mayBeMemoryDependent(*OpI
)) {
136 LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: "
138 // Record the stack of instructions which reach this node as unsafe
139 // so we prune subsequent searches.
140 UnsafeSet
.insert(OpI
);
141 for (auto &StackPair
: DFSStack
) {
142 Instruction
*I
= StackPair
.first
;
148 // Skip any operands we're already recursively checking.
149 if (!Visited
.insert(OpI
).second
)
152 // Push onto the stack and descend. We can directly continue this
153 // loop when ascending.
154 DFSStack
.push_back({UI
, OpIt
});
156 OpIt
= OpI
->value_op_begin();
159 // This node and all its operands are safe. Go ahead and cache that for
161 PotentialSpecSet
.insert(UI
);
163 // Continue with the next node on the stack.
164 } while (!DFSStack
.empty());
168 // Every visited operand should have been marked as safe for speculation at
169 // this point. Verify this and return success.
170 for (auto *I
: Visited
)
171 assert(PotentialSpecSet
.count(I
) &&
172 "Failed to mark a visited instruction as safe!");
177 /// Check whether, in isolation, a given PHI node is both safe and profitable
178 /// to speculate users around.
180 /// This handles checking whether there are any constant operands to a PHI
181 /// which could represent a useful speculation candidate, whether the users of
182 /// the PHI are safe to speculate including all their transitive dependencies,
183 /// and whether after speculation there will be some cost savings (profit) to
184 /// folding the operands into the users of the PHI node. Returns true if both
185 /// safe and profitable with relevant cost savings updated in the map and with
186 /// an update to the `PotentialSpecSet`. Returns false if either safety or
187 /// profitability are absent. Some new entries may be made to the
188 /// `PotentialSpecSet` even when this routine returns false, but they remain
189 /// conservatively correct.
191 /// The profitability check here is a local one, but it checks this in an
192 /// interesting way. Beyond checking that the total cost of materializing the
193 /// constants will be less than the cost of folding them into their users, it
194 /// also checks that no one incoming constant will have a higher cost when
195 /// folded into its users rather than materialized. This higher cost could
196 /// result in a dynamic *path* that is more expensive even when the total cost
197 /// is lower. Currently, all of the interesting cases where this optimization
198 /// should fire are ones where it is a no-loss operation in this sense. If we
199 /// ever want to be more aggressive here, we would need to balance the
200 /// different incoming edges' cost by looking at their respective
202 static bool isSafeAndProfitableToSpeculateAroundPHI(
203 PHINode
&PN
, SmallDenseMap
<PHINode
*, int, 16> &CostSavingsMap
,
204 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
205 SmallPtrSetImpl
<Instruction
*> &UnsafeSet
, DominatorTree
&DT
,
206 TargetTransformInfo
&TTI
) {
207 // First see whether there is any cost savings to speculating around this
208 // PHI, and build up a map of the constant inputs to how many times they
210 bool NonFreeMat
= false;
211 struct CostsAndCount
{
212 int MatCost
= TargetTransformInfo::TCC_Free
;
213 int FoldedCost
= TargetTransformInfo::TCC_Free
;
216 SmallDenseMap
<ConstantInt
*, CostsAndCount
, 16> CostsAndCounts
;
217 SmallPtrSet
<BasicBlock
*, 16> IncomingConstantBlocks
;
218 for (int i
: llvm::seq
<int>(0, PN
.getNumIncomingValues())) {
219 auto *IncomingC
= dyn_cast
<ConstantInt
>(PN
.getIncomingValue(i
));
223 // Only visit each incoming edge with a constant input once.
224 if (!IncomingConstantBlocks
.insert(PN
.getIncomingBlock(i
)).second
)
227 auto InsertResult
= CostsAndCounts
.insert({IncomingC
, {}});
228 // Count how many edges share a given incoming costant.
229 ++InsertResult
.first
->second
.Count
;
230 // Only compute the cost the first time we see a particular constant.
231 if (!InsertResult
.second
)
234 int &MatCost
= InsertResult
.first
->second
.MatCost
;
235 MatCost
= TTI
.getIntImmCost(IncomingC
->getValue(), IncomingC
->getType());
236 NonFreeMat
|= MatCost
!= TTI
.TCC_Free
;
239 LLVM_DEBUG(dbgs() << " Free: " << PN
<< "\n");
240 // No profit in free materialization.
244 // Now check that the uses of this PHI can actually be speculated,
245 // otherwise we'll still have to materialize the PHI value.
246 if (!isSafeToSpeculatePHIUsers(PN
, DT
, PotentialSpecSet
, UnsafeSet
)) {
247 LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN
<< "\n");
251 // Compute how much (if any) savings are available by speculating around this
253 for (Use
&U
: PN
.uses()) {
254 auto *UserI
= cast
<Instruction
>(U
.getUser());
255 // Now check whether there is any savings to folding the incoming constants
257 unsigned Idx
= U
.getOperandNo();
259 // If we have a binary operator that is commutative, an actual constant
260 // operand would end up on the RHS, so pretend the use of the PHI is on the
263 // Technically, this is a bit weird if *both* operands are PHIs we're
264 // speculating. But if that is the case, giving an "optimistic" cost isn't
265 // a bad thing because after speculation it will constant fold. And
266 // moreover, such cases should likely have been constant folded already by
267 // some other pass, so we shouldn't worry about "modeling" them terribly
268 // accurately here. Similarly, if the other operand is a constant, it still
269 // seems fine to be "optimistic" in our cost modeling, because when the
270 // incoming operand from the PHI node is also a constant, we will end up
272 if (UserI
->isBinaryOp() && UserI
->isCommutative() && Idx
!= 1)
273 // Assume we will commute the constant to the RHS to be canonical.
276 // Get the intrinsic ID if this user is an intrinsic.
277 Intrinsic::ID IID
= Intrinsic::not_intrinsic
;
278 if (auto *UserII
= dyn_cast
<IntrinsicInst
>(UserI
))
279 IID
= UserII
->getIntrinsicID();
281 for (auto &IncomingConstantAndCostsAndCount
: CostsAndCounts
) {
282 ConstantInt
*IncomingC
= IncomingConstantAndCostsAndCount
.first
;
283 int MatCost
= IncomingConstantAndCostsAndCount
.second
.MatCost
;
284 int &FoldedCost
= IncomingConstantAndCostsAndCount
.second
.FoldedCost
;
286 FoldedCost
+= TTI
.getIntImmCost(IID
, Idx
, IncomingC
->getValue(),
287 IncomingC
->getType());
290 TTI
.getIntImmCost(UserI
->getOpcode(), Idx
, IncomingC
->getValue(),
291 IncomingC
->getType());
293 // If we accumulate more folded cost for this incoming constant than
294 // materialized cost, then we'll regress any edge with this constant so
295 // just bail. We're only interested in cases where folding the incoming
296 // constants is at least break-even on all paths.
297 if (FoldedCost
> MatCost
) {
298 LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC
300 " Materializing cost: "
303 " Accumulated folded cost: "
304 << FoldedCost
<< "\n");
310 // Compute the total cost savings afforded by this PHI node.
311 int TotalMatCost
= TTI
.TCC_Free
, TotalFoldedCost
= TTI
.TCC_Free
;
312 for (auto IncomingConstantAndCostsAndCount
: CostsAndCounts
) {
313 int MatCost
= IncomingConstantAndCostsAndCount
.second
.MatCost
;
314 int FoldedCost
= IncomingConstantAndCostsAndCount
.second
.FoldedCost
;
315 int Count
= IncomingConstantAndCostsAndCount
.second
.Count
;
317 TotalMatCost
+= MatCost
* Count
;
318 TotalFoldedCost
+= FoldedCost
* Count
;
320 assert(TotalFoldedCost
<= TotalMatCost
&& "If each constant's folded cost is "
321 "less that its materialized cost, "
322 "the sum must be as well.");
324 LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost
- TotalFoldedCost
)
325 << ": " << PN
<< "\n");
326 CostSavingsMap
[&PN
] = TotalMatCost
- TotalFoldedCost
;
330 /// Simple helper to walk all the users of a list of phis depth first, and call
331 /// a visit function on each one in post-order.
333 /// All of the PHIs should be in the same basic block, and this is primarily
334 /// used to make a single depth-first walk across their collective users
335 /// without revisiting any subgraphs. Callers should provide a fast, idempotent
336 /// callable to test whether a node has been visited and the more important
337 /// callable to actually visit a particular node.
339 /// Depth-first and postorder here refer to the *operand* graph -- we start
340 /// from a collection of users of PHI nodes and walk "up" the operands
342 template <typename IsVisitedT
, typename VisitT
>
343 static void visitPHIUsersAndDepsInPostOrder(ArrayRef
<PHINode
*> PNs
,
344 IsVisitedT IsVisited
,
346 SmallVector
<std::pair
<Instruction
*, User::value_op_iterator
>, 16> DFSStack
;
348 for (Use
&U
: PN
->uses()) {
349 auto *UI
= cast
<Instruction
>(U
.getUser());
351 // Already visited this user, continue across the roots.
354 // Otherwise, walk the operand graph depth-first and visit each
355 // dependency in postorder.
356 DFSStack
.push_back({UI
, UI
->value_op_begin()});
358 User::value_op_iterator OpIt
;
359 std::tie(UI
, OpIt
) = DFSStack
.pop_back_val();
360 while (OpIt
!= UI
->value_op_end()) {
361 auto *OpI
= dyn_cast
<Instruction
>(*OpIt
);
362 // Increment to the next operand for whenever we continue.
364 // No need to visit non-instructions, which can't form dependencies,
365 // or instructions outside of our potential dependency set that we
366 // were given. Finally, if we've already visited the node, continue
368 if (!OpI
|| IsVisited(OpI
))
371 // Push onto the stack and descend. We can directly continue this
372 // loop when ascending.
373 DFSStack
.push_back({UI
, OpIt
});
375 OpIt
= OpI
->value_op_begin();
378 // Finished visiting children, visit this node.
379 assert(!IsVisited(UI
) && "Should not have already visited a node!");
381 } while (!DFSStack
.empty());
385 /// Find profitable PHIs to speculate.
387 /// For a PHI node to be profitable, we need the cost of speculating its users
388 /// (and their dependencies) to not exceed the savings of folding the PHI's
389 /// constant operands into the speculated users.
391 /// Computing this is surprisingly challenging. Because users of two different
392 /// PHI nodes can depend on each other or on common other instructions, it may
393 /// be profitable to speculate two PHI nodes together even though neither one
394 /// in isolation is profitable. The straightforward way to find all the
395 /// profitable PHIs would be to check each combination of PHIs' cost, but this
396 /// is exponential in complexity.
398 /// Even if we assume that we only care about cases where we can consider each
399 /// PHI node in isolation (rather than considering cases where none are
400 /// profitable in isolation but some subset are profitable as a set), we still
401 /// have a challenge. The obvious way to find all individually profitable PHIs
402 /// is to iterate until reaching a fixed point, but this will be quadratic in
405 /// This code currently uses a linear-to-compute order for a greedy approach.
406 /// It won't find cases where a set of PHIs must be considered together, but it
407 /// handles most cases of order dependence without quadratic iteration. The
408 /// specific order used is the post-order across the operand DAG. When the last
409 /// user of a PHI is visited in this postorder walk, we check it for
412 /// There is an orthogonal extra complexity to all of this: computing the cost
413 /// itself can easily become a linear computation making everything again (at
414 /// best) quadratic. Using a postorder over the operand graph makes it
415 /// particularly easy to avoid this through dynamic programming. As we do the
416 /// postorder walk, we build the transitive cost of that subgraph. It is also
417 /// straightforward to then update these costs when we mark a PHI for
418 /// speculation so that subsequent PHIs don't re-pay the cost of already
419 /// speculated instructions.
420 static SmallVector
<PHINode
*, 16>
421 findProfitablePHIs(ArrayRef
<PHINode
*> PNs
,
422 const SmallDenseMap
<PHINode
*, int, 16> &CostSavingsMap
,
423 const SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
424 int NumPreds
, DominatorTree
&DT
, TargetTransformInfo
&TTI
) {
425 SmallVector
<PHINode
*, 16> SpecPNs
;
427 // First, establish a reverse mapping from immediate users of the PHI nodes
428 // to the nodes themselves, and count how many users each PHI node has in
429 // a way we can update while processing them.
430 SmallDenseMap
<Instruction
*, TinyPtrVector
<PHINode
*>, 16> UserToPNMap
;
431 SmallDenseMap
<PHINode
*, int, 16> PNUserCountMap
;
432 SmallPtrSet
<Instruction
*, 16> UserSet
;
433 for (auto *PN
: PNs
) {
434 assert(UserSet
.empty() && "Must start with an empty user set!");
435 for (Use
&U
: PN
->uses())
436 UserSet
.insert(cast
<Instruction
>(U
.getUser()));
437 PNUserCountMap
[PN
] = UserSet
.size();
438 for (auto *UI
: UserSet
)
439 UserToPNMap
.insert({UI
, {}}).first
->second
.push_back(PN
);
443 // Now do a DFS across the operand graph of the users, computing cost as we
444 // go and when all costs for a given PHI are known, checking that PHI for
446 SmallDenseMap
<Instruction
*, int, 16> SpecCostMap
;
447 visitPHIUsersAndDepsInPostOrder(
450 [&](Instruction
*I
) {
451 // We consider anything that isn't potentially speculated to be
452 // "visited" as it is already handled. Similarly, anything that *is*
453 // potentially speculated but for which we have an entry in our cost
455 return !PotentialSpecSet
.count(I
) || SpecCostMap
.count(I
);
458 [&](Instruction
*I
) {
459 // We've fully visited the operands, so sum their cost with this node
460 // and update the cost map.
461 int Cost
= TTI
.TCC_Free
;
462 for (Value
*OpV
: I
->operand_values())
463 if (auto *OpI
= dyn_cast
<Instruction
>(OpV
)) {
464 auto CostMapIt
= SpecCostMap
.find(OpI
);
465 if (CostMapIt
!= SpecCostMap
.end())
466 Cost
+= CostMapIt
->second
;
468 Cost
+= TTI
.getUserCost(I
);
469 bool Inserted
= SpecCostMap
.insert({I
, Cost
}).second
;
471 assert(Inserted
&& "Must not re-insert a cost during the DFS!");
473 // Now check if this node had a corresponding PHI node using it. If so,
474 // we need to decrement the outstanding user count for it.
475 auto UserPNsIt
= UserToPNMap
.find(I
);
476 if (UserPNsIt
== UserToPNMap
.end())
478 auto &UserPNs
= UserPNsIt
->second
;
479 auto UserPNsSplitIt
= std::stable_partition(
480 UserPNs
.begin(), UserPNs
.end(), [&](PHINode
*UserPN
) {
481 int &PNUserCount
= PNUserCountMap
.find(UserPN
)->second
;
484 "Should never re-visit a PN after its user count hits zero!");
486 return PNUserCount
!= 0;
489 // FIXME: Rather than one at a time, we should sum the savings as the
490 // cost will be completely shared.
491 SmallVector
<Instruction
*, 16> SpecWorklist
;
492 for (auto *PN
: llvm::make_range(UserPNsSplitIt
, UserPNs
.end())) {
493 int SpecCost
= TTI
.TCC_Free
;
494 for (Use
&U
: PN
->uses())
496 SpecCostMap
.find(cast
<Instruction
>(U
.getUser()))->second
;
497 SpecCost
*= (NumPreds
- 1);
498 // When the user count of a PHI node hits zero, we should check its
499 // profitability. If profitable, we should mark it for speculation
500 // and zero out the cost of everything it depends on.
501 int CostSavings
= CostSavingsMap
.find(PN
)->second
;
502 if (SpecCost
> CostSavings
) {
503 LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN
508 " Speculation cost: "
509 << SpecCost
<< "\n");
513 // We're going to speculate this user-associated PHI. Copy it out and
514 // add its users to the worklist to update their cost.
515 SpecPNs
.push_back(PN
);
516 for (Use
&U
: PN
->uses()) {
517 auto *UI
= cast
<Instruction
>(U
.getUser());
518 auto CostMapIt
= SpecCostMap
.find(UI
);
519 if (CostMapIt
->second
== 0)
521 // Zero out this cost entry to avoid duplicates.
522 CostMapIt
->second
= 0;
523 SpecWorklist
.push_back(UI
);
527 // Now walk all the operands of the users in the worklist transitively
528 // to zero out all the memoized costs.
529 while (!SpecWorklist
.empty()) {
530 Instruction
*SpecI
= SpecWorklist
.pop_back_val();
531 assert(SpecCostMap
.find(SpecI
)->second
== 0 &&
532 "Didn't zero out a cost!");
534 // Walk the operands recursively to zero out their cost as well.
535 for (auto *OpV
: SpecI
->operand_values()) {
536 auto *OpI
= dyn_cast
<Instruction
>(OpV
);
539 auto CostMapIt
= SpecCostMap
.find(OpI
);
540 if (CostMapIt
== SpecCostMap
.end() || CostMapIt
->second
== 0)
542 CostMapIt
->second
= 0;
543 SpecWorklist
.push_back(OpI
);
551 /// Speculate users around a set of PHI nodes.
553 /// This routine does the actual speculation around a set of PHI nodes where we
554 /// have determined this to be both safe and profitable.
556 /// This routine handles any spliting of critical edges necessary to create
557 /// a safe block to speculate into as well as cloning the instructions and
558 /// rewriting all uses.
559 static void speculatePHIs(ArrayRef
<PHINode
*> SpecPNs
,
560 SmallPtrSetImpl
<Instruction
*> &PotentialSpecSet
,
561 SmallSetVector
<BasicBlock
*, 16> &PredSet
,
563 LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs
.size() << " PHIs!\n");
564 NumPHIsSpeculated
+= SpecPNs
.size();
566 // Split any critical edges so that we have a block to hoist into.
567 auto *ParentBB
= SpecPNs
[0]->getParent();
568 SmallVector
<BasicBlock
*, 16> SpecPreds
;
569 SpecPreds
.reserve(PredSet
.size());
570 for (auto *PredBB
: PredSet
) {
571 auto *NewPredBB
= SplitCriticalEdge(
573 CriticalEdgeSplittingOptions(&DT
).setMergeIdenticalEdges());
576 LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB
->getName()
578 SpecPreds
.push_back(NewPredBB
);
580 assert(PredBB
->getSingleSuccessor() == ParentBB
&&
581 "We need a non-critical predecessor to speculate into.");
582 assert(!isa
<InvokeInst
>(PredBB
->getTerminator()) &&
583 "Cannot have a non-critical invoke!");
585 // Already non-critical, use existing pred.
586 SpecPreds
.push_back(PredBB
);
590 SmallPtrSet
<Instruction
*, 16> SpecSet
;
591 SmallVector
<Instruction
*, 16> SpecList
;
592 visitPHIUsersAndDepsInPostOrder(SpecPNs
,
594 [&](Instruction
*I
) {
595 // This is visited if we don't need to
596 // speculate it or we already have
598 return !PotentialSpecSet
.count(I
) ||
602 [&](Instruction
*I
) {
603 // All operands scheduled, schedule this
606 SpecList
.push_back(I
);
609 int NumSpecInsts
= SpecList
.size() * SpecPreds
.size();
610 int NumRedundantInsts
= NumSpecInsts
- SpecList
.size();
611 LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts
612 << " speculated instructions, " << NumRedundantInsts
613 << " redundancies\n");
614 NumSpeculatedInstructions
+= NumSpecInsts
;
615 NumNewRedundantInstructions
+= NumRedundantInsts
;
617 // Each predecessor is numbered by its index in `SpecPreds`, so for each
618 // instruction we speculate, the speculated instruction is stored in that
619 // index of the vector associated with the original instruction. We also
620 // store the incoming values for each predecessor from any PHIs used.
621 SmallDenseMap
<Instruction
*, SmallVector
<Value
*, 2>, 16> SpeculatedValueMap
;
623 // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming
624 // value. This handles both the PHIs we are speculating around and any other
625 // PHIs that happen to be used.
626 for (auto *OrigI
: SpecList
)
627 for (auto *OpV
: OrigI
->operand_values()) {
628 auto *OpPN
= dyn_cast
<PHINode
>(OpV
);
629 if (!OpPN
|| OpPN
->getParent() != ParentBB
)
632 auto InsertResult
= SpeculatedValueMap
.insert({OpPN
, {}});
633 if (!InsertResult
.second
)
636 auto &SpeculatedVals
= InsertResult
.first
->second
;
638 // Populating our structure for mapping is particularly annoying because
639 // finding an incoming value for a particular predecessor block in a PHI
640 // node is a linear time operation! To avoid quadratic behavior, we build
641 // a map for this PHI node's incoming values and then translate it into
642 // the more compact representation used below.
643 SmallDenseMap
<BasicBlock
*, Value
*, 16> IncomingValueMap
;
644 for (int i
: llvm::seq
<int>(0, OpPN
->getNumIncomingValues()))
645 IncomingValueMap
[OpPN
->getIncomingBlock(i
)] = OpPN
->getIncomingValue(i
);
647 for (auto *PredBB
: SpecPreds
)
648 SpeculatedVals
.push_back(IncomingValueMap
.find(PredBB
)->second
);
651 // Speculate into each predecessor.
652 for (int PredIdx
: llvm::seq
<int>(0, SpecPreds
.size())) {
653 auto *PredBB
= SpecPreds
[PredIdx
];
654 assert(PredBB
->getSingleSuccessor() == ParentBB
&&
655 "We need a non-critical predecessor to speculate into.");
657 for (auto *OrigI
: SpecList
) {
658 auto *NewI
= OrigI
->clone();
659 NewI
->setName(Twine(OrigI
->getName()) + "." + Twine(PredIdx
));
660 NewI
->insertBefore(PredBB
->getTerminator());
662 // Rewrite all the operands to the previously speculated instructions.
663 // Because we're walking in-order, the defs must precede the uses and we
664 // should already have these mappings.
665 for (Use
&U
: NewI
->operands()) {
666 auto *OpI
= dyn_cast
<Instruction
>(U
.get());
669 auto MapIt
= SpeculatedValueMap
.find(OpI
);
670 if (MapIt
== SpeculatedValueMap
.end())
672 const auto &SpeculatedVals
= MapIt
->second
;
673 assert(SpeculatedVals
[PredIdx
] &&
674 "Must have a speculated value for this predecessor!");
675 assert(SpeculatedVals
[PredIdx
]->getType() == OpI
->getType() &&
676 "Speculated value has the wrong type!");
678 // Rewrite the use to this predecessor's speculated instruction.
679 U
.set(SpeculatedVals
[PredIdx
]);
682 // Commute instructions which now have a constant in the LHS but not the
684 if (NewI
->isBinaryOp() && NewI
->isCommutative() &&
685 isa
<Constant
>(NewI
->getOperand(0)) &&
686 !isa
<Constant
>(NewI
->getOperand(1)))
687 NewI
->getOperandUse(0).swap(NewI
->getOperandUse(1));
689 SpeculatedValueMap
[OrigI
].push_back(NewI
);
690 assert(SpeculatedValueMap
[OrigI
][PredIdx
] == NewI
&&
691 "Mismatched speculated instruction index!");
695 // Walk the speculated instruction list and if they have uses, insert a PHI
696 // for them from the speculated versions, and replace the uses with the PHI.
697 // Then erase the instructions as they have been fully speculated. The walk
698 // needs to be in reverse so that we don't think there are users when we'll
699 // actually eventually remove them later.
700 IRBuilder
<> IRB(SpecPNs
[0]);
701 for (auto *OrigI
: llvm::reverse(SpecList
)) {
702 // Check if we need a PHI for any remaining users and if so, insert it.
703 if (!OrigI
->use_empty()) {
704 auto *SpecIPN
= IRB
.CreatePHI(OrigI
->getType(), SpecPreds
.size(),
705 Twine(OrigI
->getName()) + ".phi");
706 // Add the incoming values we speculated.
707 auto &SpeculatedVals
= SpeculatedValueMap
.find(OrigI
)->second
;
708 for (int PredIdx
: llvm::seq
<int>(0, SpecPreds
.size()))
709 SpecIPN
->addIncoming(SpeculatedVals
[PredIdx
], SpecPreds
[PredIdx
]);
711 // And replace the uses with the PHI node.
712 OrigI
->replaceAllUsesWith(SpecIPN
);
715 // It is important to immediately erase this so that it stops using other
716 // instructions. This avoids inserting needless PHIs of them.
717 OrigI
->eraseFromParent();
720 // All of the uses of the speculated phi nodes should be removed at this
721 // point, so erase them.
722 for (auto *SpecPN
: SpecPNs
) {
723 assert(SpecPN
->use_empty() && "All users should have been speculated!");
724 SpecPN
->eraseFromParent();
728 /// Try to speculate around a series of PHIs from a single basic block.
730 /// This routine checks whether any of these PHIs are profitable to speculate
731 /// users around. If safe and profitable, it does the speculation. It returns
732 /// true when at least some speculation occurs.
733 static bool tryToSpeculatePHIs(SmallVectorImpl
<PHINode
*> &PNs
,
734 DominatorTree
&DT
, TargetTransformInfo
&TTI
) {
735 LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n");
737 // Savings in cost from speculating around a PHI node.
738 SmallDenseMap
<PHINode
*, int, 16> CostSavingsMap
;
740 // Remember the set of instructions that are candidates for speculation so
741 // that we can quickly walk things within that space. This prunes out
742 // instructions already available along edges, etc.
743 SmallPtrSet
<Instruction
*, 16> PotentialSpecSet
;
745 // Remember the set of instructions that are (transitively) unsafe to
746 // speculate into the incoming edges of this basic block. This avoids
747 // recomputing them for each PHI node we check. This set is specific to this
748 // block though as things are pruned out of it based on what is available
749 // along incoming edges.
750 SmallPtrSet
<Instruction
*, 16> UnsafeSet
;
752 // For each PHI node in this block, check whether there are immediate folding
753 // opportunities from speculation, and whether that speculation will be
754 // valid. This determise the set of safe PHIs to speculate.
755 PNs
.erase(llvm::remove_if(PNs
,
757 return !isSafeAndProfitableToSpeculateAroundPHI(
758 *PN
, CostSavingsMap
, PotentialSpecSet
,
762 // If no PHIs were profitable, skip.
764 LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n");
768 // We need to know how much speculation will cost which is determined by how
769 // many incoming edges will need a copy of each speculated instruction.
770 SmallSetVector
<BasicBlock
*, 16> PredSet
;
771 for (auto *PredBB
: PNs
[0]->blocks()) {
772 if (!PredSet
.insert(PredBB
))
775 // We cannot speculate when a predecessor is an indirect branch.
776 // FIXME: We also can't reliably create a non-critical edge block for
777 // speculation if the predecessor is an invoke. This doesn't seem
778 // fundamental and we should probably be splitting critical edges
780 const auto *TermInst
= PredBB
->getTerminator();
781 if (isa
<IndirectBrInst
>(TermInst
) ||
782 isa
<InvokeInst
>(TermInst
) ||
783 isa
<CallBrInst
>(TermInst
)) {
784 LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
785 << PredBB
->getName() << "\n");
789 if (PredSet
.size() < 2) {
790 LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n");
794 SmallVector
<PHINode
*, 16> SpecPNs
= findProfitablePHIs(
795 PNs
, CostSavingsMap
, PotentialSpecSet
, PredSet
.size(), DT
, TTI
);
800 speculatePHIs(SpecPNs
, PotentialSpecSet
, PredSet
, DT
);
804 PreservedAnalyses
SpeculateAroundPHIsPass::run(Function
&F
,
805 FunctionAnalysisManager
&AM
) {
806 auto &DT
= AM
.getResult
<DominatorTreeAnalysis
>(F
);
807 auto &TTI
= AM
.getResult
<TargetIRAnalysis
>(F
);
809 bool Changed
= false;
810 for (auto *BB
: ReversePostOrderTraversal
<Function
*>(&F
)) {
811 SmallVector
<PHINode
*, 16> PNs
;
812 auto BBI
= BB
->begin();
813 while (auto *PN
= dyn_cast
<PHINode
>(&*BBI
)) {
821 Changed
|= tryToSpeculatePHIs(PNs
, DT
, TTI
);
825 return PreservedAnalyses::all();
827 PreservedAnalyses PA
;