1 //===- Mem2Reg.cpp - Promotes memory slots into values ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "mlir/Transforms/Mem2Reg.h"
10 #include "mlir/Analysis/DataLayoutAnalysis.h"
11 #include "mlir/Analysis/SliceAnalysis.h"
12 #include "mlir/Analysis/TopologicalSortUtils.h"
13 #include "mlir/IR/Builders.h"
14 #include "mlir/IR/Dominance.h"
15 #include "mlir/IR/PatternMatch.h"
16 #include "mlir/IR/RegionKindInterface.h"
17 #include "mlir/IR/Value.h"
18 #include "mlir/Interfaces/ControlFlowInterfaces.h"
19 #include "mlir/Interfaces/MemorySlotInterfaces.h"
20 #include "mlir/Transforms/Passes.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/Support/GenericIteratedDominanceFrontier.h"
25 #define GEN_PASS_DEF_MEM2REG
26 #include "mlir/Transforms/Passes.h.inc"
29 #define DEBUG_TYPE "mem2reg"
35 /// This pass turns unnecessary uses of automatically allocated memory slots
36 /// into direct Value-based operations. For example, it will simplify storing a
37 /// constant in a memory slot to immediately load it to a direct use of that
38 /// constant. In other words, given a memory slot addressed by a non-aliased
39 /// "pointer" Value, mem2reg removes all the uses of that pointer.
41 /// Within a block, this is done by following the chain of stores and loads of
42 /// the slot and replacing the results of loads with the values previously
/// stored. If a load happens before any other store, a poison value is used
/// instead.
46 /// Control flow can create situations where a load could be replaced by
47 /// multiple possible stores depending on the control flow path taken. As a
48 /// result, this pass must introduce new block arguments in some blocks to
49 /// accommodate for the multiple possible definitions. Each predecessor will
50 /// populate the block argument with the definition reached at its end. With
51 /// this, the value stored can be well defined at block boundaries, allowing
52 /// the propagation of replacement through blocks.
54 /// This pass computes this transformation in four main steps. The two first
55 /// steps are performed during an analysis phase that does not mutate IR.
57 /// The two steps of the analysis phase are the following:
58 /// - A first step computes the list of operations that transitively use the
59 /// memory slot we would like to promote. The purpose of this phase is to
60 /// identify which uses must be removed to promote the slot, either by rewiring
61 /// the user or deleting it. Naturally, direct uses of the slot must be removed.
62 /// Sometimes additional uses must also be removed: this is notably the case
63 /// when a direct user of the slot cannot rewire its use and must delete itself,
64 /// and thus must make its users no longer use it. If any of those uses cannot
65 /// be removed by their users in any way, promotion cannot continue: this is
66 /// decided at this step.
67 /// - A second step computes the list of blocks where a block argument will be
68 /// needed ("merge points") without mutating the IR. These blocks are the blocks
69 /// leading to a definition clash between two predecessors. Such blocks happen
70 /// to be the Iterated Dominance Frontier (IDF) of the set of blocks containing
71 /// a store, as they represent the point where a clear defining dominator stops
72 /// existing. Computing this information in advance allows making sure the
73 /// terminators that will forward values are capable of doing so (inability to
74 /// do so aborts promotion at this step).
76 /// At this point, promotion is guaranteed to happen, and the mutation phase can
77 /// begin with the following steps:
78 /// - A third step computes the reaching definition of the memory slot at each
79 /// blocking user. This is the core of the mem2reg algorithm, also known as
80 /// load-store forwarding. This analyses loads and stores and propagates which
81 /// value must be stored in the slot at each blocking user. This is achieved by
82 /// doing a depth-first walk of the dominator tree of the function. This is
83 /// sufficient because the reaching definition at the beginning of a block is
84 /// either its new block argument if it is a merge block, or the definition
85 /// reaching the end of its immediate dominator (parent in the dominator tree).
86 /// We can therefore propagate this information down the dominator tree to
87 /// proceed with renaming within blocks.
88 /// - The final fourth step uses the reaching definition to remove blocking uses
89 /// in topological order.
/// For further reading, chapter three of SSA-based Compiler Design [1]
/// showcases SSA construction, where mem2reg is an adaptation of the same
/// process.
///
/// [1]: Rastello F. & Bouchez Tichadou F., SSA-based Compiler Design (2022),
/// Springer.
/// Maps each operation that must be mutated for promotion to the set of its
/// operands ("blocking uses") that must be eliminated. A MapVector is used so
/// iteration order is deterministic (insertion order).
using BlockingUsesMap =
    llvm::MapVector<Operation *, SmallPtrSet<OpOperand *, 4>>;
103 /// Information computed during promotion analysis used to perform actual
105 struct MemorySlotPromotionInfo
{
106 /// Blocks for which at least two definitions of the slot values clash.
107 SmallPtrSet
<Block
*, 8> mergePoints
;
108 /// Contains, for each operation, which uses must be eliminated by promotion.
109 /// This is a DAG structure because if an operation must eliminate some of
110 /// its uses, it is because the defining ops of the blocking uses requested
111 /// it. The defining ops therefore must also have blocking uses or be the
112 /// starting point of the blocking uses.
113 BlockingUsesMap userToBlockingUses
;
116 /// Computes information for basic slot promotion. This will check that direct
117 /// slot promotion can be performed, and provide the information to execute the
118 /// promotion. This does not mutate IR.
119 class MemorySlotPromotionAnalyzer
{
121 MemorySlotPromotionAnalyzer(MemorySlot slot
, DominanceInfo
&dominance
,
122 const DataLayout
&dataLayout
)
123 : slot(slot
), dominance(dominance
), dataLayout(dataLayout
) {}
125 /// Computes the information for slot promotion if promotion is possible,
126 /// returns nothing otherwise.
127 std::optional
<MemorySlotPromotionInfo
> computeInfo();
130 /// Computes the transitive uses of the slot that block promotion. This finds
131 /// uses that would block the promotion, checks that the operation has a
132 /// solution to remove the blocking use, and potentially forwards the analysis
133 /// if the operation needs further blocking uses resolved to resolve its own
134 /// uses (typically, removing its users because it will delete itself to
135 /// resolve its own blocking uses). This will fail if one of the transitive
136 /// users cannot remove a requested use, and should prevent promotion.
137 LogicalResult
computeBlockingUses(BlockingUsesMap
&userToBlockingUses
);
139 /// Computes in which blocks the value stored in the slot is actually used,
140 /// meaning blocks leading to a load. This method uses `definingBlocks`, the
141 /// set of blocks containing a store to the slot (defining the value of the
143 SmallPtrSet
<Block
*, 16>
144 computeSlotLiveIn(SmallPtrSetImpl
<Block
*> &definingBlocks
);
146 /// Computes the points in which multiple re-definitions of the slot's value
147 /// (stores) may conflict.
148 void computeMergePoints(SmallPtrSetImpl
<Block
*> &mergePoints
);
150 /// Ensures predecessors of merge points can properly provide their current
151 /// definition of the value stored in the slot to the merge point. This can
152 /// notably be an issue if the terminator used does not have the ability to
153 /// forward values through block operands.
154 bool areMergePointsUsable(SmallPtrSetImpl
<Block
*> &mergePoints
);
157 DominanceInfo
&dominance
;
158 const DataLayout
&dataLayout
;
/// Caches, per region, a deterministic index for each block (its position in a
/// dominance-respecting topological order). Shared across promotions to avoid
/// recomputing the ordering.
using BlockIndexCache = DenseMap<Region *, DenseMap<Block *, size_t>>;
163 /// The MemorySlotPromoter handles the state of promoting a memory slot. It
164 /// wraps a slot and its associated allocator. This will perform the mutation of
166 class MemorySlotPromoter
{
168 MemorySlotPromoter(MemorySlot slot
, PromotableAllocationOpInterface allocator
,
169 OpBuilder
&builder
, DominanceInfo
&dominance
,
170 const DataLayout
&dataLayout
, MemorySlotPromotionInfo info
,
171 const Mem2RegStatistics
&statistics
,
172 BlockIndexCache
&blockIndexCache
);
174 /// Actually promotes the slot by mutating IR. Promoting a slot DOES
175 /// invalidate the MemorySlotPromotionInfo of other slots. Preparation of
176 /// promotion info should NOT be performed in batches.
177 /// Returns a promotable allocation op if a new allocator was created, nullopt
179 std::optional
<PromotableAllocationOpInterface
> promoteSlot();
182 /// Computes the reaching definition for all the operations that require
183 /// promotion. `reachingDef` is the value the slot should contain at the
184 /// beginning of the block. This method returns the reached definition at the
185 /// end of the block. This method must only be called at most once per block.
186 Value
computeReachingDefInBlock(Block
*block
, Value reachingDef
);
188 /// Computes the reaching definition for all the operations that require
189 /// promotion. `reachingDef` corresponds to the initial value the
190 /// slot will contain before any write, typically a poison value.
191 /// This method must only be called at most once per region.
192 void computeReachingDefInRegion(Region
*region
, Value reachingDef
);
194 /// Removes the blocking uses of the slot, in topological order.
195 void removeBlockingUses();
197 /// Lazily-constructed default value representing the content of the slot when
198 /// no store has been executed. This function may mutate IR.
199 Value
getOrCreateDefaultValue();
202 PromotableAllocationOpInterface allocator
;
204 /// Potentially non-initialized default value. Use `getOrCreateDefaultValue`
205 /// to initialize it on demand.
207 /// Contains the reaching definition at this operation. Reaching definitions
208 /// are only computed for promotable memory operations with blocking uses.
209 DenseMap
<PromotableMemOpInterface
, Value
> reachingDefs
;
210 DenseMap
<PromotableMemOpInterface
, Value
> replacedValuesMap
;
211 DominanceInfo
&dominance
;
212 const DataLayout
&dataLayout
;
213 MemorySlotPromotionInfo info
;
214 const Mem2RegStatistics
&statistics
;
216 /// Shared cache of block indices of specific regions.
217 BlockIndexCache
&blockIndexCache
;
222 MemorySlotPromoter::MemorySlotPromoter(
223 MemorySlot slot
, PromotableAllocationOpInterface allocator
,
224 OpBuilder
&builder
, DominanceInfo
&dominance
, const DataLayout
&dataLayout
,
225 MemorySlotPromotionInfo info
, const Mem2RegStatistics
&statistics
,
226 BlockIndexCache
&blockIndexCache
)
227 : slot(slot
), allocator(allocator
), builder(builder
), dominance(dominance
),
228 dataLayout(dataLayout
), info(std::move(info
)), statistics(statistics
),
229 blockIndexCache(blockIndexCache
) {
231 auto isResultOrNewBlockArgument
= [&]() {
232 if (BlockArgument arg
= dyn_cast
<BlockArgument
>(slot
.ptr
))
233 return arg
.getOwner()->getParentOp() == allocator
;
234 return slot
.ptr
.getDefiningOp() == allocator
;
237 assert(isResultOrNewBlockArgument() &&
238 "a slot must be a result of the allocator or an argument of the child "
239 "regions of the allocator");
243 Value
MemorySlotPromoter::getOrCreateDefaultValue() {
247 OpBuilder::InsertionGuard
guard(builder
);
248 builder
.setInsertionPointToStart(slot
.ptr
.getParentBlock());
249 return defaultValue
= allocator
.getDefaultValue(slot
, builder
);
252 LogicalResult
MemorySlotPromotionAnalyzer::computeBlockingUses(
253 BlockingUsesMap
&userToBlockingUses
) {
254 // The promotion of an operation may require the promotion of further
255 // operations (typically, removing operations that use an operation that must
256 // delete itself). We thus need to start from the use of the slot pointer and
257 // propagate further requests through the forward slice.
259 // Because this pass currently only supports analysing the parent region of
260 // the slot pointer, if a promotable memory op that needs promotion is within
261 // a graph region, the slot may only be used in a graph region and should
262 // therefore be ignored.
263 Region
*slotPtrRegion
= slot
.ptr
.getParentRegion();
264 auto slotPtrRegionOp
=
265 dyn_cast
<RegionKindInterface
>(slotPtrRegion
->getParentOp());
266 if (slotPtrRegionOp
&&
267 slotPtrRegionOp
.getRegionKind(slotPtrRegion
->getRegionNumber()) ==
271 // First insert that all immediate users of the slot pointer must no longer
273 for (OpOperand
&use
: slot
.ptr
.getUses()) {
274 SmallPtrSet
<OpOperand
*, 4> &blockingUses
=
275 userToBlockingUses
[use
.getOwner()];
276 blockingUses
.insert(&use
);
279 // Then, propagate the requirements for the removal of uses. The
280 // topologically-sorted forward slice allows for all blocking uses of an
281 // operation to have been computed before it is reached. Operations are
282 // traversed in topological order of their uses, starting from the slot
284 SetVector
<Operation
*> forwardSlice
;
285 mlir::getForwardSlice(slot
.ptr
, &forwardSlice
);
286 for (Operation
*user
: forwardSlice
) {
287 // If the next operation has no blocking uses, everything is fine.
288 auto it
= userToBlockingUses
.find(user
);
289 if (it
== userToBlockingUses
.end())
292 SmallPtrSet
<OpOperand
*, 4> &blockingUses
= it
->second
;
294 SmallVector
<OpOperand
*> newBlockingUses
;
295 // If the operation decides it cannot deal with removing the blocking uses,
296 // promotion must fail.
297 if (auto promotable
= dyn_cast
<PromotableOpInterface
>(user
)) {
298 if (!promotable
.canUsesBeRemoved(blockingUses
, newBlockingUses
,
301 } else if (auto promotable
= dyn_cast
<PromotableMemOpInterface
>(user
)) {
302 if (!promotable
.canUsesBeRemoved(slot
, blockingUses
, newBlockingUses
,
306 // An operation that has blocking uses must be promoted. If it is not
307 // promotable, promotion must fail.
311 // Then, register any new blocking uses for coming operations.
312 for (OpOperand
*blockingUse
: newBlockingUses
) {
313 assert(llvm::is_contained(user
->getResults(), blockingUse
->get()));
315 SmallPtrSetImpl
<OpOperand
*> &newUserBlockingUseSet
=
316 userToBlockingUses
[blockingUse
->getOwner()];
317 newUserBlockingUseSet
.insert(blockingUse
);
321 // Because this pass currently only supports analysing the parent region of
322 // the slot pointer, if a promotable memory op that needs promotion is outside
323 // of this region, promotion must fail because it will be impossible to
324 // provide a valid `reachingDef` for it.
325 for (auto &[toPromote
, _
] : userToBlockingUses
)
326 if (isa
<PromotableMemOpInterface
>(toPromote
) &&
327 toPromote
->getParentRegion() != slot
.ptr
.getParentRegion())
333 SmallPtrSet
<Block
*, 16> MemorySlotPromotionAnalyzer::computeSlotLiveIn(
334 SmallPtrSetImpl
<Block
*> &definingBlocks
) {
335 SmallPtrSet
<Block
*, 16> liveIn
;
337 // The worklist contains blocks in which it is known that the slot value is
338 // live-in. The further blocks where this value is live-in will be inferred
340 SmallVector
<Block
*> liveInWorkList
;
342 // Blocks with a load before any other store to the slot are the starting
343 // points of the analysis. The slot value is definitely live-in in those
345 SmallPtrSet
<Block
*, 16> visited
;
346 for (Operation
*user
: slot
.ptr
.getUsers()) {
347 if (!visited
.insert(user
->getBlock()).second
)
350 for (Operation
&op
: user
->getBlock()->getOperations()) {
351 if (auto memOp
= dyn_cast
<PromotableMemOpInterface
>(op
)) {
352 // If this operation loads the slot, it is loading from it before
353 // ever writing to it, so the value is live-in in this block.
354 if (memOp
.loadsFrom(slot
)) {
355 liveInWorkList
.push_back(user
->getBlock());
359 // If we store to the slot, further loads will see that value.
360 // Because we did not meet any load before, the value is not live-in.
361 if (memOp
.storesTo(slot
))
367 // The information is then propagated to the predecessors until a def site
369 while (!liveInWorkList
.empty()) {
370 Block
*liveInBlock
= liveInWorkList
.pop_back_val();
372 if (!liveIn
.insert(liveInBlock
).second
)
375 // If a predecessor is a defining block, either:
376 // - It has a load before its first store, in which case it is live-in but
377 // has already been processed in the initialisation step.
378 // - It has a store before any load, in which case it is not live-in.
379 // We can thus at this stage insert to the worklist only predecessors that
380 // are not defining blocks.
381 for (Block
*pred
: liveInBlock
->getPredecessors())
382 if (!definingBlocks
.contains(pred
))
383 liveInWorkList
.push_back(pred
);
/// Forward (non-post-dominance) iterated dominance frontier calculator over
/// MLIR blocks, used to find the merge points of the slot's stores.
using IDFCalculator = llvm::IDFCalculatorBase<Block, false>;
390 void MemorySlotPromotionAnalyzer::computeMergePoints(
391 SmallPtrSetImpl
<Block
*> &mergePoints
) {
392 if (slot
.ptr
.getParentRegion()->hasOneBlock())
395 IDFCalculator
idfCalculator(dominance
.getDomTree(slot
.ptr
.getParentRegion()));
397 SmallPtrSet
<Block
*, 16> definingBlocks
;
398 for (Operation
*user
: slot
.ptr
.getUsers())
399 if (auto storeOp
= dyn_cast
<PromotableMemOpInterface
>(user
))
400 if (storeOp
.storesTo(slot
))
401 definingBlocks
.insert(user
->getBlock());
403 idfCalculator
.setDefiningBlocks(definingBlocks
);
405 SmallPtrSet
<Block
*, 16> liveIn
= computeSlotLiveIn(definingBlocks
);
406 idfCalculator
.setLiveInBlocks(liveIn
);
408 SmallVector
<Block
*> mergePointsVec
;
409 idfCalculator
.calculate(mergePointsVec
);
411 mergePoints
.insert(mergePointsVec
.begin(), mergePointsVec
.end());
414 bool MemorySlotPromotionAnalyzer::areMergePointsUsable(
415 SmallPtrSetImpl
<Block
*> &mergePoints
) {
416 for (Block
*mergePoint
: mergePoints
)
417 for (Block
*pred
: mergePoint
->getPredecessors())
418 if (!isa
<BranchOpInterface
>(pred
->getTerminator()))
424 std::optional
<MemorySlotPromotionInfo
>
425 MemorySlotPromotionAnalyzer::computeInfo() {
426 MemorySlotPromotionInfo info
;
428 // First, find the set of operations that will need to be changed for the
429 // promotion to happen. These operations need to resolve some of their uses,
430 // either by rewiring them or simply deleting themselves. If any of them
431 // cannot find a way to resolve their blocking uses, we abort the promotion.
432 if (failed(computeBlockingUses(info
.userToBlockingUses
)))
435 // Then, compute blocks in which two or more definitions of the allocated
436 // variable may conflict. These blocks will need a new block argument to
438 computeMergePoints(info
.mergePoints
);
440 // The slot can be promoted if the block arguments to be created can
441 // actually be populated with values, which may not be possible depending
442 // on their predecessors.
443 if (!areMergePointsUsable(info
.mergePoints
))
449 Value
MemorySlotPromoter::computeReachingDefInBlock(Block
*block
,
451 SmallVector
<Operation
*> blockOps
;
452 for (Operation
&op
: block
->getOperations())
453 blockOps
.push_back(&op
);
454 for (Operation
*op
: blockOps
) {
455 if (auto memOp
= dyn_cast
<PromotableMemOpInterface
>(op
)) {
456 if (info
.userToBlockingUses
.contains(memOp
))
457 reachingDefs
.insert({memOp
, reachingDef
});
459 if (memOp
.storesTo(slot
)) {
460 builder
.setInsertionPointAfter(memOp
);
461 Value stored
= memOp
.getStored(slot
, builder
, reachingDef
, dataLayout
);
462 assert(stored
&& "a memory operation storing to a slot must provide a "
463 "new definition of the slot");
464 reachingDef
= stored
;
465 replacedValuesMap
[memOp
] = stored
;
473 void MemorySlotPromoter::computeReachingDefInRegion(Region
*region
,
475 assert(reachingDef
&& "expected an initial reaching def to be provided");
476 if (region
->hasOneBlock()) {
477 computeReachingDefInBlock(®ion
->front(), reachingDef
);
482 llvm::DomTreeNodeBase
<Block
> *block
;
486 SmallVector
<DfsJob
> dfsStack
;
488 auto &domTree
= dominance
.getDomTree(slot
.ptr
.getParentRegion());
490 dfsStack
.emplace_back
<DfsJob
>(
491 {domTree
.getNode(®ion
->front()), reachingDef
});
493 while (!dfsStack
.empty()) {
494 DfsJob job
= dfsStack
.pop_back_val();
495 Block
*block
= job
.block
->getBlock();
497 if (info
.mergePoints
.contains(block
)) {
498 BlockArgument blockArgument
=
499 block
->addArgument(slot
.elemType
, slot
.ptr
.getLoc());
500 builder
.setInsertionPointToStart(block
);
501 allocator
.handleBlockArgument(slot
, blockArgument
, builder
);
502 job
.reachingDef
= blockArgument
;
504 if (statistics
.newBlockArgumentAmount
)
505 (*statistics
.newBlockArgumentAmount
)++;
508 job
.reachingDef
= computeReachingDefInBlock(block
, job
.reachingDef
);
509 assert(job
.reachingDef
);
511 if (auto terminator
= dyn_cast
<BranchOpInterface
>(block
->getTerminator())) {
512 for (BlockOperand
&blockOperand
: terminator
->getBlockOperands()) {
513 if (info
.mergePoints
.contains(blockOperand
.get())) {
514 terminator
.getSuccessorOperands(blockOperand
.getOperandNumber())
515 .append(job
.reachingDef
);
520 for (auto *child
: job
.block
->children())
521 dfsStack
.emplace_back
<DfsJob
>({child
, job
.reachingDef
});
525 /// Gets or creates a block index mapping for `region`.
526 static const DenseMap
<Block
*, size_t> &
527 getOrCreateBlockIndices(BlockIndexCache
&blockIndexCache
, Region
*region
) {
528 auto [it
, inserted
] = blockIndexCache
.try_emplace(region
);
532 DenseMap
<Block
*, size_t> &blockIndices
= it
->second
;
533 SetVector
<Block
*> topologicalOrder
= getBlocksSortedByDominance(*region
);
534 for (auto [index
, block
] : llvm::enumerate(topologicalOrder
))
535 blockIndices
[block
] = index
;
539 /// Sorts `ops` according to dominance. Relies on the topological order of basic
540 /// blocks to get a deterministic ordering. Uses `blockIndexCache` to avoid the
541 /// potentially expensive recomputation of a block index map.
542 static void dominanceSort(SmallVector
<Operation
*> &ops
, Region
®ion
,
543 BlockIndexCache
&blockIndexCache
) {
544 // Produce a topological block order and construct a map to lookup the indices
546 const DenseMap
<Block
*, size_t> &topoBlockIndices
=
547 getOrCreateBlockIndices(blockIndexCache
, ®ion
);
549 // Combining the topological order of the basic blocks together with block
550 // internal operation order guarantees a deterministic, dominance respecting
552 llvm::sort(ops
, [&](Operation
*lhs
, Operation
*rhs
) {
553 size_t lhsBlockIndex
= topoBlockIndices
.at(lhs
->getBlock());
554 size_t rhsBlockIndex
= topoBlockIndices
.at(rhs
->getBlock());
555 if (lhsBlockIndex
== rhsBlockIndex
)
556 return lhs
->isBeforeInBlock(rhs
);
557 return lhsBlockIndex
< rhsBlockIndex
;
561 void MemorySlotPromoter::removeBlockingUses() {
562 llvm::SmallVector
<Operation
*> usersToRemoveUses(
563 llvm::make_first_range(info
.userToBlockingUses
));
565 // Sort according to dominance.
566 dominanceSort(usersToRemoveUses
, *slot
.ptr
.getParentBlock()->getParent(),
569 llvm::SmallVector
<Operation
*> toErase
;
570 // List of all replaced values in the slot.
571 llvm::SmallVector
<std::pair
<Operation
*, Value
>> replacedValuesList
;
572 // Ops to visit with the `visitReplacedValues` method.
573 llvm::SmallVector
<PromotableOpInterface
> toVisit
;
574 for (Operation
*toPromote
: llvm::reverse(usersToRemoveUses
)) {
575 if (auto toPromoteMemOp
= dyn_cast
<PromotableMemOpInterface
>(toPromote
)) {
576 Value reachingDef
= reachingDefs
.lookup(toPromoteMemOp
);
577 // If no reaching definition is known, this use is outside the reach of
578 // the slot. The default value should thus be used.
580 reachingDef
= getOrCreateDefaultValue();
582 builder
.setInsertionPointAfter(toPromote
);
583 if (toPromoteMemOp
.removeBlockingUses(
584 slot
, info
.userToBlockingUses
[toPromote
], builder
, reachingDef
,
585 dataLayout
) == DeletionKind::Delete
)
586 toErase
.push_back(toPromote
);
587 if (toPromoteMemOp
.storesTo(slot
))
588 if (Value replacedValue
= replacedValuesMap
[toPromoteMemOp
])
589 replacedValuesList
.push_back({toPromoteMemOp
, replacedValue
});
593 auto toPromoteBasic
= cast
<PromotableOpInterface
>(toPromote
);
594 builder
.setInsertionPointAfter(toPromote
);
595 if (toPromoteBasic
.removeBlockingUses(info
.userToBlockingUses
[toPromote
],
596 builder
) == DeletionKind::Delete
)
597 toErase
.push_back(toPromote
);
598 if (toPromoteBasic
.requiresReplacedValues())
599 toVisit
.push_back(toPromoteBasic
);
601 for (PromotableOpInterface op
: toVisit
) {
602 builder
.setInsertionPointAfter(op
);
603 op
.visitReplacedValues(replacedValuesList
, builder
);
606 for (Operation
*toEraseOp
: toErase
)
609 assert(slot
.ptr
.use_empty() &&
610 "after promotion, the slot pointer should not be used anymore");
613 std::optional
<PromotableAllocationOpInterface
>
614 MemorySlotPromoter::promoteSlot() {
615 computeReachingDefInRegion(slot
.ptr
.getParentRegion(),
616 getOrCreateDefaultValue());
618 // Now that reaching definitions are known, remove all users.
619 removeBlockingUses();
621 // Update terminators in dead branches to forward default if they are
622 // succeeded by a merge points.
623 for (Block
*mergePoint
: info
.mergePoints
) {
624 for (BlockOperand
&use
: mergePoint
->getUses()) {
625 auto user
= cast
<BranchOpInterface
>(use
.getOwner());
626 SuccessorOperands succOperands
=
627 user
.getSuccessorOperands(use
.getOperandNumber());
628 assert(succOperands
.size() == mergePoint
->getNumArguments() ||
629 succOperands
.size() + 1 == mergePoint
->getNumArguments());
630 if (succOperands
.size() + 1 == mergePoint
->getNumArguments())
631 succOperands
.append(getOrCreateDefaultValue());
635 LLVM_DEBUG(llvm::dbgs() << "[mem2reg] Promoted memory slot: " << slot
.ptr
638 if (statistics
.promotedAmount
)
639 (*statistics
.promotedAmount
)++;
641 return allocator
.handlePromotionComplete(slot
, defaultValue
, builder
);
644 LogicalResult
mlir::tryToPromoteMemorySlots(
645 ArrayRef
<PromotableAllocationOpInterface
> allocators
, OpBuilder
&builder
,
646 const DataLayout
&dataLayout
, DominanceInfo
&dominance
,
647 Mem2RegStatistics statistics
) {
648 bool promotedAny
= false;
650 // A cache that stores deterministic block indices which are used to determine
651 // a valid operation modification order. The block index maps are computed
652 // lazily and cached to avoid expensive recomputation.
653 BlockIndexCache blockIndexCache
;
655 SmallVector
<PromotableAllocationOpInterface
> workList(allocators
);
657 SmallVector
<PromotableAllocationOpInterface
> newWorkList
;
658 newWorkList
.reserve(workList
.size());
660 bool changesInThisRound
= false;
661 for (PromotableAllocationOpInterface allocator
: workList
) {
662 bool changedAllocator
= false;
663 for (MemorySlot slot
: allocator
.getPromotableSlots()) {
664 if (slot
.ptr
.use_empty())
667 MemorySlotPromotionAnalyzer
analyzer(slot
, dominance
, dataLayout
);
668 std::optional
<MemorySlotPromotionInfo
> info
= analyzer
.computeInfo();
670 std::optional
<PromotableAllocationOpInterface
> newAllocator
=
671 MemorySlotPromoter(slot
, allocator
, builder
, dominance
,
672 dataLayout
, std::move(*info
), statistics
,
675 changedAllocator
= true;
676 // Add newly created allocators to the worklist for further
679 newWorkList
.push_back(*newAllocator
);
681 // A break is required, since promoting a slot may invalidate the
682 // remaining slots of an allocator.
686 if (!changedAllocator
)
687 newWorkList
.push_back(allocator
);
688 changesInThisRound
|= changedAllocator
;
690 if (!changesInThisRound
)
694 // Swap the vector's backing memory and clear the entries in newWorkList
695 // afterwards. This ensures that additional heap allocations can be avoided.
696 workList
.swap(newWorkList
);
700 return success(promotedAny
);
705 struct Mem2Reg
: impl::Mem2RegBase
<Mem2Reg
> {
706 using impl::Mem2RegBase
<Mem2Reg
>::Mem2RegBase
;
708 void runOnOperation() override
{
709 Operation
*scopeOp
= getOperation();
711 Mem2RegStatistics statistics
{&promotedAmount
, &newBlockArgumentAmount
};
713 bool changed
= false;
715 auto &dataLayoutAnalysis
= getAnalysis
<DataLayoutAnalysis
>();
716 const DataLayout
&dataLayout
= dataLayoutAnalysis
.getAtOrAbove(scopeOp
);
717 auto &dominance
= getAnalysis
<DominanceInfo
>();
719 for (Region
®ion
: scopeOp
->getRegions()) {
720 if (region
.getBlocks().empty())
723 OpBuilder
builder(®ion
.front(), region
.front().begin());
725 SmallVector
<PromotableAllocationOpInterface
> allocators
;
726 // Build a list of allocators to attempt to promote the slots of.
727 region
.walk([&](PromotableAllocationOpInterface allocator
) {
728 allocators
.emplace_back(allocator
);
731 // Attempt promoting as many of the slots as possible.
732 if (succeeded(tryToPromoteMemorySlots(allocators
, builder
, dataLayout
,
733 dominance
, statistics
)))
737 markAllAnalysesPreserved();