1 //===- MemoryUtils.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "flang/Optimizer/Transforms/MemoryUtils.h"
10 #include "flang/Optimizer/Builder/FIRBuilder.h"
11 #include "flang/Optimizer/Builder/Todo.h"
12 #include "mlir/Dialect/OpenACC/OpenACC.h"
13 #include "mlir/IR/Dominance.h"
14 #include "llvm/ADT/STLExtras.h"
17 /// Helper class to detect if an alloca is inside an mlir::Block that can be
18 /// reached again before its deallocation points via block successors. This
19 /// analysis is only valid if the deallocation points are inside (or nested
20 /// inside) the same region as alloca because it does not consider region CFG
21 /// (for instance, the block inside a fir.do_loop is obviously inside a loop,
22 /// but is not a loop formed by blocks). The dominance of the alloca on its
23 /// deallocation points implies this pre-condition (although it is more
25 class BlockCycleDetector
{
27 bool allocaIsInCycle(fir::AllocaOp alloca
,
28 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
);
31 // Cache for blocks owning alloca that have been analyzed. In many Fortran
32 // programs, allocas are usually made in the same blocks with no block cycles.
33 // So getting a fast "no" is beneficial.
34 llvm::DenseMap
<mlir::Block
*, /*isInCycle*/ bool> analyzed
;
39 class AllocaReplaceImpl
{
41 AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter
,
42 fir::DeallocCallBack deallocGenerator
)
43 : allocaRewriter
{allocaRewriter
}, deallocGenerator
{deallocGenerator
} {}
44 bool replace(mlir::RewriterBase
&, fir::AllocaOp
);
47 mlir::Region
*findDeallocationPointsAndOwner(
49 llvm::SmallVectorImpl
<mlir::Operation
*> &deallocationPoints
);
51 allocDominatesDealloc(fir::AllocaOp alloca
,
52 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
) {
53 return llvm::all_of(deallocationPoints
, [&](mlir::Operation
*deallocPoint
) {
54 return this->dominanceInfo
.properlyDominates(alloca
.getOperation(),
59 genIndirectDeallocation(mlir::RewriterBase
&, fir::AllocaOp
,
60 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
,
61 mlir::Value replacement
, mlir::Region
&owningRegion
);
64 fir::AllocaRewriterCallBack allocaRewriter
;
65 fir::DeallocCallBack deallocGenerator
;
66 mlir::DominanceInfo dominanceInfo
;
67 BlockCycleDetector blockCycleDetector
;
72 allocaIsInCycleImpl(mlir::Block
*allocaBlock
,
73 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
) {
74 llvm::DenseSet
<mlir::Block
*> seen
;
75 // Insert the deallocation point blocks as "seen" so that the block
76 // traversal will stop at them.
77 for (mlir::Operation
*deallocPoint
: deallocationPoints
)
78 seen
.insert(deallocPoint
->getBlock());
79 if (seen
.contains(allocaBlock
))
81 // Traverse the block successor graph starting by the alloca block.
82 llvm::SmallVector
<mlir::Block
*> successors
{allocaBlock
};
83 while (!successors
.empty())
84 for (mlir::Block
*next
: successors
.pop_back_val()->getSuccessors()) {
85 if (next
== allocaBlock
)
87 if (auto pair
= seen
.insert(next
); pair
.second
)
88 successors
.push_back(next
);
90 // The traversal did not reach the alloca block again.
93 bool BlockCycleDetector::allocaIsInCycle(
95 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
) {
96 mlir::Block
*allocaBlock
= alloca
->getBlock();
97 auto analyzedPair
= analyzed
.try_emplace(allocaBlock
, /*isInCycle=*/false);
98 bool alreadyAnalyzed
= !analyzedPair
.second
;
99 bool &isInCycle
= analyzedPair
.first
->second
;
100 // Fast exit if block was already analyzed and no cycle was found.
101 if (alreadyAnalyzed
&& !isInCycle
)
103 // If the analysis was not done generically for this block, run it and
105 if (!alreadyAnalyzed
)
106 isInCycle
= allocaIsInCycleImpl(allocaBlock
, /*deallocationPoints*/ {});
109 // If the generic analysis found a block loop, see if the deallocation
110 // point would be reached before reaching the block again. Do not
111 // cache that analysis that is specific to the deallocation points
112 // found for this alloca.
113 return allocaIsInCycleImpl(allocaBlock
, deallocationPoints
);
116 static bool terminatorYieldsMemory(mlir::Operation
&terminator
) {
117 return llvm::any_of(terminator
.getResults(), [](mlir::OpResult res
) {
118 return fir::conformsWithPassByRef(res
.getType());
122 static bool isRegionTerminator(mlir::Operation
&terminator
) {
123 // Using ReturnLike trait is tempting but it is not set on
124 // all region terminator that matters (like omp::TerminatorOp that
126 // May be true for dead code. It is not a correctness issue and dead code can
127 // be eliminated by running region simplification before this utility is
129 // May also be true for unreachable like terminators (e.g., after an abort
130 // call related to Fortran STOP). This is also OK, the inserted deallocation
131 // will simply never be reached. It is easier for the rest of the code here
132 // to assume there is always at least one deallocation point, so keep
133 // unreachable terminators.
134 return !terminator
.hasSuccessors();
137 mlir::Region
*AllocaReplaceImpl::findDeallocationPointsAndOwner(
138 fir::AllocaOp alloca
,
139 llvm::SmallVectorImpl
<mlir::Operation
*> &deallocationPoints
) {
140 // Step 1: Identify the operation and region owning the alloca.
141 mlir::Region
*owningRegion
= alloca
.getOwnerRegion();
144 mlir::Operation
*owningOp
= owningRegion
->getParentOp();
145 assert(owningOp
&& "region expected to be owned");
146 // Step 2: Identify the exit points of the owning region, they are the default
147 // deallocation points. TODO: detect and use lifetime markers to get earlier
148 // deallocation points.
149 bool isOpenACCMPRecipe
= mlir::isa
<mlir::accomp::RecipeInterface
>(owningOp
);
150 for (mlir::Block
&block
: owningRegion
->getBlocks())
151 if (mlir::Operation
*terminator
= block
.getTerminator();
152 isRegionTerminator(*terminator
)) {
153 // FIXME: OpenACC and OpenMP privatization recipe are stand alone
154 // operation meant to be later "inlined", the value they return may
155 // be the address of a local alloca. It would be incorrect to insert
156 // deallocation before the terminator (this would introduce use after
157 // free once the recipe is inlined.
158 // This probably require redesign or special handling on the OpenACC/MP
160 if (isOpenACCMPRecipe
&& terminatorYieldsMemory(*terminator
))
162 deallocationPoints
.push_back(terminator
);
164 // If no block terminators without successors have been found, this is
165 // an odd region we cannot reason about (never seen yet in FIR and
166 // mainstream dialects, but MLIR does not really prevent it).
167 if (deallocationPoints
.empty())
170 // Step 3: detect block based loops between the allocation and deallocation
171 // points, and add a deallocation point on the back edge to avoid memory
173 // The detection avoids doing region CFG analysis by assuming that there may
174 // be cycles if deallocation points are not dominated by the alloca.
175 // This leaves the cases where the deallocation points are in the same region
176 // as the alloca (or nested inside it). In which cases there may be a back
177 // edge between the alloca and the deallocation point via block successors. An
178 // analysis is run to detect those cases.
179 // When a loop is detected, the easiest solution to deallocate on the back
180 // edge is to store the allocated memory address in a variable (that dominates
181 // the loops) and to deallocate the address in that variable if it is set
182 // before executing the allocation. This strategy still leads to correct
183 // execution in the "false positive" cases.
184 // Hence, the alloca is added as a deallocation point when there is no
185 // dominance. Note that bringing lifetime markers above will reduce the
187 if (!allocDominatesDealloc(alloca
, deallocationPoints
) ||
188 blockCycleDetector
.allocaIsInCycle(alloca
, deallocationPoints
))
189 deallocationPoints
.push_back(alloca
.getOperation());
193 void AllocaReplaceImpl::genIndirectDeallocation(
194 mlir::RewriterBase
&rewriter
, fir::AllocaOp alloca
,
195 llvm::ArrayRef
<mlir::Operation
*> deallocationPoints
,
196 mlir::Value replacement
, mlir::Region
&owningRegion
) {
197 mlir::Location loc
= alloca
.getLoc();
198 auto replacementInsertPoint
= rewriter
.saveInsertionPoint();
199 // Create C pointer variable in the entry block to store the alloc
200 // and access it indirectly in the entry points that do not dominate.
201 rewriter
.setInsertionPointToStart(&owningRegion
.front());
202 mlir::Type heapType
= fir::HeapType::get(alloca
.getInType());
203 mlir::Value ptrVar
= rewriter
.create
<fir::AllocaOp
>(loc
, heapType
);
204 mlir::Value nullPtr
= rewriter
.create
<fir::ZeroOp
>(loc
, heapType
);
205 rewriter
.create
<fir::StoreOp
>(loc
, nullPtr
, ptrVar
);
206 // TODO: introducing a pointer compare op in FIR would help
207 // generating less IR here.
208 mlir::Type intPtrTy
= fir::getIntPtrType(rewriter
);
209 mlir::Value c0
= rewriter
.create
<mlir::arith::ConstantOp
>(
210 loc
, intPtrTy
, rewriter
.getIntegerAttr(intPtrTy
, 0));
212 // Store new storage address right after its creation.
213 rewriter
.restoreInsertionPoint(replacementInsertPoint
);
214 mlir::Value castReplacement
=
215 fir::factory::createConvert(rewriter
, loc
, heapType
, replacement
);
216 rewriter
.create
<fir::StoreOp
>(loc
, castReplacement
, ptrVar
);
218 // Generate conditional deallocation at every deallocation point.
219 auto genConditionalDealloc
= [&](mlir::Location loc
) {
220 mlir::Value ptrVal
= rewriter
.create
<fir::LoadOp
>(loc
, ptrVar
);
221 mlir::Value ptrToInt
=
222 rewriter
.create
<fir::ConvertOp
>(loc
, intPtrTy
, ptrVal
);
223 mlir::Value isAllocated
= rewriter
.create
<mlir::arith::CmpIOp
>(
224 loc
, mlir::arith::CmpIPredicate::ne
, ptrToInt
, c0
);
225 auto ifOp
= rewriter
.create
<fir::IfOp
>(loc
, std::nullopt
, isAllocated
,
226 /*withElseRegion=*/false);
227 rewriter
.setInsertionPointToStart(&ifOp
.getThenRegion().front());
228 mlir::Value cast
= fir::factory::createConvert(
229 rewriter
, loc
, replacement
.getType(), ptrVal
);
230 deallocGenerator(loc
, rewriter
, cast
);
231 // Currently there is no need to reset the pointer var because two
232 // deallocation points can never be reached without going through the
234 rewriter
.setInsertionPointAfter(ifOp
);
236 for (mlir::Operation
*deallocPoint
: deallocationPoints
) {
237 rewriter
.setInsertionPoint(deallocPoint
);
238 genConditionalDealloc(deallocPoint
->getLoc());
242 bool AllocaReplaceImpl::replace(mlir::RewriterBase
&rewriter
,
243 fir::AllocaOp alloca
) {
244 llvm::SmallVector
<mlir::Operation
*> deallocationPoints
;
245 mlir::Region
*owningRegion
=
246 findDeallocationPointsAndOwner(alloca
, deallocationPoints
);
249 rewriter
.setInsertionPointAfter(alloca
.getOperation());
250 bool deallocPointsDominateAlloc
=
251 allocDominatesDealloc(alloca
, deallocationPoints
);
252 if (mlir::Value replacement
=
253 allocaRewriter(rewriter
, alloca
, deallocPointsDominateAlloc
)) {
254 mlir::Value castReplacement
= fir::factory::createConvert(
255 rewriter
, alloca
.getLoc(), alloca
.getType(), replacement
);
256 if (deallocPointsDominateAlloc
)
257 for (mlir::Operation
*deallocPoint
: deallocationPoints
) {
258 rewriter
.setInsertionPoint(deallocPoint
);
259 deallocGenerator(deallocPoint
->getLoc(), rewriter
, replacement
);
262 genIndirectDeallocation(rewriter
, alloca
, deallocationPoints
, replacement
,
264 rewriter
.replaceOp(alloca
, castReplacement
);
269 bool fir::replaceAllocas(mlir::RewriterBase
&rewriter
,
270 mlir::Operation
*parentOp
,
271 MustRewriteCallBack mustReplace
,
272 AllocaRewriterCallBack allocaRewriter
,
273 DeallocCallBack deallocGenerator
) {
274 // If the parent operation is not an alloca owner, the code below would risk
275 // modifying IR outside of parentOp.
276 if (!fir::AllocaOp::ownsNestedAlloca(parentOp
))
278 auto insertPoint
= rewriter
.saveInsertionPoint();
279 bool replacedAllRequestedAlloca
= true;
280 AllocaReplaceImpl
impl(allocaRewriter
, deallocGenerator
);
281 parentOp
->walk([&](fir::AllocaOp alloca
) {
282 if (mustReplace(alloca
))
283 replacedAllRequestedAlloca
&= impl
.replace(rewriter
, alloca
);
285 rewriter
.restoreInsertionPoint(insertPoint
);
286 return replacedAllRequestedAlloca
;