1 //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines some loop transformation utilities.
11 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14 #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/Optional.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/DemandedBits.h"
24 #include "llvm/Analysis/EHPersonalities.h"
25 #include "llvm/Analysis/IVDescriptors.h"
26 #include "llvm/Analysis/MustExecute.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InstrTypes.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/IR/ValueHandle.h"
33 #include "llvm/Support/Casting.h"
38 class AliasSetTracker
;
44 class MemorySSAUpdater
;
45 class OptimizationRemarkEmitter
;
46 class PredicatedScalarEvolution
;
47 class PredIteratorCache
;
48 class ScalarEvolution
;
50 class TargetLibraryInfo
;
51 class TargetTransformInfo
;
53 BasicBlock
*InsertPreheaderForLoop(Loop
*L
, DominatorTree
*DT
, LoopInfo
*LI
,
54 MemorySSAUpdater
*MSSAU
, bool PreserveLCSSA
);
56 /// Ensure that all exit blocks of the loop are dedicated exits.
58 /// For any loop exit block with non-loop predecessors, we split the loop
59 /// predecessors to use a dedicated loop exit block. We update the dominator
60 /// tree and loop info if provided, and will preserve LCSSA if requested.
61 bool formDedicatedExitBlocks(Loop
*L
, DominatorTree
*DT
, LoopInfo
*LI
,
62 MemorySSAUpdater
*MSSAU
, bool PreserveLCSSA
);
64 /// Ensures LCSSA form for every instruction from the Worklist in the scope of
65 /// innermost containing loop.
67 /// For the given instruction which have uses outside of the loop, an LCSSA PHI
68 /// node is inserted and the uses outside the loop are rewritten to use this
71 /// LoopInfo and DominatorTree are required and, since the routine makes no
72 /// changes to CFG, preserved.
74 /// Returns true if any modifications are made.
75 bool formLCSSAForInstructions(SmallVectorImpl
<Instruction
*> &Worklist
,
76 DominatorTree
&DT
, LoopInfo
&LI
);
78 /// Put loop into LCSSA form.
80 /// Looks at all instructions in the loop which have uses outside of the
81 /// current loop. For each, an LCSSA PHI node is inserted and the uses outside
82 /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
85 /// LoopInfo and DominatorTree are required and preserved.
87 /// If ScalarEvolution is passed in, it will be preserved.
89 /// Returns true if any modifications are made to the loop.
90 bool formLCSSA(Loop
&L
, DominatorTree
&DT
, LoopInfo
*LI
, ScalarEvolution
*SE
);
92 /// Put a loop nest into LCSSA form.
94 /// This recursively forms LCSSA for a loop nest.
96 /// LoopInfo and DominatorTree are required and preserved.
98 /// If ScalarEvolution is passed in, it will be preserved.
100 /// Returns true if any modifications are made to the loop.
101 bool formLCSSARecursively(Loop
&L
, DominatorTree
&DT
, LoopInfo
*LI
,
102 ScalarEvolution
*SE
);
104 struct SinkAndHoistLICMFlags
{
105 bool NoOfMemAccTooLarge
;
106 unsigned LicmMssaOptCounter
;
107 unsigned LicmMssaOptCap
;
108 unsigned LicmMssaNoAccForPromotionCap
;
112 /// Walk the specified region of the CFG (defined by all blocks
113 /// dominated by the specified block, and that are in the current loop) in
114 /// reverse depth first order w.r.t the DominatorTree. This allows us to visit
115 /// uses before definitions, allowing us to sink a loop body in one pass without
116 /// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
117 /// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
118 /// instructions of the loop and loop safety information as
119 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
120 bool sinkRegion(DomTreeNode
*, AliasAnalysis
*, LoopInfo
*, DominatorTree
*,
121 TargetLibraryInfo
*, TargetTransformInfo
*, Loop
*,
122 AliasSetTracker
*, MemorySSAUpdater
*, ICFLoopSafetyInfo
*,
123 SinkAndHoistLICMFlags
&, OptimizationRemarkEmitter
*);
125 /// Walk the specified region of the CFG (defined by all blocks
126 /// dominated by the specified block, and that are in the current loop) in depth
127 /// first order w.r.t the DominatorTree. This allows us to visit definitions
128 /// before uses, allowing us to hoist a loop body in one pass without iteration.
129 /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
130 /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
131 /// loop and loop safety information as arguments. Diagnostics is emitted via \p
132 /// ORE. It returns changed status.
133 bool hoistRegion(DomTreeNode
*, AliasAnalysis
*, LoopInfo
*, DominatorTree
*,
134 TargetLibraryInfo
*, Loop
*, AliasSetTracker
*,
135 MemorySSAUpdater
*, ICFLoopSafetyInfo
*,
136 SinkAndHoistLICMFlags
&, OptimizationRemarkEmitter
*);
138 /// This function deletes dead loops. The caller of this function needs to
139 /// guarantee that the loop is infact dead.
140 /// The function requires a bunch or prerequisites to be present:
141 /// - The loop needs to be in LCSSA form
142 /// - The loop needs to have a Preheader
143 /// - A unique dedicated exit block must exist
145 /// This also updates the relevant analysis information in \p DT, \p SE, and \p
146 /// LI if pointers to those are provided.
147 /// It also updates the loop PM if an updater struct is provided.
149 void deleteDeadLoop(Loop
*L
, DominatorTree
*DT
, ScalarEvolution
*SE
,
152 /// Try to promote memory values to scalars by sinking stores out of
153 /// the loop and moving loads to before the loop. We do this by looping over
154 /// the stores in the loop, looking for stores to Must pointers which are
155 /// loop invariant. It takes a set of must-alias values, Loop exit blocks
156 /// vector, loop exit blocks insertion point vector, PredIteratorCache,
157 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
158 /// of the loop and loop safety information as arguments.
159 /// Diagnostics is emitted via \p ORE. It returns changed status.
160 bool promoteLoopAccessesToScalars(
161 const SmallSetVector
<Value
*, 8> &, SmallVectorImpl
<BasicBlock
*> &,
162 SmallVectorImpl
<Instruction
*> &, SmallVectorImpl
<MemoryAccess
*> &,
163 PredIteratorCache
&, LoopInfo
*, DominatorTree
*, const TargetLibraryInfo
*,
164 Loop
*, AliasSetTracker
*, MemorySSAUpdater
*, ICFLoopSafetyInfo
*,
165 OptimizationRemarkEmitter
*);
167 /// Does a BFS from a given node to all of its children inside a given loop.
168 /// The returned vector of nodes includes the starting point.
169 SmallVector
<DomTreeNode
*, 16> collectChildrenInLoop(DomTreeNode
*N
,
170 const Loop
*CurLoop
);
172 /// Returns the instructions that use values defined in the loop.
173 SmallVector
<Instruction
*, 8> findDefsUsedOutsideOfLoop(Loop
*L
);
175 /// Find string metadata for loop
177 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
178 /// operand or null otherwise. If the string metadata is not found return
179 /// Optional's not-a-value.
180 Optional
<const MDOperand
*> findStringMetadataForLoop(const Loop
*TheLoop
,
183 /// Find named metadata for a loop with an integer value.
184 llvm::Optional
<int> getOptionalIntLoopAttribute(Loop
*TheLoop
, StringRef Name
);
186 /// Create a new loop identifier for a loop created from a loop transformation.
188 /// @param OrigLoopID The loop ID of the loop before the transformation.
189 /// @param FollowupAttrs List of attribute names that contain attributes to be
190 /// added to the new loop ID.
191 /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
192 /// from the original loop. The following values
194 /// nullptr : Inherit all attributes from @p OrigLoopID.
195 /// "" : Do not inherit any attribute from @p OrigLoopID; only use
196 /// those specified by a followup attribute.
197 /// "<prefix>": Inherit all attributes except those which start with
198 /// <prefix>; commonly used to remove metadata for the
199 /// applied transformation.
200 /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
203 /// @return The loop ID for the after-transformation loop. The following values
205 /// None : No followup attribute was found; it is up to the
206 /// transformation to choose attributes that make sense.
207 /// @p OrigLoopID: The original identifier can be reused.
208 /// nullptr : The new loop has no attributes.
209 /// MDNode* : A new unique loop identifier.
211 makeFollowupLoopID(MDNode
*OrigLoopID
, ArrayRef
<StringRef
> FollowupAttrs
,
212 const char *InheritOptionsAttrsPrefix
= "",
213 bool AlwaysNew
= false);
215 /// Look for the loop attribute that disables all transformation heuristic.
216 bool hasDisableAllTransformsHint(const Loop
*L
);
218 /// Look for the loop attribute that disables the LICM transformation heuristics.
219 bool hasDisableLICMTransformsHint(const Loop
*L
);
221 /// The mode sets how eager a transformation should be applied.
222 enum TransformationMode
{
223 /// The pass can use heuristics to determine whether a transformation should
227 /// The transformation should be applied without considering a cost model.
230 /// The transformation should not be applied.
233 /// Force is a flag and should not be used alone.
236 /// The transformation was directed by the user, e.g. by a #pragma in
237 /// the source code. If the transformation could not be applied, a
238 /// warning should be emitted.
239 TM_ForcedByUser
= TM_Enable
| TM_Force
,
241 /// The transformation must not be applied. For instance, `#pragma clang loop
242 /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
243 /// general loop metadata, it must not be dropped. Most passes should not
244 /// behave differently under TM_Disable and TM_SuppressedByUser.
245 TM_SuppressedByUser
= TM_Disable
| TM_Force
249 /// Get the mode for LLVM's supported loop transformations.
250 TransformationMode
hasUnrollTransformation(Loop
*L
);
251 TransformationMode
hasUnrollAndJamTransformation(Loop
*L
);
252 TransformationMode
hasVectorizeTransformation(Loop
*L
);
253 TransformationMode
hasDistributeTransformation(Loop
*L
);
254 TransformationMode
hasLICMVersioningTransformation(Loop
*L
);
257 /// Set input string into loop metadata by keeping other values intact.
258 /// If the string is already in loop metadata update value if it is
260 void addStringMetadataToLoop(Loop
*TheLoop
, const char *MDString
,
263 /// Get a loop's estimated trip count based on branch weight metadata.
264 /// Returns 0 when the count is estimated to be 0, or None when a meaningful
265 /// estimate can not be made.
266 Optional
<unsigned> getLoopEstimatedTripCount(Loop
*L
);
268 /// Check inner loop (L) backedge count is known to be invariant on all
269 /// iterations of its outer loop. If the loop has no parent, this is trivially
271 bool hasIterationCountInvariantInParent(Loop
*L
, ScalarEvolution
&SE
);
273 /// Helper to consistently add the set of standard passes to a loop pass's \c
276 /// All loop passes should call this as part of implementing their \c
277 /// getAnalysisUsage.
278 void getLoopAnalysisUsage(AnalysisUsage
&AU
);
280 /// Returns true if is legal to hoist or sink this instruction disregarding the
281 /// possible introduction of faults. Reasoning about potential faulting
282 /// instructions is the responsibility of the caller since it is challenging to
283 /// do efficiently from within this routine.
284 /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
285 /// target executes at most once per execution of the loop body. This is used
286 /// to assess the legality of duplicating atomic loads. Generally, this is
287 /// true when moving out of loop and not true when moving into loops.
288 /// If \p ORE is set use it to emit optimization remarks.
289 bool canSinkOrHoistInst(Instruction
&I
, AAResults
*AA
, DominatorTree
*DT
,
290 Loop
*CurLoop
, AliasSetTracker
*CurAST
,
291 MemorySSAUpdater
*MSSAU
, bool TargetExecutesOncePerLoop
,
292 SinkAndHoistLICMFlags
*LICMFlags
= nullptr,
293 OptimizationRemarkEmitter
*ORE
= nullptr);
295 /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
296 Value
*createMinMaxOp(IRBuilder
<> &Builder
,
297 RecurrenceDescriptor::MinMaxRecurrenceKind RK
,
298 Value
*Left
, Value
*Right
);
300 /// Generates an ordered vector reduction using extracts to reduce the value.
302 getOrderedReduction(IRBuilder
<> &Builder
, Value
*Acc
, Value
*Src
, unsigned Op
,
303 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind
=
304 RecurrenceDescriptor::MRK_Invalid
,
305 ArrayRef
<Value
*> RedOps
= None
);
307 /// Generates a vector reduction using shufflevectors to reduce the value.
308 /// Fast-math-flags are propagated using the IRBuilder's setting.
309 Value
*getShuffleReduction(IRBuilder
<> &Builder
, Value
*Src
, unsigned Op
,
310 RecurrenceDescriptor::MinMaxRecurrenceKind
311 MinMaxKind
= RecurrenceDescriptor::MRK_Invalid
,
312 ArrayRef
<Value
*> RedOps
= None
);
314 /// Create a target reduction of the given vector. The reduction operation
315 /// is described by the \p Opcode parameter. min/max reductions require
316 /// additional information supplied in \p Flags.
317 /// The target is queried to determine if intrinsics or shuffle sequences are
318 /// required to implement the reduction.
319 /// Fast-math-flags are propagated using the IRBuilder's setting.
320 Value
*createSimpleTargetReduction(IRBuilder
<> &B
,
321 const TargetTransformInfo
*TTI
,
322 unsigned Opcode
, Value
*Src
,
323 TargetTransformInfo::ReductionFlags Flags
=
324 TargetTransformInfo::ReductionFlags(),
325 ArrayRef
<Value
*> RedOps
= None
);
327 /// Create a generic target reduction using a recurrence descriptor \p Desc
328 /// The target is queried to determine if intrinsics or shuffle sequences are
329 /// required to implement the reduction.
330 /// Fast-math-flags are propagated using the RecurrenceDescriptor.
331 Value
*createTargetReduction(IRBuilder
<> &B
, const TargetTransformInfo
*TTI
,
332 RecurrenceDescriptor
&Desc
, Value
*Src
,
335 /// Get the intersection (logical and) of all of the potential IR flags
336 /// of each scalar operation (VL) that will be converted into a vector (I).
337 /// If OpValue is non-null, we only consider operations similar to OpValue
338 /// when intersecting.
339 /// Flag set: NSW, NUW, exact, and all of fast-math.
340 void propagateIRFlags(Value
*I
, ArrayRef
<Value
*> VL
, Value
*OpValue
= nullptr);
342 /// Returns true if we can prove that \p S is defined and always negative in
344 bool isKnownNegativeInLoop(const SCEV
*S
, const Loop
*L
, ScalarEvolution
&SE
);
346 /// Returns true if we can prove that \p S is defined and always non-negative in
348 bool isKnownNonNegativeInLoop(const SCEV
*S
, const Loop
*L
,
349 ScalarEvolution
&SE
);
351 /// Returns true if \p S is defined and never is equal to signed/unsigned max.
352 bool cannotBeMaxInLoop(const SCEV
*S
, const Loop
*L
, ScalarEvolution
&SE
,
355 /// Returns true if \p S is defined and never is equal to signed/unsigned min.
356 bool cannotBeMinInLoop(const SCEV
*S
, const Loop
*L
, ScalarEvolution
&SE
,
359 } // end namespace llvm
361 #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H