1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
24 //===----------------------------------------------------------------------===//
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
29 #include "llvm/ADT/MapVector.h"
30 #include "llvm/Analysis/LoopAccessAnalysis.h"
31 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
32 #include "llvm/Transforms/Utils/LoopUtils.h"
36 /// Create an analysis remark that explains why vectorization failed
38 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
39 /// RemarkName is the identifier for the remark. If \p I is passed it is an
40 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for
41 /// the location of the remark. \return the remark object that can be
43 OptimizationRemarkAnalysis
createLVMissedAnalysis(const char *PassName
,
46 Instruction
*I
= nullptr);
48 /// Utility class for getting and setting loop vectorizer hints in the form
50 /// This class keeps a number of loop annotations locally (as member variables)
51 /// and can, upon request, write them back as metadata on the loop. It will
52 /// initially scan the loop for existing metadata, and will update the local
53 /// values based on information in the loop.
54 /// We cannot write all values to metadata, as the mere presence of some info,
55 /// for example 'force', means a decision has been made. So, we need to be
56 /// careful NOT to add them if the user hasn't specifically asked so.
57 class LoopVectorizeHints
{
58 enum HintKind
{ HK_WIDTH
, HK_UNROLL
, HK_FORCE
, HK_ISVECTORIZED
};
60 /// Hint - associates name and validation with the hint value.
63 unsigned Value
; // This may have to change for non-numeric values.
66 Hint(const char *Name
, unsigned Value
, HintKind Kind
)
67 : Name(Name
), Value(Value
), Kind(Kind
) {}
69 bool validate(unsigned Val
);
72 /// Vectorization width.
75 /// Vectorization interleave factor.
78 /// Vectorization forced
81 /// Already Vectorized
84 /// Return the loop metadata prefix.
85 static StringRef
Prefix() { return "llvm.loop."; }
87 /// True if there is any unsafe math in the loop.
88 bool PotentiallyUnsafe
= false;
92 FK_Undefined
= -1, ///< Not selected.
93 FK_Disabled
= 0, ///< Forcing disabled.
94 FK_Enabled
= 1, ///< Forcing enabled.
97 LoopVectorizeHints(const Loop
*L
, bool InterleaveOnlyWhenForced
,
98 OptimizationRemarkEmitter
&ORE
);
100 /// Mark the loop L as already vectorized by setting the width to 1.
101 void setAlreadyVectorized();
103 bool allowVectorization(Function
*F
, Loop
*L
,
104 bool VectorizeOnlyWhenForced
) const;
106 /// Dumps all the hint information.
107 void emitRemarkWithHints() const;
109 unsigned getWidth() const { return Width
.Value
; }
110 unsigned getInterleave() const { return Interleave
.Value
; }
111 unsigned getIsVectorized() const { return IsVectorized
.Value
; }
112 enum ForceKind
getForce() const {
113 if ((ForceKind
)Force
.Value
== FK_Undefined
&&
114 hasDisableAllTransformsHint(TheLoop
))
116 return (ForceKind
)Force
.Value
;
119 /// If hints are provided that force vectorization, use the AlwaysPrint
120 /// pass name to force the frontend to print the diagnostic.
121 const char *vectorizeAnalysisPassName() const;
123 bool allowReordering() const {
124 // When enabling loop hints are provided we allow the vectorizer to change
125 // the order of operations that is given by the scalar loop. This is not
126 // enabled by default because can be unsafe or inefficient. For example,
127 // reordering floating-point operations will change the way round-off
128 // error accumulates in the loop.
129 return getForce() == LoopVectorizeHints::FK_Enabled
|| getWidth() > 1;
132 bool isPotentiallyUnsafe() const {
133 // Avoid FP vectorization if the target is unsure about proper support.
134 // This may be related to the SIMD unit in the target not handling
135 // IEEE 754 FP ops properly, or bad single-to-double promotions.
136 // Otherwise, a sequence of vectorized loops, even without reduction,
137 // could lead to different end results on the destination vectors.
138 return getForce() != LoopVectorizeHints::FK_Enabled
&& PotentiallyUnsafe
;
141 void setPotentiallyUnsafe() { PotentiallyUnsafe
= true; }
144 /// Find hints specified in the loop metadata and update local values.
145 void getHintsFromMetadata();
147 /// Checks string hint with one operand and set value if valid.
148 void setHint(StringRef Name
, Metadata
*Arg
);
150 /// The loop these hints belong to.
153 /// Interface to emit optimization remarks.
154 OptimizationRemarkEmitter
&ORE
;
157 /// This holds vectorization requirements that must be verified late in
158 /// the process. The requirements are set by legalize and costmodel. Once
159 /// vectorization has been determined to be possible and profitable the
160 /// requirements can be verified by looking for metadata or compiler options.
161 /// For example, some loops require FP commutativity which is only allowed if
162 /// vectorization is explicitly specified or if the fast-math compiler option
163 /// has been provided.
164 /// Late evaluation of these requirements allows helpful diagnostics to be
165 /// composed that tells the user what need to be done to vectorize the loop. For
166 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
167 /// evaluation should be used only when diagnostics can generated that can be
168 /// followed by a non-expert user.
169 class LoopVectorizationRequirements
{
171 LoopVectorizationRequirements(OptimizationRemarkEmitter
&ORE
) : ORE(ORE
) {}
173 void addUnsafeAlgebraInst(Instruction
*I
) {
174 // First unsafe algebra instruction.
175 if (!UnsafeAlgebraInst
)
176 UnsafeAlgebraInst
= I
;
179 void addRuntimePointerChecks(unsigned Num
) { NumRuntimePointerChecks
= Num
; }
181 bool doesNotMeet(Function
*F
, Loop
*L
, const LoopVectorizeHints
&Hints
);
184 unsigned NumRuntimePointerChecks
= 0;
185 Instruction
*UnsafeAlgebraInst
= nullptr;
187 /// Interface to emit optimization remarks.
188 OptimizationRemarkEmitter
&ORE
;
191 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
192 /// to what vectorization factor.
193 /// This class does not look at the profitability of vectorization, only the
194 /// legality. This class has two main kinds of checks:
195 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
196 /// will change the order of memory accesses in a way that will change the
197 /// correctness of the program.
198 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
199 /// checks for a number of different conditions, such as the availability of a
200 /// single induction variable, that all types are supported and vectorize-able,
201 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
202 /// This class is also used by InnerLoopVectorizer for identifying
203 /// induction variable and the different reduction variables.
204 class LoopVectorizationLegality
{
206 LoopVectorizationLegality(
207 Loop
*L
, PredicatedScalarEvolution
&PSE
, DominatorTree
*DT
,
208 TargetLibraryInfo
*TLI
, AliasAnalysis
*AA
, Function
*F
,
209 std::function
<const LoopAccessInfo
&(Loop
&)> *GetLAA
, LoopInfo
*LI
,
210 OptimizationRemarkEmitter
*ORE
, LoopVectorizationRequirements
*R
,
211 LoopVectorizeHints
*H
, DemandedBits
*DB
, AssumptionCache
*AC
)
212 : TheLoop(L
), LI(LI
), PSE(PSE
), TLI(TLI
), DT(DT
), GetLAA(GetLAA
),
213 ORE(ORE
), Requirements(R
), Hints(H
), DB(DB
), AC(AC
) {}
215 /// ReductionList contains the reduction descriptors for all
216 /// of the reductions that were found in the loop.
217 using ReductionList
= DenseMap
<PHINode
*, RecurrenceDescriptor
>;
219 /// InductionList saves induction variables and maps them to the
220 /// induction descriptor.
221 using InductionList
= MapVector
<PHINode
*, InductionDescriptor
>;
223 /// RecurrenceSet contains the phi nodes that are recurrences other than
224 /// inductions and reductions.
225 using RecurrenceSet
= SmallPtrSet
<const PHINode
*, 8>;
227 /// Returns true if it is legal to vectorize this loop.
228 /// This does not mean that it is profitable to vectorize this
229 /// loop, only that it is legal to do so.
230 /// Temporarily taking UseVPlanNativePath parameter. If true, take
231 /// the new code path being implemented for outer loop vectorization
232 /// (should be functional for inner loop vectorization) based on VPlan.
233 /// If false, good old LV code.
234 bool canVectorize(bool UseVPlanNativePath
);
236 /// Return true if we can vectorize this loop while folding its tail by
238 bool canFoldTailByMasking();
240 /// Returns the primary induction variable.
241 PHINode
*getPrimaryInduction() { return PrimaryInduction
; }
243 /// Returns the reduction variables found in the loop.
244 ReductionList
*getReductionVars() { return &Reductions
; }
246 /// Returns the induction variables found in the loop.
247 InductionList
*getInductionVars() { return &Inductions
; }
249 /// Return the first-order recurrences found in the loop.
250 RecurrenceSet
*getFirstOrderRecurrences() { return &FirstOrderRecurrences
; }
252 /// Return the set of instructions to sink to handle first-order recurrences.
253 DenseMap
<Instruction
*, Instruction
*> &getSinkAfter() { return SinkAfter
; }
255 /// Returns the widest induction type.
256 Type
*getWidestInductionType() { return WidestIndTy
; }
258 /// Returns True if V is a Phi node of an induction variable in this loop.
259 bool isInductionPhi(const Value
*V
);
261 /// Returns True if V is a cast that is part of an induction def-use chain,
262 /// and had been proven to be redundant under a runtime guard (in other
263 /// words, the cast has the same SCEV expression as the induction phi).
264 bool isCastedInductionVariable(const Value
*V
);
266 /// Returns True if V can be considered as an induction variable in this
267 /// loop. V can be the induction phi, or some redundant cast in the def-use
268 /// chain of the inducion phi.
269 bool isInductionVariable(const Value
*V
);
271 /// Returns True if PN is a reduction variable in this loop.
272 bool isReductionVariable(PHINode
*PN
) { return Reductions
.count(PN
); }
274 /// Returns True if Phi is a first-order recurrence in this loop.
275 bool isFirstOrderRecurrence(const PHINode
*Phi
);
277 /// Return true if the block BB needs to be predicated in order for the loop
278 /// to be vectorized.
279 bool blockNeedsPredication(BasicBlock
*BB
);
281 /// Check if this pointer is consecutive when vectorizing. This happens
282 /// when the last index of the GEP is the induction variable, or that the
283 /// pointer itself is an induction variable.
284 /// This check allows us to vectorize A[idx] into a wide load/store.
286 /// 0 - Stride is unknown or non-consecutive.
287 /// 1 - Address is consecutive.
288 /// -1 - Address is consecutive, and decreasing.
289 /// NOTE: This method must only be used before modifying the original scalar
290 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
291 int isConsecutivePtr(Value
*Ptr
);
293 /// Returns true if the value V is uniform within the loop.
294 bool isUniform(Value
*V
);
296 /// Returns the information that we collected about runtime memory check.
297 const RuntimePointerChecking
*getRuntimePointerChecking() const {
298 return LAI
->getRuntimePointerChecking();
301 const LoopAccessInfo
*getLAI() const { return LAI
; }
303 unsigned getMaxSafeDepDistBytes() { return LAI
->getMaxSafeDepDistBytes(); }
305 uint64_t getMaxSafeRegisterWidth() const {
306 return LAI
->getDepChecker().getMaxSafeRegisterWidth();
309 bool hasStride(Value
*V
) { return LAI
->hasStride(V
); }
311 /// Returns true if vector representation of the instruction \p I
313 bool isMaskRequired(const Instruction
*I
) { return (MaskedOp
.count(I
) != 0); }
315 unsigned getNumStores() const { return LAI
->getNumStores(); }
316 unsigned getNumLoads() const { return LAI
->getNumLoads(); }
318 // Returns true if the NoNaN attribute is set on the function.
319 bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr
; }
322 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
323 /// its nested loops are considered legal for vectorization. These legal
324 /// checks are common for inner and outer loop vectorization.
325 /// Temporarily taking UseVPlanNativePath parameter. If true, take
326 /// the new code path being implemented for outer loop vectorization
327 /// (should be functional for inner loop vectorization) based on VPlan.
328 /// If false, good old LV code.
329 bool canVectorizeLoopNestCFG(Loop
*Lp
, bool UseVPlanNativePath
);
331 /// Set up outer loop inductions by checking Phis in outer loop header for
332 /// supported inductions (int inductions). Return false if any of these Phis
333 /// is not a supported induction or if we fail to find an induction.
334 bool setupOuterLoopInductions();
336 /// Return true if the pre-header, exiting and latch blocks of \p Lp
337 /// (non-recursive) are considered legal for vectorization.
338 /// Temporarily taking UseVPlanNativePath parameter. If true, take
339 /// the new code path being implemented for outer loop vectorization
340 /// (should be functional for inner loop vectorization) based on VPlan.
341 /// If false, good old LV code.
342 bool canVectorizeLoopCFG(Loop
*Lp
, bool UseVPlanNativePath
);
344 /// Check if a single basic block loop is vectorizable.
345 /// At this point we know that this is a loop with a constant trip count
346 /// and we only need to check individual instructions.
347 bool canVectorizeInstrs();
349 /// When we vectorize loops we may change the order in which
350 /// we read and write from memory. This method checks if it is
351 /// legal to vectorize the code, considering only memory constrains.
352 /// Returns true if the loop is vectorizable
353 bool canVectorizeMemory();
355 /// Return true if we can vectorize this loop using the IF-conversion
357 bool canVectorizeWithIfConvert();
359 /// Return true if we can vectorize this outer loop. The method performs
360 /// specific checks for outer loop vectorization.
361 bool canVectorizeOuterLoop();
363 /// Return true if all of the instructions in the block can be speculatively
364 /// executed. \p SafePtrs is a list of addresses that are known to be legal
365 /// and we know that we can read from them without segfault.
366 bool blockCanBePredicated(BasicBlock
*BB
, SmallPtrSetImpl
<Value
*> &SafePtrs
);
368 /// Updates the vectorization state by adding \p Phi to the inductions list.
369 /// This can set \p Phi as the main induction of the loop if \p Phi is a
370 /// better choice for the main induction than the existing one.
371 void addInductionPhi(PHINode
*Phi
, const InductionDescriptor
&ID
,
372 SmallPtrSetImpl
<Value
*> &AllowedExit
);
374 /// Create an analysis remark that explains why vectorization failed
376 /// \p RemarkName is the identifier for the remark. If \p I is passed it is
377 /// an instruction that prevents vectorization. Otherwise the loop is used
378 /// for the location of the remark. \return the remark object that can be
380 OptimizationRemarkAnalysis
381 createMissedAnalysis(StringRef RemarkName
, Instruction
*I
= nullptr) const {
382 return createLVMissedAnalysis(Hints
->vectorizeAnalysisPassName(),
383 RemarkName
, TheLoop
, I
);
386 /// If an access has a symbolic strides, this maps the pointer value to
387 /// the stride symbol.
388 const ValueToValueMap
*getSymbolicStrides() {
389 // FIXME: Currently, the set of symbolic strides is sometimes queried before
390 // it's collected. This happens from canVectorizeWithIfConvert, when the
391 // pointer is checked to reference consecutive elements suitable for a
393 return LAI
? &LAI
->getSymbolicStrides() : nullptr;
396 /// The loop that we evaluate.
399 /// Loop Info analysis.
402 /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
403 /// Applies dynamic knowledge to simplify SCEV expressions in the context
404 /// of existing SCEV assumptions. The analysis will also add a minimal set
405 /// of new predicates if this is required to enable vectorization and
407 PredicatedScalarEvolution
&PSE
;
409 /// Target Library Info.
410 TargetLibraryInfo
*TLI
;
415 // LoopAccess analysis.
416 std::function
<const LoopAccessInfo
&(Loop
&)> *GetLAA
;
418 // And the loop-accesses info corresponding to this loop. This pointer is
419 // null until canVectorizeMemory sets it up.
420 const LoopAccessInfo
*LAI
= nullptr;
422 /// Interface to emit optimization remarks.
423 OptimizationRemarkEmitter
*ORE
;
425 // --- vectorization state --- //
427 /// Holds the primary induction variable. This is the counter of the
429 PHINode
*PrimaryInduction
= nullptr;
431 /// Holds the reduction variables.
432 ReductionList Reductions
;
434 /// Holds all of the induction variables that we found in the loop.
435 /// Notice that inductions don't need to start at zero and that induction
436 /// variables can be pointers.
437 InductionList Inductions
;
439 /// Holds all the casts that participate in the update chain of the induction
440 /// variables, and that have been proven to be redundant (possibly under a
441 /// runtime guard). These casts can be ignored when creating the vectorized
443 SmallPtrSet
<Instruction
*, 4> InductionCastsToIgnore
;
445 /// Holds the phi nodes that are first-order recurrences.
446 RecurrenceSet FirstOrderRecurrences
;
448 /// Holds instructions that need to sink past other instructions to handle
449 /// first-order recurrences.
450 DenseMap
<Instruction
*, Instruction
*> SinkAfter
;
452 /// Holds the widest induction type encountered.
453 Type
*WidestIndTy
= nullptr;
455 /// Allowed outside users. This holds the induction and reduction
456 /// vars which can be accessed from outside the loop.
457 SmallPtrSet
<Value
*, 4> AllowedExit
;
459 /// Can we assume the absence of NaNs.
460 bool HasFunNoNaNAttr
= false;
462 /// Vectorization requirements that will go through late-evaluation.
463 LoopVectorizationRequirements
*Requirements
;
465 /// Used to emit an analysis of any legality issues.
466 LoopVectorizeHints
*Hints
;
468 /// The demanded bits analsyis is used to compute the minimum type size in
469 /// which a reduction can be computed.
472 /// The assumption cache analysis is used to compute the minimum type size in
473 /// which a reduction can be computed.
476 /// While vectorizing these instructions we have to generate a
477 /// call to the appropriate masked intrinsic
478 SmallPtrSet
<const Instruction
*, 8> MaskedOp
;
483 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H