include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

   1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This file defines the LoopVectorizationLegality class. Original code
  11 /// in Loop Vectorizer has been moved out to its own file for modularity
  12 /// and reusability.
  13 ///
  14 /// Currently, it works for innermost loop vectorization. Extending this to
  15 /// outer loop vectorization is a TODO item.
  16 ///
  17 /// Also provides:
  18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
  19 /// locally for easy look up. It has the ability to write them back as
  20 /// loop metadata, upon request.
  21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
  22 /// of reporting useful failure to vectorize message.
  23 //
  24 //===----------------------------------------------------------------------===//
  25
  26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
  27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
  28
  29 #include "llvm/ADT/MapVector.h"
  30 #include "llvm/Analysis/LoopAccessAnalysis.h"
  31 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  32 #include "llvm/Transforms/Utils/LoopUtils.h"
  33
  34 namespace llvm {
  35
  36 /// Utility class for getting and setting loop vectorizer hints in the form
  37 /// of loop metadata.
  38 /// This class keeps a number of loop annotations locally (as member variables)
  39 /// and can, upon request, write them back as metadata on the loop. It will
  40 /// initially scan the loop for existing metadata, and will update the local
  41 /// values based on information in the loop.
  42 /// We cannot write all values to metadata, as the mere presence of some info,
  43 /// for example 'force', means a decision has been made. So, we need to be
  44 /// careful NOT to add them if the user hasn't specifically asked so.
  45 class LoopVectorizeHints {
  46   enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
  47                   HK_PREDICATE };
  48
  49   /// Hint - associates name and validation with the hint value.
  50   struct Hint {
  51     const char *Name;
  52     unsigned Value; // This may have to change for non-numeric values.
  53     HintKind Kind;
  54
  55     Hint(const char *Name, unsigned Value, HintKind Kind)
  56         : Name(Name), Value(Value), Kind(Kind) {}
  57
  58     bool validate(unsigned Val);
  59   };
  60
  61   /// Vectorization width.
  62   Hint Width;
  63
  64   /// Vectorization interleave factor.
  65   Hint Interleave;
  66
  67   /// Vectorization forced
  68   Hint Force;
  69
  70   /// Already Vectorized
  71   Hint IsVectorized;
  72
  73   /// Vector Predicate
  74   Hint Predicate;
  75
  76   /// Return the loop metadata prefix.
  77   static StringRef Prefix() { return "llvm.loop."; }
  78
  79   /// True if there is any unsafe math in the loop.
  80   bool PotentiallyUnsafe = false;
  81
  82 public:
  83   enum ForceKind {
  84     FK_Undefined = -1, ///< Not selected.
  85     FK_Disabled = 0,   ///< Forcing disabled.
  86     FK_Enabled = 1,    ///< Forcing enabled.
  87   };
  88
  89   LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
  90                      OptimizationRemarkEmitter &ORE);
  91
  92   /// Mark the loop L as already vectorized by setting the width to 1.
  93   void setAlreadyVectorized();
  94
  95   bool allowVectorization(Function *F, Loop *L,
  96                           bool VectorizeOnlyWhenForced) const;
  97
  98   /// Dumps all the hint information.
  99   void emitRemarkWithHints() const;
 100
 101   unsigned getWidth() const { return Width.Value; }
 102   unsigned getInterleave() const { return Interleave.Value; }
 103   unsigned getIsVectorized() const { return IsVectorized.Value; }
 104   unsigned getPredicate() const { return Predicate.Value; }
 105   enum ForceKind getForce() const {
 106     if ((ForceKind)Force.Value == FK_Undefined &&
 107         hasDisableAllTransformsHint(TheLoop))
 108       return FK_Disabled;
 109     return (ForceKind)Force.Value;
 110   }
 111
 112   /// If hints are provided that force vectorization, use the AlwaysPrint
 113   /// pass name to force the frontend to print the diagnostic.
 114   const char *vectorizeAnalysisPassName() const;
 115
 116   bool allowReordering() const {
 117     // When enabling loop hints are provided we allow the vectorizer to change
 118     // the order of operations that is given by the scalar loop. This is not
 119     // enabled by default because can be unsafe or inefficient. For example,
 120     // reordering floating-point operations will change the way round-off
 121     // error accumulates in the loop.
 122     return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
 123   }
 124
 125   bool isPotentiallyUnsafe() const {
 126     // Avoid FP vectorization if the target is unsure about proper support.
 127     // This may be related to the SIMD unit in the target not handling
 128     // IEEE 754 FP ops properly, or bad single-to-double promotions.
 129     // Otherwise, a sequence of vectorized loops, even without reduction,
 130     // could lead to different end results on the destination vectors.
 131     return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
 132   }
 133
 134   void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
 135
 136 private:
 137   /// Find hints specified in the loop metadata and update local values.
 138   void getHintsFromMetadata();
 139
 140   /// Checks string hint with one operand and set value if valid.
 141   void setHint(StringRef Name, Metadata *Arg);
 142
 143   /// The loop these hints belong to.
 144   const Loop *TheLoop;
 145
 146   /// Interface to emit optimization remarks.
 147   OptimizationRemarkEmitter &ORE;
 148 };
 149
 150 /// This holds vectorization requirements that must be verified late in
 151 /// the process. The requirements are set by legalize and costmodel. Once
 152 /// vectorization has been determined to be possible and profitable the
 153 /// requirements can be verified by looking for metadata or compiler options.
 154 /// For example, some loops require FP commutativity which is only allowed if
 155 /// vectorization is explicitly specified or if the fast-math compiler option
 156 /// has been provided.
 157 /// Late evaluation of these requirements allows helpful diagnostics to be
 158 /// composed that tells the user what need to be done to vectorize the loop. For
 159 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
 160 /// evaluation should be used only when diagnostics can generated that can be
 161 /// followed by a non-expert user.
 162 class LoopVectorizationRequirements {
 163 public:
 164   LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
 165
 166   void addUnsafeAlgebraInst(Instruction *I) {
 167     // First unsafe algebra instruction.
 168     if (!UnsafeAlgebraInst)
 169       UnsafeAlgebraInst = I;
 170   }
 171
 172   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
 173
 174   bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
 175
 176 private:
 177   unsigned NumRuntimePointerChecks = 0;
 178   Instruction *UnsafeAlgebraInst = nullptr;
 179
 180   /// Interface to emit optimization remarks.
 181   OptimizationRemarkEmitter &ORE;
 182 };
 183
 184 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
 185 /// to what vectorization factor.
 186 /// This class does not look at the profitability of vectorization, only the
 187 /// legality. This class has two main kinds of checks:
 188 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
 189 ///   will change the order of memory accesses in a way that will change the
 190 ///   correctness of the program.
 191 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
 192 /// checks for a number of different conditions, such as the availability of a
 193 /// single induction variable, that all types are supported and vectorize-able,
 194 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
 195 /// This class is also used by InnerLoopVectorizer for identifying
 196 /// induction variable and the different reduction variables.
 197 class LoopVectorizationLegality {
 198 public:
 199   LoopVectorizationLegality(
 200       Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
 201       TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
 202       Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
 203       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
 204       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
 205       AssumptionCache *AC)
 206       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
 207         GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
 208
 209   /// ReductionList contains the reduction descriptors for all
 210   /// of the reductions that were found in the loop.
 211   using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
 212
 213   /// InductionList saves induction variables and maps them to the
 214   /// induction descriptor.
 215   using InductionList = MapVector<PHINode *, InductionDescriptor>;
 216
 217   /// RecurrenceSet contains the phi nodes that are recurrences other than
 218   /// inductions and reductions.
 219   using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
 220
 221   /// Returns true if it is legal to vectorize this loop.
 222   /// This does not mean that it is profitable to vectorize this
 223   /// loop, only that it is legal to do so.
 224   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 225   /// the new code path being implemented for outer loop vectorization
 226   /// (should be functional for inner loop vectorization) based on VPlan.
 227   /// If false, good old LV code.
 228   bool canVectorize(bool UseVPlanNativePath);
 229
 230   /// Return true if we can vectorize this loop while folding its tail by
 231   /// masking, and mark all respective loads/stores for masking.
 232   bool prepareToFoldTailByMasking();
 233
 234   /// Returns the primary induction variable.
 235   PHINode *getPrimaryInduction() { return PrimaryInduction; }
 236
 237   /// Returns the reduction variables found in the loop.
 238   ReductionList *getReductionVars() { return &Reductions; }
 239
 240   /// Returns the induction variables found in the loop.
 241   InductionList *getInductionVars() { return &Inductions; }
 242
 243   /// Return the first-order recurrences found in the loop.
 244   RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
 245
 246   /// Return the set of instructions to sink to handle first-order recurrences.
 247   DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
 248
 249   /// Returns the widest induction type.
 250   Type *getWidestInductionType() { return WidestIndTy; }
 251
 252   /// Returns True if V is a Phi node of an induction variable in this loop.
 253   bool isInductionPhi(const Value *V);
 254
 255   /// Returns True if V is a cast that is part of an induction def-use chain,
 256   /// and had been proven to be redundant under a runtime guard (in other
 257   /// words, the cast has the same SCEV expression as the induction phi).
 258   bool isCastedInductionVariable(const Value *V);
 259
 260   /// Returns True if V can be considered as an induction variable in this
 261   /// loop. V can be the induction phi, or some redundant cast in the def-use
 262   /// chain of the inducion phi.
 263   bool isInductionVariable(const Value *V);
 264
 265   /// Returns True if PN is a reduction variable in this loop.
 266   bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
 267
 268   /// Returns True if Phi is a first-order recurrence in this loop.
 269   bool isFirstOrderRecurrence(const PHINode *Phi);
 270
 271   /// Return true if the block BB needs to be predicated in order for the loop
 272   /// to be vectorized.
 273   bool blockNeedsPredication(BasicBlock *BB);
 274
 275   /// Check if this pointer is consecutive when vectorizing. This happens
 276   /// when the last index of the GEP is the induction variable, or that the
 277   /// pointer itself is an induction variable.
 278   /// This check allows us to vectorize A[idx] into a wide load/store.
 279   /// Returns:
 280   /// 0 - Stride is unknown or non-consecutive.
 281   /// 1 - Address is consecutive.
 282   /// -1 - Address is consecutive, and decreasing.
 283   /// NOTE: This method must only be used before modifying the original scalar
 284   /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
 285   int isConsecutivePtr(Value *Ptr);
 286
 287   /// Returns true if the value V is uniform within the loop.
 288   bool isUniform(Value *V);
 289
 290   /// Returns the information that we collected about runtime memory check.
 291   const RuntimePointerChecking *getRuntimePointerChecking() const {
 292     return LAI->getRuntimePointerChecking();
 293   }
 294
 295   const LoopAccessInfo *getLAI() const { return LAI; }
 296
 297   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
 298
 299   uint64_t getMaxSafeRegisterWidth() const {
 300     return LAI->getDepChecker().getMaxSafeRegisterWidth();
 301   }
 302
 303   bool hasStride(Value *V) { return LAI->hasStride(V); }
 304
 305   /// Returns true if vector representation of the instruction \p I
 306   /// requires mask.
 307   bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
 308
 309   unsigned getNumStores() const { return LAI->getNumStores(); }
 310   unsigned getNumLoads() const { return LAI->getNumLoads(); }
 311
 312   // Returns true if the NoNaN attribute is set on the function.
 313   bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
 314
 315 private:
 316   /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
 317   /// its nested loops are considered legal for vectorization. These legal
 318   /// checks are common for inner and outer loop vectorization.
 319   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 320   /// the new code path being implemented for outer loop vectorization
 321   /// (should be functional for inner loop vectorization) based on VPlan.
 322   /// If false, good old LV code.
 323   bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
 324
 325   /// Set up outer loop inductions by checking Phis in outer loop header for
 326   /// supported inductions (int inductions). Return false if any of these Phis
 327   /// is not a supported induction or if we fail to find an induction.
 328   bool setupOuterLoopInductions();
 329
 330   /// Return true if the pre-header, exiting and latch blocks of \p Lp
 331   /// (non-recursive) are considered legal for vectorization.
 332   /// Temporarily taking UseVPlanNativePath parameter. If true, take
 333   /// the new code path being implemented for outer loop vectorization
 334   /// (should be functional for inner loop vectorization) based on VPlan.
 335   /// If false, good old LV code.
 336   bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
 337
 338   /// Check if a single basic block loop is vectorizable.
 339   /// At this point we know that this is a loop with a constant trip count
 340   /// and we only need to check individual instructions.
 341   bool canVectorizeInstrs();
 342
 343   /// When we vectorize loops we may change the order in which
 344   /// we read and write from memory. This method checks if it is
 345   /// legal to vectorize the code, considering only memory constrains.
 346   /// Returns true if the loop is vectorizable
 347   bool canVectorizeMemory();
 348
 349   /// Return true if we can vectorize this loop using the IF-conversion
 350   /// transformation.
 351   bool canVectorizeWithIfConvert();
 352
 353   /// Return true if we can vectorize this outer loop. The method performs
 354   /// specific checks for outer loop vectorization.
 355   bool canVectorizeOuterLoop();
 356
 357   /// Return true if all of the instructions in the block can be speculatively
 358   /// executed, and record the loads/stores that require masking. If's that
 359   /// guard loads can be ignored under "assume safety" unless \p PreserveGuards
 360   /// is true. This can happen when we introduces guards for which the original
 361   /// "unguarded-loads are safe" assumption does not hold. For example, the
 362   /// vectorizer's fold-tail transformation changes the loop to execute beyond
 363   /// its original trip-count, under a proper guard, which should be preserved.
 364   /// \p SafePtrs is a list of addresses that are known to be legal and we know
 365   /// that we can read from them without segfault.
 366   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
 367                             bool PreserveGuards = false);
 368
 369   /// Updates the vectorization state by adding \p Phi to the inductions list.
 370   /// This can set \p Phi as the main induction of the loop if \p Phi is a
 371   /// better choice for the main induction than the existing one.
 372   void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
 373                        SmallPtrSetImpl<Value *> &AllowedExit);
 374
 375   /// If an access has a symbolic strides, this maps the pointer value to
 376   /// the stride symbol.
 377   const ValueToValueMap *getSymbolicStrides() {
 378     // FIXME: Currently, the set of symbolic strides is sometimes queried before
 379     // it's collected.  This happens from canVectorizeWithIfConvert, when the
 380     // pointer is checked to reference consecutive elements suitable for a
 381     // masked access.
 382     return LAI ? &LAI->getSymbolicStrides() : nullptr;
 383   }
 384
 385   /// The loop that we evaluate.
 386   Loop *TheLoop;
 387
 388   /// Loop Info analysis.
 389   LoopInfo *LI;
 390
 391   /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
 392   /// Applies dynamic knowledge to simplify SCEV expressions in the context
 393   /// of existing SCEV assumptions. The analysis will also add a minimal set
 394   /// of new predicates if this is required to enable vectorization and
 395   /// unrolling.
 396   PredicatedScalarEvolution &PSE;
 397
 398   /// Target Transform Info.
 399   TargetTransformInfo *TTI;
 400
 401   /// Target Library Info.
 402   TargetLibraryInfo *TLI;
 403
 404   /// Dominator Tree.
 405   DominatorTree *DT;
 406
 407   // LoopAccess analysis.
 408   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
 409
 410   // And the loop-accesses info corresponding to this loop.  This pointer is
 411   // null until canVectorizeMemory sets it up.
 412   const LoopAccessInfo *LAI = nullptr;
 413
 414   /// Interface to emit optimization remarks.
 415   OptimizationRemarkEmitter *ORE;
 416
 417   //  ---  vectorization state --- //
 418
 419   /// Holds the primary induction variable. This is the counter of the
 420   /// loop.
 421   PHINode *PrimaryInduction = nullptr;
 422
 423   /// Holds the reduction variables.
 424   ReductionList Reductions;
 425
 426   /// Holds all of the induction variables that we found in the loop.
 427   /// Notice that inductions don't need to start at zero and that induction
 428   /// variables can be pointers.
 429   InductionList Inductions;
 430
 431   /// Holds all the casts that participate in the update chain of the induction
 432   /// variables, and that have been proven to be redundant (possibly under a
 433   /// runtime guard). These casts can be ignored when creating the vectorized
 434   /// loop body.
 435   SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
 436
 437   /// Holds the phi nodes that are first-order recurrences.
 438   RecurrenceSet FirstOrderRecurrences;
 439
 440   /// Holds instructions that need to sink past other instructions to handle
 441   /// first-order recurrences.
 442   DenseMap<Instruction *, Instruction *> SinkAfter;
 443
 444   /// Holds the widest induction type encountered.
 445   Type *WidestIndTy = nullptr;
 446
 447   /// Allowed outside users. This holds the variables that can be accessed from
 448   /// outside the loop.
 449   SmallPtrSet<Value *, 4> AllowedExit;
 450
 451   /// Can we assume the absence of NaNs.
 452   bool HasFunNoNaNAttr = false;
 453
 454   /// Vectorization requirements that will go through late-evaluation.
 455   LoopVectorizationRequirements *Requirements;
 456
 457   /// Used to emit an analysis of any legality issues.
 458   LoopVectorizeHints *Hints;
 459
 460   /// The demanded bits analysis is used to compute the minimum type size in
 461   /// which a reduction can be computed.
 462   DemandedBits *DB;
 463
 464   /// The assumption cache analysis is used to compute the minimum type size in
 465   /// which a reduction can be computed.
 466   AssumptionCache *AC;
 467
 468   /// While vectorizing these instructions we have to generate a
 469   /// call to the appropriate masked intrinsic
 470   SmallPtrSet<const Instruction *, 8> MaskedOp;
 471 };
 472
 473 } // namespace llvm
 474
 475 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H