//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;
class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr);
  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
                                           unsigned Index, bool HasRealUse);
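  // Illustrative sketch (not part of the upstream interface): a vectorizer
  // costing an extract it has not materialized yet would go through the
  // opcode-based public overload, e.g.
  //   TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
  //                          /*Index=*/0, /*Op0=*/nullptr, /*Op1=*/nullptr);
  // and is treated as HasRealUse == false, whereas costing an existing
  // extractelement instruction via the Instruction-based overload is treated
  // as HasRealUse == true.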
public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
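  // Illustrative usage sketch (assumed call site, not part of this header):
  // the AArch64 target machine constructs one of these per function, roughly
  //   return TargetTransformInfo(AArch64TTIImpl(TM, F));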
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;
  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}
  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); }
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool isVScaleKnownToBeAPowerOfTwo() const { return true; }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }
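  // Illustrative example (assumed tuning value): for VF = vscale x 4 on a
  // subtarget where getVScaleForTuning() returns 2, this estimates
  // 4 * 2 = 8 elements; a fixed VF of 8 simply returns 8.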
  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);
  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);
  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }
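  // Illustrative examples: i8/i16/i32/i64, half, float, double (and bfloat
  // when the subtarget has BF16) element types are accepted, so e.g.
  // <vscale x 4 x i32> qualifies; i1 or i128 element types do not.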
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }
  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || !ST->isNeonAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
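  // Illustrative examples: with SVE available, a <vscale x 4 x i32> gather or
  // scatter is legal; a fixed-width <4 x i32> one is only legal when SVE is
  // also used for fixed-length vectors, otherwise it is rejected here and
  // left to be scalarized.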
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }
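  // Illustrative examples: splatting an i32 to <2 x i32> (64 bits) or
  // <4 x i32> (128 bits) maps to LD1R and is legal; <2 x i8> is only 16 bits
  // wide, and scalable element counts are rejected outright.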
  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
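  // Illustrative examples: <2 x i64> and <4 x i32> can be split into two
  // register-sized halves and so report as legal for LDNP/STNP, while
  // <3 x i32> fails the power-of-2 element-count check; non-fixed-vector
  // types defer to the BaseT implementation.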
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }
  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }
  TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }
  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind);
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
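  // Illustrative example (assumed IR, not from this header): for an i32 load
  // addressed as {BaseReg + 4 * IndexReg}, the scale of 4 corresponds to the
  // scaled register form [Xn, Xm, lsl #2]; a supported mode yields a
  // non-negative cost, an unsupported one a negative value, as documented
  // above.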
  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
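  // Illustrative example: for i8 stores queried with VF = 8, this returns a
  // minimum VF of 4 (a v4i8 store); other element types defer to the BaseT
  // implementation.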
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H