llvm/lib/Target/ARM/ARMTargetTransformInfo.h

   1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This file defines a TargetTransformInfo::Concept conforming object specific to the
  11 /// ARM target machine. It uses the target's detailed information to
  12 /// provide more precise answers to certain TTI queries, while letting the
  13 /// target independent and default TTI implementations handle the rest.
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  19
  20 #include "ARM.h"
  21 #include "ARMSubtarget.h"
  22 #include "ARMTargetMachine.h"
  23 #include "llvm/ADT/ArrayRef.h"
  24 #include "llvm/Analysis/TargetTransformInfo.h"
  25 #include "llvm/CodeGen/BasicTTIImpl.h"
  26 #include "llvm/IR/Constant.h"
  27 #include "llvm/IR/Function.h"
  28 #include "llvm/TargetParser/SubtargetFeature.h"
  29 #include <optional>
  30
  31 namespace llvm {
  32
  33 class APInt;
  34 class ARMTargetLowering;
  35 class Instruction;
  36 class Loop;
  37 class SCEV;
  38 class ScalarEvolution;
  39 class Type;
  40 class Value;
  41
  42 namespace TailPredication {
  43   enum Mode {
  44     Disabled = 0,
  45     EnabledNoReductions,
  46     Enabled,
  47     ForceEnabledNoReductions,
  48     ForceEnabled
  49   };
  50 }
  51
  52 // For controlling conversion of memcpy into Tail Predicated loop.
  53 namespace TPLoop {
  54 enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
  55 }
  56
  57 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  58   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  59   using TTI = TargetTransformInfo;
  60
  61   friend BaseT;
  62
  63   const ARMSubtarget *ST;
  64   const ARMTargetLowering *TLI;
  65
  66   // Currently the following features are excluded from InlineFeaturesAllowed.
  67   // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  68   // Depending on whether they are set or unset, different
  69   // instructions/registers are available. For example, inlining a callee with
  70   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
  71   // fail if the callee uses ARM only instructions, e.g. in inline asm.
  72   const FeatureBitset InlineFeaturesAllowed = {
  73       ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
  74       ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
  75       ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
  76       ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
  77       ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
  78       ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
  79       ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
  80       ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
  81       ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
  82       ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
  83       ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
  84       ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
  85       ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
  86       ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
  87       ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
  88       ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
  89       ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
  90       ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
  91       ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
  92       ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
  93       ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
  94       ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
  95       ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
  96       ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  97   };
  98
  99   const ARMSubtarget *getST() const { return ST; }
 100   const ARMTargetLowering *getTLI() const { return TLI; }
 101
 102 public:
 103   explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
 104       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
 105         TLI(ST->getTargetLowering()) {}
 106
 107   bool areInlineCompatible(const Function *Caller,
 108                            const Function *Callee) const;
 109
 110   bool enableInterleavedAccessVectorization() { return true; }
 111
 112   TTI::AddressingModeKind
 113     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
 114
 115   /// Floating-point computation using ARMv8 AArch32 Advanced
 116   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
 117   /// and Arm MVE are IEEE-754 compliant.
 118   bool isFPVectorizationPotentiallyUnsafe() {
 119     return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
 120   }
 121
 122   std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
 123                                                     IntrinsicInst &II) const;
 124   std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
 125       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
 126       APInt &UndefElts2, APInt &UndefElts3,
 127       std::function<void(Instruction *, unsigned, APInt, APInt &)>
 128           SimplifyAndSetOp) const;
 129
 130   /// \name Scalar TTI Implementations
 131   /// @{
 132
 133   InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
 134                                         const APInt &Imm, Type *Ty);
 135
 136   using BaseT::getIntImmCost;
 137   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
 138                                 TTI::TargetCostKind CostKind);
 139
 140   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
 141                                     const APInt &Imm, Type *Ty,
 142                                     TTI::TargetCostKind CostKind,
 143                                     Instruction *Inst = nullptr);
 144
 145   /// @}
 146
 147   /// \name Vector TTI Implementations
 148   /// @{
 149
 150   unsigned getNumberOfRegisters(unsigned ClassID) const {
 151     bool Vector = (ClassID == 1);
 152     if (Vector) {
 153       if (ST->hasNEON())
 154         return 16;
 155       if (ST->hasMVEIntegerOps())
 156         return 8;
 157       return 0;
 158     }
 159
 160     if (ST->isThumb1Only())
 161       return 8;
 162     return 13;
 163   }
 164
 165   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
 166     switch (K) {
 167     case TargetTransformInfo::RGK_Scalar:
 168       return TypeSize::getFixed(32);
 169     case TargetTransformInfo::RGK_FixedWidthVector:
 170       if (ST->hasNEON())
 171         return TypeSize::getFixed(128);
 172       if (ST->hasMVEIntegerOps())
 173         return TypeSize::getFixed(128);
 174       return TypeSize::getFixed(0);
 175     case TargetTransformInfo::RGK_ScalableVector:
 176       return TypeSize::getScalable(0);
 177     }
 178     llvm_unreachable("Unsupported register kind");
 179   }
 180
 181   unsigned getMaxInterleaveFactor(ElementCount VF) {
 182     return ST->getMaxInterleaveFactor();
 183   }
 184
 185   bool isProfitableLSRChainElement(Instruction *I);
 186
 187   bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
 188
 189   bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
 190     return isLegalMaskedLoad(DataTy, Alignment);
 191   }
 192
 193   bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
 194     // For MVE, we have a custom lowering pass that will already have custom
 195     // legalised any gathers that we can lower to MVE intrinsics, and want to
 196     // expand all the rest. The pass runs before the masked intrinsic lowering
 197     // pass.
 198     return true;
 199   }
 200
 201   bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
 202     return forceScalarizeMaskedGather(VTy, Alignment);
 203   }
 204
 205   bool isLegalMaskedGather(Type *Ty, Align Alignment);
 206
 207   bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
 208     return isLegalMaskedGather(Ty, Alignment);
 209   }
 210
 211   InstructionCost getMemcpyCost(const Instruction *I);
 212
 213   uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
 214     return ST->getMaxInlineSizeThreshold();
 215   }
 216
 217   int getNumMemOps(const IntrinsicInst *I) const;
 218
 219   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 220                                  ArrayRef<int> Mask,
 221                                  TTI::TargetCostKind CostKind, int Index,
 222                                  VectorType *SubTp,
 223                                  ArrayRef<const Value *> Args = {},
 224                                  const Instruction *CxtI = nullptr);
 225
 226   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
 227                              TTI::ReductionFlags Flags) const;
 228
 229   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
 230                                        TTI::ReductionFlags Flags) const;
 231
 232   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
 233
 234   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
 235                                  const Instruction *I = nullptr);
 236
 237   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 238                                    TTI::CastContextHint CCH,
 239                                    TTI::TargetCostKind CostKind,
 240                                    const Instruction *I = nullptr);
 241
 242   InstructionCost getCmpSelInstrCost(
 243       unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
 244       TTI::TargetCostKind CostKind,
 245       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
 246       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
 247       const Instruction *I = nullptr);
 248
 249   using BaseT::getVectorInstrCost;
 250   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
 251                                      TTI::TargetCostKind CostKind,
 252                                      unsigned Index, Value *Op0, Value *Op1);
 253
 254   InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
 255                                             const SCEV *Ptr);
 256
 257   InstructionCost getArithmeticInstrCost(
 258       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
 259       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
 260       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
 261       ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr);
 262
 263   InstructionCost
 264   getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
 265                   unsigned AddressSpace, TTI::TargetCostKind CostKind,
 266                   TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
 267                   const Instruction *I = nullptr);
 268
 269   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 270                                         Align Alignment, unsigned AddressSpace,
 271                                         TTI::TargetCostKind CostKind);
 272
 273   InstructionCost getInterleavedMemoryOpCost(
 274       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
 275       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
 276       bool UseMaskForCond = false, bool UseMaskForGaps = false);
 277
 278   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 279                                          const Value *Ptr, bool VariableMask,
 280                                          Align Alignment,
 281                                          TTI::TargetCostKind CostKind,
 282                                          const Instruction *I = nullptr);
 283
 284   InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
 285                                              std::optional<FastMathFlags> FMF,
 286                                              TTI::TargetCostKind CostKind);
 287   InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
 288                                            Type *ResTy, VectorType *ValTy,
 289                                            FastMathFlags FMF,
 290                                            TTI::TargetCostKind CostKind);
 291   InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
 292                                          VectorType *ValTy,
 293                                          TTI::TargetCostKind CostKind);
 294
 295   InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
 296                                          FastMathFlags FMF,
 297                                          TTI::TargetCostKind CostKind);
 298
 299   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 300                                         TTI::TargetCostKind CostKind);
 301
 302   /// getScalingFactorCost - Return the cost of the scaling used in
 303   /// addressing mode represented by AM.
 304   /// If the AM is supported, the return value must be >= 0.
 305   /// If the AM is not supported, the return value must be negative.
 306   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
 307                                        StackOffset BaseOffset, bool HasBaseReg,
 308                                        int64_t Scale, unsigned AddrSpace) const;
 309
 310   bool maybeLoweredToCall(Instruction &I);
 311   bool isLoweredToCall(const Function *F);
 312   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
 313                                 AssumptionCache &AC,
 314                                 TargetLibraryInfo *LibInfo,
 315                                 HardwareLoopInfo &HWLoopInfo);
 316   bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
 317   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 318                                TTI::UnrollingPreferences &UP,
 319                                OptimizationRemarkEmitter *ORE);
 320
 321   TailFoldingStyle
 322   getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
 323
 324   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
 325                              TTI::PeelingPreferences &PP);
 326   bool shouldBuildLookupTablesForConstant(Constant *C) const {
 327     // In the ROPI and RWPI relocation models we can't have pointers to global
 328     // variables or functions in constant data, so don't convert switches to
 329     // lookup tables if any of the values would need relocation.
 330     if (ST->isROPI() || ST->isRWPI())
 331       return !C->needsDynamicRelocation();
 332
 333     return true;
 334   }
 335
 336   bool hasArmWideBranch(bool Thumb) const;
 337
 338   bool isProfitableToSinkOperands(Instruction *I,
 339                                   SmallVectorImpl<Use *> &Ops) const;
 340
 341   unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const;
 342
 343   /// @}
 344 };
 345
 346 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
 347 /// instruction with the specified blocksize.  (The order of the elements
 348 /// within each block of the vector is reversed.)
 349 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
 350   assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
 351          "Only possible block sizes for VREV are: 16, 32, 64");
 352
 353   unsigned EltSz = VT.getScalarSizeInBits();
 354   if (EltSz != 8 && EltSz != 16 && EltSz != 32)
 355     return false;
 356
 357   unsigned BlockElts = M[0] + 1;
 358   // If the first shuffle index is UNDEF, be optimistic.
 359   if (M[0] < 0)
 360     BlockElts = BlockSize / EltSz;
 361
 362   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
 363     return false;
 364
 365   for (unsigned i = 0, e = M.size(); i < e; ++i) {
 366     if (M[i] < 0)
 367       continue; // ignore UNDEF indices
 368     if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
 369       return false;
 370   }
 371
 372   return true;
 373 }
 374
 375 } // end namespace llvm
 376
 377 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H