llvm/lib/Target/ARM/ARMTargetTransformInfo.h

   1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This file defines a TargetTransformInfo::Concept conforming object specific
  11 /// to the ARM target machine. It uses the target's detailed information to
  12 /// provide more precise answers to certain TTI queries, while letting the
  13 /// target independent and default TTI implementations handle the rest.
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  19
  20 #include "ARM.h"
  21 #include "ARMSubtarget.h"
  22 #include "ARMTargetMachine.h"
  23 #include "llvm/ADT/ArrayRef.h"
  24 #include "llvm/Analysis/TargetTransformInfo.h"
  25 #include "llvm/CodeGen/BasicTTIImpl.h"
  26 #include "llvm/IR/Constant.h"
  27 #include "llvm/IR/Function.h"
  28 #include "llvm/MC/SubtargetFeature.h"
  29
  30 namespace llvm {
  31
  32 class APInt;
  33 class ARMTargetLowering;
  34 class Instruction;
  35 class Loop;
  36 class SCEV;
  37 class ScalarEvolution;
  38 class Type;
  39 class Value;
  40
  41 namespace TailPredication {
  42   enum Mode {
  43     Disabled = 0,
  44     EnabledNoReductions,
  45     Enabled,
  46     ForceEnabledNoReductions,
  47     ForceEnabled
  48   };
  49 }
  50
  51 // For controlling conversion of memcpy into Tail Predicated loop.
  52 namespace TPLoop {
  53 enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
  54 }
  55
  56 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  57   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  58   using TTI = TargetTransformInfo;
  59
  60   friend BaseT;
  61
  62   const ARMSubtarget *ST;
  63   const ARMTargetLowering *TLI;
  64
  65   // Currently the following features are excluded from InlineFeaturesAllowed.
  66   // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  67   // Depending on whether they are set or unset, different
  68   // instructions/registers are available. For example, inlining a callee with
  69   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
  70   // fail if the callee uses ARM only instructions, e.g. in inline asm.
  71   const FeatureBitset InlineFeaturesAllowed = {
  72       ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
  73       ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
  74       ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
  75       ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
  76       ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
  77       ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
  78       ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
  79       ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
  80       ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
  81       ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
  82       ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
  83       ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
  84       ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
  85       ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
  86       ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
  87       ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
  88       ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
  89       ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
  90       ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
  91       ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
  92       ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
  93       ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
  94       ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
  95       ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  96   };
  97
  98   const ARMSubtarget *getST() const { return ST; }
  99   const ARMTargetLowering *getTLI() const { return TLI; }
 100
 101 public:
 102   explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
 103       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
 104         TLI(ST->getTargetLowering()) {}
 105
 106   bool areInlineCompatible(const Function *Caller,
 107                            const Function *Callee) const;
 108
 109   bool enableInterleavedAccessVectorization() { return true; }
 110
 111   TTI::AddressingModeKind
 112     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
 113
 114   /// Floating-point computation using ARMv8 AArch32 Advanced
 115   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
 116   /// and Arm MVE are IEEE-754 compliant.
 117   bool isFPVectorizationPotentiallyUnsafe() {
 118     return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
 119   }
 120
 121   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
 122                                                IntrinsicInst &II) const;
 123
 124   /// \name Scalar TTI Implementations
 125   /// @{
 126
 127   InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
 128                                         const APInt &Imm, Type *Ty);
 129
 130   using BaseT::getIntImmCost;
 131   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
 132                                 TTI::TargetCostKind CostKind);
 133
 134   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
 135                                     const APInt &Imm, Type *Ty,
 136                                     TTI::TargetCostKind CostKind,
 137                                     Instruction *Inst = nullptr);
 138
 139   /// @}
 140
 141   /// \name Vector TTI Implementations
 142   /// @{
 143
 144   unsigned getNumberOfRegisters(unsigned ClassID) const {
 145     bool Vector = (ClassID == 1);
 146     if (Vector) {
 147       if (ST->hasNEON())
 148         return 16;
 149       if (ST->hasMVEIntegerOps())
 150         return 8;
 151       return 0;
 152     }
 153
 154     if (ST->isThumb1Only())
 155       return 8;
 156     return 13;
 157   }
 158
 159   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
 160     switch (K) {
 161     case TargetTransformInfo::RGK_Scalar:
 162       return TypeSize::getFixed(32);
 163     case TargetTransformInfo::RGK_FixedWidthVector:
 164       if (ST->hasNEON())
 165         return TypeSize::getFixed(128);
 166       if (ST->hasMVEIntegerOps())
 167         return TypeSize::getFixed(128);
 168       return TypeSize::getFixed(0);
 169     case TargetTransformInfo::RGK_ScalableVector:
 170       return TypeSize::getScalable(0);
 171     }
 172     llvm_unreachable("Unsupported register kind");
 173   }
 174
 175   unsigned getMaxInterleaveFactor(unsigned VF) {
 176     return ST->getMaxInterleaveFactor();
 177   }
 178
 179   bool isProfitableLSRChainElement(Instruction *I);
 180
 181   bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
 182
 183   bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
 184     return isLegalMaskedLoad(DataTy, Alignment);
 185   }
 186
 187   bool isLegalMaskedGather(Type *Ty, Align Alignment);
 188
 189   bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
 190     return isLegalMaskedGather(Ty, Alignment);
 191   }
 192
 193   InstructionCost getMemcpyCost(const Instruction *I);
 194
 195   int getNumMemOps(const IntrinsicInst *I) const;
 196
 197   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 198                                  ArrayRef<int> Mask, int Index,
 199                                  VectorType *SubTp);
 200
 201   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
 202                              TTI::ReductionFlags Flags) const;
 203
 204   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
 205                                        TTI::ReductionFlags Flags) const;
 206
 207   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
 208
 209   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
 210                                  const Instruction *I = nullptr);
 211
 212   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 213                                    TTI::CastContextHint CCH,
 214                                    TTI::TargetCostKind CostKind,
 215                                    const Instruction *I = nullptr);
 216
 217   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
 218                                      CmpInst::Predicate VecPred,
 219                                      TTI::TargetCostKind CostKind,
 220                                      const Instruction *I = nullptr);
 221
 222   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
 223                                      unsigned Index);
 224
 225   InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
 226                                             const SCEV *Ptr);
 227
 228   InstructionCost getArithmeticInstrCost(
 229       unsigned Opcode, Type *Ty,
 230       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
 231       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
 232       TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
 233       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
 234       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
 235       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
 236       const Instruction *CxtI = nullptr);
 237
 238   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
 239                                   MaybeAlign Alignment, unsigned AddressSpace,
 240                                   TTI::TargetCostKind CostKind,
 241                                   const Instruction *I = nullptr);
 242
 243   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 244                                         Align Alignment, unsigned AddressSpace,
 245                                         TTI::TargetCostKind CostKind);
 246
 247   InstructionCost getInterleavedMemoryOpCost(
 248       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
 249       Align Alignment, unsigned AddressSpace,
 250       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
 251       bool UseMaskForCond = false, bool UseMaskForGaps = false);
 252
 253   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 254                                          const Value *Ptr, bool VariableMask,
 255                                          Align Alignment,
 256                                          TTI::TargetCostKind CostKind,
 257                                          const Instruction *I = nullptr);
 258
 259   InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
 260                                              Optional<FastMathFlags> FMF,
 261                                              TTI::TargetCostKind CostKind);
 262   InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
 263                                               Type *ResTy, VectorType *ValTy,
 264                                               TTI::TargetCostKind CostKind);
 265
 266   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 267                                         TTI::TargetCostKind CostKind);
 268
 269   bool maybeLoweredToCall(Instruction &I);
 270   bool isLoweredToCall(const Function *F);
 271   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
 272                                 AssumptionCache &AC,
 273                                 TargetLibraryInfo *LibInfo,
 274                                 HardwareLoopInfo &HWLoopInfo);
 275   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
 276                                    ScalarEvolution &SE,
 277                                    AssumptionCache &AC,
 278                                    TargetLibraryInfo *TLI,
 279                                    DominatorTree *DT,
 280                                    const LoopAccessInfo *LAI);
 281   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 282                                TTI::UnrollingPreferences &UP,
 283                                OptimizationRemarkEmitter *ORE);
 284
 285   bool emitGetActiveLaneMask() const;
 286
 287   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
 288                              TTI::PeelingPreferences &PP);
 289   bool shouldBuildLookupTablesForConstant(Constant *C) const {
 290     // In the ROPI and RWPI relocation models we can't have pointers to global
 291     // variables or functions in constant data, so don't convert switches to
 292     // lookup tables if any of the values would need relocation.
 293     if (ST->isROPI() || ST->isRWPI())
 294       return !C->needsDynamicRelocation();
 295
 296     return true;
 297   }
 298   /// @}
 299 };
 300
 301 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
 302 /// instruction with the specified blocksize.  (The order of the elements
 303 /// within each block of the vector is reversed.)
 304 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
 305   assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
 306          "Only possible block sizes for VREV are: 16, 32, 64");
 307
 308   unsigned EltSz = VT.getScalarSizeInBits();
 309   if (EltSz != 8 && EltSz != 16 && EltSz != 32)
 310     return false;
 311
 312   unsigned BlockElts = M[0] + 1;
 313   // If the first shuffle index is UNDEF, be optimistic.
 314   if (M[0] < 0)
 315     BlockElts = BlockSize / EltSz;
 316
 317   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
 318     return false;
 319
 320   for (unsigned i = 0, e = M.size(); i < e; ++i) {
 321     if (M[i] < 0)
 322       continue; // ignore UNDEF indices
 323     if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
 324       return false;
 325   }
 326
 327   return true;
 328 }
 329
 330 } // end namespace llvm
 331
 332 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H