//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H

#include "X86FrameLowering.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <memory>

#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"

namespace llvm {

class CallLowering;
class GlobalValue;
class InstructionSelector;
class RegisterBankInfo;

/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {

enum class Style {
  StubPIC, // Used on i386-darwin in PIC mode.
  GOT,     // Used on 32-bit ELF when in PIC mode.
  RIPRel,  // Used on x86-64 when in PIC mode.
  None     // Set when not in PIC mode.
};

} // end namespace PICStyles
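
// For example, PIC codegen on a 64-bit target would typically run with
// Style::RIPRel, while 32-bit ELF PIC code goes through the GOT with
// Style::GOT; the choice is applied via setPICStyle() (declared below)
// during subtarget setup.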

class X86Subtarget final : public X86GenSubtargetInfo {
  enum X86SSEEnum {
    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  };

  enum X863DNowEnum {
    NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
  };

  /// Which PIC style to use.
  PICStyles::Style PICStyle;

  const TargetMachine &TM;

  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
  X86SSEEnum X86SSELevel = NoSSE;

  /// MMX, 3DNow, 3DNow Athlon, or none supported.
  X863DNowEnum X863DNowLevel = NoThreeDNow;

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "X86GenSubtargetInfo.inc"
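
  // For illustration: for a tblgen feature record such as ERMSB, the
  // generated X86GenSubtargetInfo.inc expands the macro above into a boolean
  // member along the lines of:
  //   bool HasERMSB = false;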

  /// The minimum alignment known to hold for the stack frame on
  /// entry to the function, and which must be maintained by every function.
  Align stackAlignment = Align(4);

  Align TileConfigAlignment = Align(4);

  /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  // FIXME: this is a known good value for Yonah. How about others?
  unsigned MaxInlineSizeThreshold = 128;

  /// What processor and OS we're targeting.
  Triple TargetTriple;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  std::unique_ptr<InstructionSelector> InstSelector;

  /// Override the stack alignment.
  MaybeAlign StackAlignOverride;

  /// Preferred vector width from function attribute.
  unsigned PreferVectorWidthOverride;

  /// Resolved preferred vector width from function attribute and subtarget
  /// features.
  unsigned PreferVectorWidth = UINT32_MAX;

  /// Required vector width from function attribute.
  unsigned RequiredVectorWidth;

  X86SelectionDAGInfo TSInfo;
  // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  // X86TargetLowering needs.
  X86InstrInfo InstrInfo;
  X86TargetLowering TLInfo;
  X86FrameLowering FrameLowering;

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
               const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
               unsigned PreferVectorWidthOverride,
               unsigned RequiredVectorWidth);

  const X86TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  const X86FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  const X86RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }

  unsigned getTileConfigSize() const { return 64; }
  Align getTileConfigAlignment() const { return TileConfigAlignment; }

  /// Returns the minimum alignment known to hold for the stack frame on
  /// entry to the function, and which must be maintained by every function
  /// for this subtarget.
  Align getStackAlignment() const { return stackAlignment; }

  /// Returns the maximum memset / memcpy size
  /// that still makes it profitable to inline the call.
  unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }

  /// ParseSubtargetFeatures - Parses the feature string, setting the
  /// specified subtarget options. The definition of this function is
  /// auto-generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// Methods used by Global ISel.
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

private:
  /// Initialize the full set of dependencies so we can use an initializer
  /// list for X86Subtarget.
  X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
                                                StringRef TuneCPU,
                                                StringRef FS);
  void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

public:

#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "X86GenSubtargetInfo.inc"
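
  // For illustration: the same hypothetical tblgen record expands here into a
  // public getter along the lines of:
  //   bool hasERMSB() const { return HasERMSB; }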

  /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  bool isTarget64BitILP32() const {
    return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  }

  /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  bool isTarget64BitLP64() const {
    return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  }
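
  // For example (assuming standard triple spellings): x86_64-unknown-linux-gnu
  // selects LP64, while x86_64-linux-gnux32 reports isX32() and selects ILP32,
  // where pointers and long are 32 bits even though the CPU runs in 64-bit
  // mode.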

  PICStyles::Style getPICStyle() const { return PICStyle; }
  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }

  bool canUseCMPXCHG8B() const { return hasCX8(); }
  bool canUseCMPXCHG16B() const {
    // CX16 is just the CPUID bit; the instruction requires 64-bit mode too.
    return hasCX16() && is64Bit();
  }

  // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  // All 64-bit processors support cmov.
  bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
  bool hasSSE1() const { return X86SSELevel >= SSE1; }
  bool hasSSE2() const { return X86SSELevel >= SSE2; }
  bool hasSSE3() const { return X86SSELevel >= SSE3; }
  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  bool hasSSE41() const { return X86SSELevel >= SSE41; }
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512; }
  bool hasInt256() const { return hasAVX2(); }
  bool hasMMX() const { return X863DNowLevel >= MMX; }
  bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
  bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasPrefetchW() const {
    // The PREFETCHW instruction was added with 3DNow, but later CPUs gave it
    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
    // it as well, and KNL has another one that prefetches to L2 cache. We
    // assume the L1 version exists if the L2 version does.
    return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
  }
  bool hasSSEPrefetch() const {
    // We implicitly enable these when we have a write prefix supporting cache
    // level OR if we have prfchw, but don't already have a read prefetch from
    // 3DNow.
    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
           hasPREFETCHI();
  }
  bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }

  // These are generic getters that OR together all of the thunk types
  // supported by the subtarget. Therefore useIndirectThunk*() will return true
  // if any respective thunk feature is enabled.
  bool useIndirectThunkCalls() const {
    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  }
  bool useIndirectThunkBranches() const {
    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  }

  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
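
  // These widths are typically derived from function attributes; for example
  // (assuming the standard attribute names), "prefer-vector-width"="256" caps
  // PreferVectorWidth at 256, and "min-legal-vector-width"="512" raises
  // RequiredVectorWidth to 512.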

  // Helper functions to determine when we should allow widening to 512-bit
  // during codegen.
  // TODO: Currently we're always allowing widening on CPUs without VLX,
  // because for many cases we don't have a better option.
  bool canExtendTo512DQ() const {
    return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
  }
  bool canExtendTo512BW() const {
    return hasBWI() && canExtendTo512DQ();
  }

  bool hasNoDomainDelay() const { return NoDomainDelay; }
  bool hasNoDomainDelayMov() const {
    return hasNoDomainDelay() || NoDomainDelayMov;
  }
  bool hasNoDomainDelayBlend() const {
    return hasNoDomainDelay() || NoDomainDelayBlend;
  }
  bool hasNoDomainDelayShuffle() const {
    return hasNoDomainDelay() || NoDomainDelayShuffle;
  }

  // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  // disable them in the legalizer.
  bool useAVX512Regs() const {
    return hasAVX512() && hasEVEX512() &&
           (canExtendTo512DQ() || RequiredVectorWidth > 256);
  }

  bool useLight256BitInstructions() const {
    return getPreferVectorWidth() >= 256 || AllowLight256Bit;
  }

  bool useBWIRegs() const {
    return hasBWI() && useAVX512Regs();
  }

  bool isXRaySupported() const override { return is64Bit(); }

  /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }

  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasMFence() const { return hasSSE2() || is64Bit(); }

  const Triple &getTargetTriple() const { return TargetTriple; }

  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  bool isTargetPS() const { return TargetTriple.isPS(); }

  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }

  bool isTargetWindowsMSVC() const {
    return TargetTriple.isWindowsMSVCEnvironment();
  }

  bool isTargetWindowsCoreCLR() const {
    return TargetTriple.isWindowsCoreCLREnvironment();
  }

  bool isTargetWindowsCygwin() const {
    return TargetTriple.isWindowsCygwinEnvironment();
  }

  bool isTargetWindowsGNU() const {
    return TargetTriple.isWindowsGNUEnvironment();
  }

  bool isTargetWindowsItanium() const {
    return TargetTriple.isWindowsItaniumEnvironment();
  }

  bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }

  bool isOSWindows() const { return TargetTriple.isOSWindows(); }

  bool isTargetWin64() const { return Is64Bit && isOSWindows(); }

  bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }

  bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }

  bool isPICStyleStubPIC() const {
    return PICStyle == PICStyles::Style::StubPIC;
  }

  bool isPositionIndependent() const;

  bool isCallingConvWin64(CallingConv::ID CC) const {
    switch (CC) {
    // On Win64, all these conventions just use the default convention.
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
    case CallingConv::X86_FastCall:
    case CallingConv::X86_StdCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::X86_VectorCall:
    case CallingConv::Intel_OCL_BI:
      return isTargetWin64();
    // This convention allows using the Win64 convention on other targets.
    case CallingConv::Win64:
      return true;
    // This convention allows using the SysV convention on Windows targets.
    case CallingConv::X86_64_SysV:
      return false;
    // Otherwise, who knows what this is.
    default:
      return false;
    }
  }
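
  // For example: on an x86_64-pc-windows-msvc target,
  // isCallingConvWin64(CallingConv::C) is true, while an explicit
  // CallingConv::X86_64_SysV is still treated as SysV there; conversely,
  // CallingConv::Win64 selects the Win64 convention on any target.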

  /// Classify a global variable reference for the current subtarget according
  /// to how we should reference it in a non-pcrel context.
  unsigned char classifyLocalReference(const GlobalValue *GV) const;

  unsigned char classifyGlobalReference(const GlobalValue *GV,
                                        const Module &M) const;
  unsigned char classifyGlobalReference(const GlobalValue *GV) const;

  /// Classify a global function reference for the current subtarget.
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
                                                const Module &M) const;
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override;

  /// Classify a blockaddress reference for the current subtarget according to
  /// how we should reference it in a non-pcrel context.
  unsigned char classifyBlockAddressReference() const;

  /// Return true if the subtarget allows calls to immediate address.
  bool isLegalToCallImmediateAddr() const;

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch (TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    }
  }

  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  /// lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useIndirectThunkBranches();
  }

  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }

  bool enableEarlyIfConversion() const override;

  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                              &Mutations) const override;

  AntiDepBreakMode getAntiDepBreakMode() const override {
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H