llvm/lib/Target/X86/X86Subtarget.h

   1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file declares the X86 specific subclass of TargetSubtargetInfo.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  14 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  15
  16 #include "X86FrameLowering.h"
  17 #include "X86ISelLowering.h"
  18 #include "X86InstrInfo.h"
  19 #include "X86SelectionDAGInfo.h"
  20 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  21 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  22 #include "llvm/IR/CallingConv.h"
  23 #include "llvm/TargetParser/Triple.h"
  24 #include <climits>
  25 #include <memory>
  26
  27 #define GET_SUBTARGETINFO_HEADER
  28 #include "X86GenSubtargetInfo.inc"
  29
  30 namespace llvm {
  31
  32 class CallLowering;
  33 class GlobalValue;
  34 class InstructionSelector;
  35 class LegalizerInfo;
  36 class RegisterBankInfo;
  37 class StringRef;
  38 class TargetMachine;
  39
  40 /// The X86 backend supports a number of different styles of PIC.
  41 /// X86Subtarget stores one Style value and exposes it via getPICStyle().
  42 namespace PICStyles {
  43
  44 enum class Style {
  45   StubPIC,          // Used on i386-darwin in pic mode.
  46   GOT,              // Used on 32-bit ELF when in pic mode.
  47   RIPRel,           // Used on X86-64 when in pic mode.
  48   None              // Set when not in pic mode.
  49 };
  50
  51 } // end namespace PICStyles
  52
  53 class X86Subtarget final : public X86GenSubtargetInfo {
       // X86 subclass of the tblgen-generated subtarget info. It holds the
       // feature flags expanded from X86GenSubtargetInfo.inc, the target triple,
       // the PIC style and the vector-width settings, and it owns the
       // SelectionDAG / instruction / lowering / frame objects that the
       // accessors below hand out by pointer.

       /// Vector ISA levels in ascending order. The hasSSE*/hasAVX* predicates
       /// below compare X86SSELevel with >=, so every level implies all of the
       /// levels listed before it; do not reorder the enumerators.
  54   enum X86SSEEnum {
  55     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  56   };
  57
  58   /// Which PIC style to use; read and written via get/setPICStyle().
  59   PICStyles::Style PICStyle;
  60
       // NOTE(review): presumably bound to the constructor's TM argument; the
       // constructor body is not in this header -- confirm in the .cpp.
  61   const TargetMachine &TM;
  62
  63   /// Highest supported X86SSEEnum level (NoSSE .. AVX512); defaults to NoSSE.
  64   X86SSEEnum X86SSELevel = NoSSE;
  65
       // Expands to one `bool ATTRIBUTE = DEFAULT;` member for every
       // GET_SUBTARGETINFO_MACRO entry in the generated .inc file.
  66 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  67   bool ATTRIBUTE = DEFAULT;
  68 #include "X86GenSubtargetInfo.inc"
  69   /// The minimum alignment known to hold of the stack frame on entry to the
  70   /// function and which must be maintained by every function. Align(4) here.
  71   Align stackAlignment = Align(4);
  72
       /// Alignment returned by getTileConfigAlignment(); Align(4) by default.
  73   Align TileConfigAlignment = Align(4);
  74
  75   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  76   ///
  77   // FIXME: this is a known good value for Yonah. How about others?
  78   unsigned MaxInlineSizeThreshold = 128;
  79
  80   /// What processor and OS we're targeting.
  81   Triple TargetTriple;
  82
  83   /// GlobalISel related objects, returned by the getters declared further down.
  84   std::unique_ptr<CallLowering> CallLoweringInfo;
  85   std::unique_ptr<LegalizerInfo> Legalizer;
  86   std::unique_ptr<RegisterBankInfo> RegBankInfo;
  87   std::unique_ptr<InstructionSelector> InstSelector;
  88
  89   /// Override the stack alignment (MaybeAlign, so it may be unset).
  90   MaybeAlign StackAlignOverride;
  91
  92   /// Preferred vector width from function attribute. No in-class default.
  93   unsigned PreferVectorWidthOverride;
  94
  95   /// Resolved preferred vector width from function attribute and subtarget
  96   /// features. Starts at UINT32_MAX until it is resolved.
  97   unsigned PreferVectorWidth = UINT32_MAX;
  98
  99   /// Required vector width from function attribute. No in-class default.
 100   unsigned RequiredVectorWidth;
 101
 102   X86SelectionDAGInfo TSInfo;
 103   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
 104   // X86TargetLowering needs.
 105   X86InstrInfo InstrInfo;
 106   X86TargetLowering TLInfo;
 107   X86FrameLowering FrameLowering;
 108
 109 public:
 110   /// This constructor initializes the data members to match that
 111   /// of the specified triple. The last three parameters share their names
 112   /// with the StackAlignOverride / vector-width members declared above.
 113   X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
 114                const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
 115                unsigned PreferVectorWidthOverride,
 116                unsigned RequiredVectorWidth);
 117
       // The accessors below return pointers to objects owned by this subtarget.
 118   const X86TargetLowering *getTargetLowering() const override {
 119     return &TLInfo;
 120   }
 121
 122   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
 123
 124   const X86FrameLowering *getFrameLowering() const override {
 125     return &FrameLowering;
 126   }
 127
 128   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
 129     return &TSInfo;
 130   }
 131
       /// The register info is reached through the instruction info object.
 132   const X86RegisterInfo *getRegisterInfo() const override {
 133     return &getInstrInfo()->getRegisterInfo();
 134   }
 135
       // Tile configuration size is the constant 64 (NOTE(review): presumably
       // bytes -- the unit is not stated in this header).
 136   unsigned getTileConfigSize() const { return 64; }
 137   Align getTileConfigAlignment() const { return TileConfigAlignment; }
 138
 139   /// Returns the minimum alignment known to hold of the
 140   /// stack frame on entry to the function and which must be maintained by every
 141   /// function for this subtarget.
 142   Align getStackAlignment() const { return stackAlignment; }
 143
 144   /// Returns the maximum memset / memcpy size
 145   /// that still makes it profitable to inline the call.
 146   unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
 147
 148   /// ParseSubtargetFeatures - Parses features string setting specified
 149   /// subtarget options.  Definition of function is auto generated by tblgen.
 150   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 151
 152   /// Methods used by Global ISel (declared here, defined out of line).
 153   const CallLowering *getCallLowering() const override;
 154   InstructionSelector *getInstructionSelector() const override;
 155   const LegalizerInfo *getLegalizerInfo() const override;
 156   const RegisterBankInfo *getRegBankInfo() const override;
 157
 158 private:
 159   /// Initialize the full set of dependencies so we can use an initializer
 160   /// list for X86Subtarget. Returns a reference to an X86Subtarget.
 161   X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
 162                                                 StringRef TuneCPU,
 163                                                 StringRef FS);
 164   void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 165
 166 public:
 167
       // Expands to one `bool GETTER() const` accessor for every
       // GET_SUBTARGETINFO_MACRO entry, returning the matching ATTRIBUTE member.
 168 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
 169   bool GETTER() const { return ATTRIBUTE; }
 170 #include "X86GenSubtargetInfo.inc"
 171
 172   /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
       /// True for a 64-bit subtarget whose triple is X32 or NaCl.
 173   bool isTarget64BitILP32() const {
 174     return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
 175   }
 176
 177   /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
       /// True for a 64-bit subtarget whose triple is neither X32 nor NaCl.
 178   bool isTarget64BitLP64() const {
 179     return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
 180   }
 181
 182   PICStyles::Style getPICStyle() const { return PICStyle; }
 183   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 184
 185   bool canUseCMPXCHG8B() const { return hasCX8(); }
 186   bool canUseCMPXCHG16B() const {
 187     // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
 188     return hasCX16() && is64Bit();
 189   }
 190   // SSE codegen depends on cmovs, and all SSE1+ processors support them.
 191   // All 64-bit processors support cmov.
 192   bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
       // Level predicates: true when X86SSELevel is at least the named level.
 193   bool hasSSE1() const { return X86SSELevel >= SSE1; }
 194   bool hasSSE2() const { return X86SSELevel >= SSE2; }
 195   bool hasSSE3() const { return X86SSELevel >= SSE3; }
 196   bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
 197   bool hasSSE41() const { return X86SSELevel >= SSE41; }
 198   bool hasSSE42() const { return X86SSELevel >= SSE42; }
 199   bool hasAVX() const { return X86SSELevel >= AVX; }
 200   bool hasAVX2() const { return X86SSELevel >= AVX2; }
 201   bool hasAVX512() const { return X86SSELevel >= AVX512; }
 202   bool hasInt256() const { return hasAVX2(); }
 203   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
 204   bool hasPrefetchW() const {
 205     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
 206     // its own CPUID bit as part of deprecating 3DNow.
 207     return hasPRFCHW();
 208   }
 209   bool hasSSEPrefetch() const {
 210     // Besides SSE1 itself, the SSE prefetch instructions are also enabled
 211     // implicitly when PRFCHW or PREFETCHI is available.
 212     return hasSSE1() || hasPRFCHW() || hasPREFETCHI();
 213   }
       // True outside 64-bit mode, or in 64-bit mode when hasLAHFSAHF64() is set.
 214   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
 215   // These are generic getters that OR together all of the thunk types
 216   // supported by the subtarget. Therefore useIndirectThunk*() will return true
 217   // if any respective thunk feature is enabled.
 218   bool useIndirectThunkCalls() const {
 219     return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
 220   }
 221   bool useIndirectThunkBranches() const {
 222     return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
 223   }
 224
 225   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
 226   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
 227
 228   // Helper functions to determine when we should allow widening to 512-bit
 229   // during codegen.
 230   // TODO: Currently we're always allowing widening on CPUs without VLX,
 231   // because for many cases we don't have a better option.
 232   bool canExtendTo512DQ() const {
 233     return hasAVX512() && hasEVEX512() &&
 234            (!hasVLX() || getPreferVectorWidth() >= 512);
 235   }
       // Same as canExtendTo512DQ(), but additionally requires BWI.
 236   bool canExtendTo512BW() const  {
 237     return hasBWI() && canExtendTo512DQ();
 238   }
 239
       // Domain-delay predicates: each specific Mov/Blend/Shuffle variant is also
       // true whenever the general NoDomainDelay flag is set.
 240   bool hasNoDomainDelay() const { return NoDomainDelay; }
 241   bool hasNoDomainDelayMov() const {
 242       return hasNoDomainDelay() || NoDomainDelayMov;
 243   }
 244   bool hasNoDomainDelayBlend() const {
 245       return hasNoDomainDelay() || NoDomainDelayBlend;
 246   }
 247   bool hasNoDomainDelayShuffle() const {
 248       return hasNoDomainDelay() || NoDomainDelayShuffle;
 249   }
 250
 251   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
 252   // disable them in the legalizer.
 253   bool useAVX512Regs() const {
 254     return hasAVX512() && hasEVEX512() &&
 255            (canExtendTo512DQ() || RequiredVectorWidth > 256);
 256   }
 257
       // True when the preferred vector width is at least 256 or the
       // AllowLight256Bit flag is set.
 258   bool useLight256BitInstructions() const {
 259     return getPreferVectorWidth() >= 256 || AllowLight256Bit;
 260   }
 261
       // BWI registers need both BWI and 512-bit register use (useAVX512Regs()).
 262   bool useBWIRegs() const {
 263     return hasBWI() && useAVX512Regs();
 264   }
 265
       // XRay is reported as supported only for 64-bit subtargets.
 266   bool isXRaySupported() const override { return is64Bit(); }
 267
 268   /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
 269   /// no-sse2). There isn't any reason to disable it if the target processor
 270   /// supports it.
 271   bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
 272
 273   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
 274   /// no-sse2). There isn't any reason to disable it if the target processor
 275   /// supports it.
 276   bool hasMFence() const { return hasSSE2() || is64Bit(); }
 277
 278   const Triple &getTargetTriple() const { return TargetTriple; }
 279
       // Thin wrappers that forward OS queries to TargetTriple.
 280   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
 281   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
 282   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
 283   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
 284   bool isTargetPS() const { return TargetTriple.isPS(); }
 285
       // Object-file format queries, also forwarded to TargetTriple.
 286   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
 287   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
 288   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
 289
 290   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
 291   bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
 292   bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
 293   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 294   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
       // NaCl32 / NaCl64 split the NaCl case on is64Bit().
 295   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
 296   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
 297   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
 298   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
 299
       // Windows environment queries, forwarded to TargetTriple.
 300   bool isTargetWindowsMSVC() const {
 301     return TargetTriple.isWindowsMSVCEnvironment();
 302   }
 303
 304   bool isTargetWindowsCoreCLR() const {
 305     return TargetTriple.isWindowsCoreCLREnvironment();
 306   }
 307
 308   bool isTargetWindowsCygwin() const {
 309     return TargetTriple.isWindowsCygwinEnvironment();
 310   }
 311
 312   bool isTargetWindowsGNU() const {
 313     return TargetTriple.isWindowsGNUEnvironment();
 314   }
 315
 316   bool isTargetWindowsItanium() const {
 317     return TargetTriple.isWindowsItaniumEnvironment();
 318   }
 319
 320   bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
 321
 322   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
 323
       // Win64 / Win32 split the Windows case on the Is64Bit flag.
 324   bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
 325
 326   bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
 327
       // Convenience comparisons of PICStyle against individual PICStyles::Style
       // values.
 328   bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
 329   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
 330
 331   bool isPICStyleStubPIC() const {
 332     return PICStyle == PICStyles::Style::StubPIC;
 333   }
 334
 335   bool isPositionIndependent() const;
 336
       /// Return true if calling convention \p CC uses the Win64 convention on
       /// this subtarget. Unlisted conventions yield false.
 337   bool isCallingConvWin64(CallingConv::ID CC) const {
 338     switch (CC) {
 339     // On Win64, all these conventions just use the default convention.
 340     case CallingConv::C:
 341     case CallingConv::Fast:
 342     case CallingConv::Tail:
 343     case CallingConv::Swift:
 344     case CallingConv::SwiftTail:
 345     case CallingConv::X86_FastCall:
 346     case CallingConv::X86_StdCall:
 347     case CallingConv::X86_ThisCall:
 348     case CallingConv::X86_VectorCall:
 349     case CallingConv::Intel_OCL_BI:
 350       return isTargetWin64();
 351     // This convention allows using the Win64 convention on other targets.
 352     case CallingConv::Win64:
 353       return true;
 354     // This convention allows using the SysV convention on Windows targets.
 355     case CallingConv::X86_64_SysV:
 356       return false;
 357     // Otherwise, who knows what this is.
 358     default:
 359       return false;
 360     }
 361   }
 362
 363   /// Classify a global variable reference for the current subtarget according
 364   /// to how we should reference it in a non-pcrel context.
 365   unsigned char classifyLocalReference(const GlobalValue *GV) const;
 366
       // Overloads with and without an explicit Module argument.
 367   unsigned char classifyGlobalReference(const GlobalValue *GV,
 368                                         const Module &M) const;
 369   unsigned char classifyGlobalReference(const GlobalValue *GV) const;
 370
 371   /// Classify a global function reference for the current subtarget.
 372   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
 373                                                 const Module &M) const;
 374   unsigned char
 375   classifyGlobalFunctionReference(const GlobalValue *GV) const override;
 376
 377   /// Classify a blockaddress reference for the current subtarget according to
 378   /// how we should reference it in a non-pcrel context.
 379   unsigned char classifyBlockAddressReference() const;
 380
 381   /// Return true if the subtarget allows calls to immediate address.
 382   bool isLegalToCallImmediateAddr() const;
 383
 384   /// Return whether FrameLowering should always set the "extended frame
 385   /// present" bit in FP, or set it based on a symbol in the runtime.
 386   bool swiftAsyncContextIsDynamicallySet() const {
 387     // Older OS versions (particularly system unwinders) are confused by the
 388     // Swift extended frame, so when building code that might be run on them we
 389     // must dynamically query the concurrency library to determine whether
 390     // extended frames should be flagged as present.
 391     const Triple &TT = getTargetTriple();
 392
         // Only the major OS version is compared; each case below returns true
         // for majors below its threshold, and every other OS yields false.
 393     unsigned Major = TT.getOSVersion().getMajor();
 394     switch(TT.getOS()) {
 395     default:
 396       return false;
 397     case Triple::IOS:
 398     case Triple::TvOS:
 399       return Major < 15;
 400     case Triple::WatchOS:
 401       return Major < 8;
 402     case Triple::MacOSX:
 403     case Triple::Darwin:
 404       return Major < 12;
 405     }
 406   }
 407
 408   /// If we are using indirect thunks, we need to expand indirectbr to avoid it
 409   /// lowering to an actual indirect jump.
 410   bool enableIndirectBrExpand() const override {
 411     return useIndirectThunkBranches();
 412   }
 413
 414   /// Enable the MachineScheduler pass for all X86 subtargets.
 415   bool enableMachineScheduler() const override { return true; }
 416
 417   bool enableEarlyIfConversion() const override;
 418
 419   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
 420                               &Mutations) const override;
 421
       /// Always returns ANTIDEP_CRITICAL.
 422   AntiDepBreakMode getAntiDepBreakMode() const override {
 423     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
 424   }
 425 };
 426
 427 } // end namespace llvm
 428
 429 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H