lib/Target/AMDGPU/AMDGPUSubtarget.h

   1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// AMDGPU specific subclass of TargetSubtarget.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  17
  18 #include "AMDGPU.h"
  19 #include "AMDGPUCallLowering.h"
  20 #include "R600FrameLowering.h"
  21 #include "R600ISelLowering.h"
  22 #include "R600InstrInfo.h"
  23 #include "SIFrameLowering.h"
  24 #include "SIISelLowering.h"
  25 #include "SIInstrInfo.h"
  26 #include "Utils/AMDGPUBaseInfo.h"
  27 #include "llvm/ADT/Triple.h"
  28 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  29 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  30 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
  31 #include "llvm/CodeGen/MachineFunction.h"
  32 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  33 #include "llvm/MC/MCInstrItineraries.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include <cassert>
  36 #include <cstdint>
  37 #include <memory>
  38 #include <utility>
  39
  40 #define GET_SUBTARGETINFO_HEADER
  41 #include "AMDGPUGenSubtargetInfo.inc"
  42 #define GET_SUBTARGETINFO_HEADER
  43 #include "R600GenSubtargetInfo.inc"
  44
  45 namespace llvm {
  46
  47 class StringRef;
  48
  49 class AMDGPUSubtarget {
  50 public:
  51   enum Generation {
  52     R600 = 0,
  53     R700 = 1,
  54     EVERGREEN = 2,
  55     NORTHERN_ISLANDS = 3,
  56     SOUTHERN_ISLANDS = 4,
  57     SEA_ISLANDS = 5,
  58     VOLCANIC_ISLANDS = 6,
  59     GFX9 = 7
  60   };
  61
  62 private:
  63   Triple TargetTriple;
  64
  65 protected:
  66   const FeatureBitset &SubtargetFeatureBits;
  67   bool Has16BitInsts;
  68   bool HasMadMixInsts;
  69   bool FP32Denormals;
  70   bool FPExceptions;
  71   bool HasSDWA;
  72   bool HasVOP3PInsts;
  73   bool HasMulI24;
  74   bool HasMulU24;
  75   bool HasFminFmaxLegacy;
  76   bool EnablePromoteAlloca;
  77   int LocalMemorySize;
  78   unsigned WavefrontSize;
  79
  80 public:
  81   AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
  82
  83   static const AMDGPUSubtarget &get(const MachineFunction &MF);
  84   static const AMDGPUSubtarget &get(const TargetMachine &TM,
  85                                           const Function &F);
  86
  87   /// \returns Default range flat work group size for a calling convention.
  88   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
  89
  90   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
  91   /// for function \p F, or minimum/maximum flat work group sizes explicitly
  92   /// requested using "amdgpu-flat-work-group-size" attribute attached to
  93   /// function \p F.
  94   ///
  95   /// \returns Subtarget's default values if explicitly requested values cannot
  96   /// be converted to integer, or violate subtarget's specifications.
  97   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
  98
  99   /// \returns Subtarget's default pair of minimum/maximum number of waves per
 100   /// execution unit for function \p F, or minimum/maximum number of waves per
 101   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
 102   /// attached to function \p F.
 103   ///
 104   /// \returns Subtarget's default values if explicitly requested values cannot
 105   /// be converted to integer, violate subtarget's specifications, or are not
 106   /// compatible with minimum/maximum number of waves limited by flat work group
 107   /// size, register usage, and/or lds usage.
 108   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
 109
 110   /// Return the amount of LDS that can be used that will not restrict the
 111   /// occupancy lower than WaveCount.
 112   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 113                                            const Function &) const;
 114
 115   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
 116   /// the given LDS memory size is the only constraint.
 117   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
 118
 119   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
 120
 121   bool isAmdHsaOS() const {
 122     return TargetTriple.getOS() == Triple::AMDHSA;
 123   }
 124
 125   bool isAmdPalOS() const {
 126     return TargetTriple.getOS() == Triple::AMDPAL;
 127   }
 128
 129   bool isMesa3DOS() const {
 130     return TargetTriple.getOS() == Triple::Mesa3D;
 131   }
 132
 133   bool isMesaKernel(const Function &F) const {
 134     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
 135   }
 136
 137   bool isAmdCodeObjectV2(const Function &F) const {
 138     return isAmdHsaOS() || isMesaKernel(F);
 139   }
 140
 141   bool has16BitInsts() const {
 142     return Has16BitInsts;
 143   }
 144
 145   bool hasMadMixInsts() const {
 146     return HasMadMixInsts;
 147   }
 148
 149   bool hasFP32Denormals() const {
 150     return FP32Denormals;
 151   }
 152
 153   bool hasFPExceptions() const {
 154     return FPExceptions;
 155   }
 156
 157   bool hasSDWA() const {
 158     return HasSDWA;
 159   }
 160
 161   bool hasVOP3PInsts() const {
 162     return HasVOP3PInsts;
 163   }
 164
 165   bool hasMulI24() const {
 166     return HasMulI24;
 167   }
 168
 169   bool hasMulU24() const {
 170     return HasMulU24;
 171   }
 172
 173   bool hasFminFmaxLegacy() const {
 174     return HasFminFmaxLegacy;
 175   }
 176
 177   bool isPromoteAllocaEnabled() const {
 178     return EnablePromoteAlloca;
 179   }
 180
 181   unsigned getWavefrontSize() const {
 182     return WavefrontSize;
 183   }
 184
 185   int getLocalMemorySize() const {
 186     return LocalMemorySize;
 187   }
 188
 189   unsigned getAlignmentForImplicitArgPtr() const {
 190     return isAmdHsaOS() ? 8 : 4;
 191   }
 192
 193   /// Returns the offset in bytes from the start of the input buffer
 194   ///        of the first explicit kernel argument.
 195   unsigned getExplicitKernelArgOffset(const Function &F) const {
 196     return isAmdCodeObjectV2(F) ? 0 : 36;
 197   }
 198
 199   /// \returns Maximum number of work groups per compute unit supported by the
 200   /// subtarget and limited by given \p FlatWorkGroupSize.
 201   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
 202     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
 203                                                   FlatWorkGroupSize);
 204   }
 205
 206   /// \returns Minimum flat work group size supported by the subtarget.
 207   unsigned getMinFlatWorkGroupSize() const {
 208     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
 209   }
 210
 211   /// \returns Maximum flat work group size supported by the subtarget.
 212   unsigned getMaxFlatWorkGroupSize() const {
 213     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
 214   }
 215
 216   /// \returns Maximum number of waves per execution unit supported by the
 217   /// subtarget and limited by given \p FlatWorkGroupSize.
 218   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
 219     return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
 220                                              FlatWorkGroupSize);
 221   }
 222
 223   /// \returns Minimum number of waves per execution unit supported by the
 224   /// subtarget.
 225   unsigned getMinWavesPerEU() const {
 226     return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
 227   }
 228
 229   unsigned getMaxWavesPerEU() const { return 10; }
 230
 231   /// Creates value range metadata on an workitemid.* inrinsic call or load.
 232   bool makeLIDRangeMetadata(Instruction *I) const;
 233
 234   virtual ~AMDGPUSubtarget() {}
 235 };
 236
 237 class GCNSubtarget : public AMDGPUGenSubtargetInfo,
 238                      public AMDGPUSubtarget {
 239 public:
 240   enum {
 241     ISAVersion0_0_0,
 242     ISAVersion6_0_0,
 243     ISAVersion6_0_1,
 244     ISAVersion7_0_0,
 245     ISAVersion7_0_1,
 246     ISAVersion7_0_2,
 247     ISAVersion7_0_3,
 248     ISAVersion7_0_4,
 249     ISAVersion8_0_1,
 250     ISAVersion8_0_2,
 251     ISAVersion8_0_3,
 252     ISAVersion8_1_0,
 253     ISAVersion9_0_0,
 254     ISAVersion9_0_2,
 255     ISAVersion9_0_4,
 256     ISAVersion9_0_6,
 257   };
 258
 259   enum TrapHandlerAbi {
 260     TrapHandlerAbiNone = 0,
 261     TrapHandlerAbiHsa = 1
 262   };
 263
 264   enum TrapID {
 265     TrapIDHardwareReserved = 0,
 266     TrapIDHSADebugTrap = 1,
 267     TrapIDLLVMTrap = 2,
 268     TrapIDLLVMDebugTrap = 3,
 269     TrapIDDebugBreakpoint = 7,
 270     TrapIDDebugReserved8 = 8,
 271     TrapIDDebugReservedFE = 0xfe,
 272     TrapIDDebugReservedFF = 0xff
 273   };
 274
 275   enum TrapRegValues {
 276     LLVMTrapHandlerRegValue = 1
 277   };
 278
 279 private:
 280   /// GlobalISel related APIs.
 281   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
 282   std::unique_ptr<InstructionSelector> InstSelector;
 283   std::unique_ptr<LegalizerInfo> Legalizer;
 284   std::unique_ptr<RegisterBankInfo> RegBankInfo;
 285
 286 protected:
 287   // Basic subtarget description.
 288   Triple TargetTriple;
 289   unsigned Gen;
 290   unsigned IsaVersion;
 291   int LDSBankCount;
 292   unsigned MaxPrivateElementSize;
 293
 294   // Possibly statically set by tablegen, but may want to be overridden.
 295   bool FastFMAF32;
 296   bool HalfRate64Ops;
 297
 298   // Dynamially set bits that enable features.
 299   bool FP64FP16Denormals;
 300   bool DX10Clamp;
 301   bool FlatForGlobal;
 302   bool AutoWaitcntBeforeBarrier;
 303   bool CodeObjectV3;
 304   bool UnalignedScratchAccess;
 305   bool UnalignedBufferAccess;
 306   bool HasApertureRegs;
 307   bool EnableXNACK;
 308   bool TrapHandler;
 309   bool DebuggerInsertNops;
 310   bool DebuggerEmitPrologue;
 311
 312   // Used as options.
 313   bool EnableHugePrivateBuffer;
 314   bool EnableVGPRSpilling;
 315   bool EnableLoadStoreOpt;
 316   bool EnableUnsafeDSOffsetFolding;
 317   bool EnableSIScheduler;
 318   bool EnableDS128;
 319   bool DumpCode;
 320
 321   // Subtarget statically properties set by tablegen
 322   bool FP64;
 323   bool FMA;
 324   bool MIMG_R128;
 325   bool IsGCN;
 326   bool GCN3Encoding;
 327   bool CIInsts;
 328   bool GFX9Insts;
 329   bool SGPRInitBug;
 330   bool HasSMemRealTime;
 331   bool HasIntClamp;
 332   bool HasFmaMixInsts;
 333   bool HasMovrel;
 334   bool HasVGPRIndexMode;
 335   bool HasScalarStores;
 336   bool HasScalarAtomics;
 337   bool HasInv2PiInlineImm;
 338   bool HasSDWAOmod;
 339   bool HasSDWAScalar;
 340   bool HasSDWASdst;
 341   bool HasSDWAMac;
 342   bool HasSDWAOutModsVOPC;
 343   bool HasDPP;
 344   bool HasDLInsts;
 345   bool D16PreservesUnusedBits;
 346   bool FlatAddressSpace;
 347   bool FlatInstOffsets;
 348   bool FlatGlobalInsts;
 349   bool FlatScratchInsts;
 350   bool AddNoCarryInsts;
 351   bool HasUnpackedD16VMem;
 352   bool R600ALUInst;
 353   bool CaymanISA;
 354   bool CFALUBug;
 355   bool HasVertexCache;
 356   short TexVTXClauseSize;
 357   bool ScalarizeGlobal;
 358
 359   // Dummy feature to use for assembler in tablegen.
 360   bool FeatureDisable;
 361
 362   SelectionDAGTargetInfo TSInfo;
 363   AMDGPUAS AS;
 364 private:
 365   SIInstrInfo InstrInfo;
 366   SITargetLowering TLInfo;
 367   SIFrameLowering FrameLowering;
 368
 369 public:
 370   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 371                const GCNTargetMachine &TM);
 372   ~GCNSubtarget() override;
 373
 374   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
 375                                                    StringRef GPU, StringRef FS);
 376
 377   const SIInstrInfo *getInstrInfo() const override {
 378     return &InstrInfo;
 379   }
 380
 381   const SIFrameLowering *getFrameLowering() const override {
 382     return &FrameLowering;
 383   }
 384
 385   const SITargetLowering *getTargetLowering() const override {
 386     return &TLInfo;
 387   }
 388
 389   const SIRegisterInfo *getRegisterInfo() const override {
 390     return &InstrInfo.getRegisterInfo();
 391   }
 392
 393   const CallLowering *getCallLowering() const override {
 394     return CallLoweringInfo.get();
 395   }
 396
 397   const InstructionSelector *getInstructionSelector() const override {
 398     return InstSelector.get();
 399   }
 400
 401   const LegalizerInfo *getLegalizerInfo() const override {
 402     return Legalizer.get();
 403   }
 404
 405   const RegisterBankInfo *getRegBankInfo() const override {
 406     return RegBankInfo.get();
 407   }
 408
 409   // Nothing implemented, just prevent crashes on use.
 410   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 411     return &TSInfo;
 412   }
 413
 414   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 415
 416   Generation getGeneration() const {
 417     return (Generation)Gen;
 418   }
 419
 420   unsigned getWavefrontSizeLog2() const {
 421     return Log2_32(WavefrontSize);
 422   }
 423
 424   int getLDSBankCount() const {
 425     return LDSBankCount;
 426   }
 427
 428   unsigned getMaxPrivateElementSize() const {
 429     return MaxPrivateElementSize;
 430   }
 431
 432   AMDGPUAS getAMDGPUAS() const {
 433     return AS;
 434   }
 435
 436   bool hasIntClamp() const {
 437     return HasIntClamp;
 438   }
 439
 440   bool hasFP64() const {
 441     return FP64;
 442   }
 443
 444   bool hasMIMG_R128() const {
 445     return MIMG_R128;
 446   }
 447
 448   bool hasHWFP64() const {
 449     return FP64;
 450   }
 451
 452   bool hasFastFMAF32() const {
 453     return FastFMAF32;
 454   }
 455
 456   bool hasHalfRate64Ops() const {
 457     return HalfRate64Ops;
 458   }
 459
 460   bool hasAddr64() const {
 461     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
 462   }
 463
 464   bool hasBFE() const {
 465     return true;
 466   }
 467
 468   bool hasBFI() const {
 469     return true;
 470   }
 471
 472   bool hasBFM() const {
 473     return hasBFE();
 474   }
 475
 476   bool hasBCNT(unsigned Size) const {
 477     return true;
 478   }
 479
 480   bool hasFFBL() const {
 481     return true;
 482   }
 483
 484   bool hasFFBH() const {
 485     return true;
 486   }
 487
 488   bool hasMed3_16() const {
 489     return getGeneration() >= AMDGPUSubtarget::GFX9;
 490   }
 491
 492   bool hasMin3Max3_16() const {
 493     return getGeneration() >= AMDGPUSubtarget::GFX9;
 494   }
 495
 496   bool hasFmaMixInsts() const {
 497     return HasFmaMixInsts;
 498   }
 499
 500   bool hasCARRY() const {
 501     return true;
 502   }
 503
 504   bool hasFMA() const {
 505     return FMA;
 506   }
 507
 508   TrapHandlerAbi getTrapHandlerAbi() const {
 509     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
 510   }
 511
 512   bool enableHugePrivateBuffer() const {
 513     return EnableHugePrivateBuffer;
 514   }
 515
 516   bool unsafeDSOffsetFoldingEnabled() const {
 517     return EnableUnsafeDSOffsetFolding;
 518   }
 519
 520   bool dumpCode() const {
 521     return DumpCode;
 522   }
 523
 524   /// Return the amount of LDS that can be used that will not restrict the
 525   /// occupancy lower than WaveCount.
 526   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 527                                            const Function &) const;
 528
 529   bool hasFP16Denormals() const {
 530     return FP64FP16Denormals;
 531   }
 532
 533   bool hasFP64Denormals() const {
 534     return FP64FP16Denormals;
 535   }
 536
 537   bool supportsMinMaxDenormModes() const {
 538     return getGeneration() >= AMDGPUSubtarget::GFX9;
 539   }
 540
 541   bool enableDX10Clamp() const {
 542     return DX10Clamp;
 543   }
 544
 545   bool enableIEEEBit(const MachineFunction &MF) const {
 546     return AMDGPU::isCompute(MF.getFunction().getCallingConv());
 547   }
 548
 549   bool useFlatForGlobal() const {
 550     return FlatForGlobal;
 551   }
 552
 553   /// \returns If target supports ds_read/write_b128 and user enables generation
 554   /// of ds_read/write_b128.
 555   bool useDS128() const {
 556     return CIInsts && EnableDS128;
 557   }
 558
 559   /// \returns If MUBUF instructions always perform range checking, even for
 560   /// buffer resources used for private memory access.
 561   bool privateMemoryResourceIsRangeChecked() const {
 562     return getGeneration() < AMDGPUSubtarget::GFX9;
 563   }
 564
 565   bool hasAutoWaitcntBeforeBarrier() const {
 566     return AutoWaitcntBeforeBarrier;
 567   }
 568
 569   bool hasCodeObjectV3() const {
 570     return CodeObjectV3;
 571   }
 572
 573   bool hasUnalignedBufferAccess() const {
 574     return UnalignedBufferAccess;
 575   }
 576
 577   bool hasUnalignedScratchAccess() const {
 578     return UnalignedScratchAccess;
 579   }
 580
 581   bool hasApertureRegs() const {
 582     return HasApertureRegs;
 583   }
 584
 585   bool isTrapHandlerEnabled() const {
 586     return TrapHandler;
 587   }
 588
 589   bool isXNACKEnabled() const {
 590     return EnableXNACK;
 591   }
 592
 593   bool hasFlatAddressSpace() const {
 594     return FlatAddressSpace;
 595   }
 596
 597   bool hasFlatInstOffsets() const {
 598     return FlatInstOffsets;
 599   }
 600
 601   bool hasFlatGlobalInsts() const {
 602     return FlatGlobalInsts;
 603   }
 604
 605   bool hasFlatScratchInsts() const {
 606     return FlatScratchInsts;
 607   }
 608
 609   bool hasFlatLgkmVMemCountInOrder() const {
 610     return getGeneration() > GFX9;
 611   }
 612
 613   bool hasD16LoadStore() const {
 614     return getGeneration() >= GFX9;
 615   }
 616
 617   /// Return if most LDS instructions have an m0 use that require m0 to be
 618   /// iniitalized.
 619   bool ldsRequiresM0Init() const {
 620     return getGeneration() < GFX9;
 621   }
 622
 623   bool hasAddNoCarry() const {
 624     return AddNoCarryInsts;
 625   }
 626
 627   bool hasUnpackedD16VMem() const {
 628     return HasUnpackedD16VMem;
 629   }
 630
 631   // Covers VS/PS/CS graphics shaders
 632   bool isMesaGfxShader(const Function &F) const {
 633     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
 634   }
 635
 636   bool hasMad64_32() const {
 637     return getGeneration() >= SEA_ISLANDS;
 638   }
 639
 640   bool hasSDWAOmod() const {
 641     return HasSDWAOmod;
 642   }
 643
 644   bool hasSDWAScalar() const {
 645     return HasSDWAScalar;
 646   }
 647
 648   bool hasSDWASdst() const {
 649     return HasSDWASdst;
 650   }
 651
 652   bool hasSDWAMac() const {
 653     return HasSDWAMac;
 654   }
 655
 656   bool hasSDWAOutModsVOPC() const {
 657     return HasSDWAOutModsVOPC;
 658   }
 659
 660   bool vmemWriteNeedsExpWaitcnt() const {
 661     return getGeneration() < SEA_ISLANDS;
 662   }
 663
 664   bool hasDLInsts() const {
 665     return HasDLInsts;
 666   }
 667
 668   bool d16PreservesUnusedBits() const {
 669     return D16PreservesUnusedBits;
 670   }
 671
 672   /// \returns Number of bytes of arguments that are passed to a shader or
 673   /// kernel in addition to the explicit ones declared for the function.
 674   unsigned getImplicitArgNumBytes(const Function &F) const {
 675     if (isMesaKernel(F))
 676       return 16;
 677     return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
 678   }
 679
 680   // Scratch is allocated in 256 dword per wave blocks for the entire
 681   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
 682   // is 4-byte aligned.
 683   //
 684   // Only 4-byte alignment is really needed to access anything. Transformations
 685   // on the pointer value itself may rely on the alignment / known low bits of
 686   // the pointer. Set this to something above the minimum to avoid needing
 687   // dynamic realignment in common cases.
 688   unsigned getStackAlignment() const {
 689     return 16;
 690   }
 691
 692   bool enableMachineScheduler() const override {
 693     return true;
 694   }
 695
 696   bool enableSubRegLiveness() const override {
 697     return true;
 698   }
 699
 700   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
 701   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
 702
 703   /// \returns Number of execution units per compute unit supported by the
 704   /// subtarget.
 705   unsigned getEUsPerCU() const {
 706     return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
 707   }
 708
 709   /// \returns Maximum number of waves per compute unit supported by the
 710   /// subtarget without any kind of limitation.
 711   unsigned getMaxWavesPerCU() const {
 712     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
 713   }
 714
 715   /// \returns Maximum number of waves per compute unit supported by the
 716   /// subtarget and limited by given \p FlatWorkGroupSize.
 717   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
 718     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
 719                                              FlatWorkGroupSize);
 720   }
 721
 722   /// \returns Maximum number of waves per execution unit supported by the
 723   /// subtarget without any kind of limitation.
 724   unsigned getMaxWavesPerEU() const {
 725     return AMDGPU::IsaInfo::getMaxWavesPerEU();
 726   }
 727
 728   /// \returns Number of waves per work group supported by the subtarget and
 729   /// limited by given \p FlatWorkGroupSize.
 730   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
 731     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
 732         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
 733   }
 734
 735   // static wrappers
 736   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
 737
 738   // XXX - Why is this here if it isn't in the default pass set?
 739   bool enableEarlyIfConversion() const override {
 740     return true;
 741   }
 742
 743   void overrideSchedPolicy(MachineSchedPolicy &Policy,
 744                            unsigned NumRegionInstrs) const override;
 745
 746   bool isVGPRSpillingEnabled(const Function &F) const;
 747
 748   unsigned getMaxNumUserSGPRs() const {
 749     return 16;
 750   }
 751
 752   bool hasSMemRealTime() const {
 753     return HasSMemRealTime;
 754   }
 755
 756   bool hasMovrel() const {
 757     return HasMovrel;
 758   }
 759
 760   bool hasVGPRIndexMode() const {
 761     return HasVGPRIndexMode;
 762   }
 763
 764   bool useVGPRIndexMode(bool UserEnable) const {
 765     return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
 766   }
 767
 768   bool hasScalarCompareEq64() const {
 769     return getGeneration() >= VOLCANIC_ISLANDS;
 770   }
 771
 772   bool hasScalarStores() const {
 773     return HasScalarStores;
 774   }
 775
 776   bool hasScalarAtomics() const {
 777     return HasScalarAtomics;
 778   }
 779
 780   bool hasInv2PiInlineImm() const {
 781     return HasInv2PiInlineImm;
 782   }
 783
 784   bool hasDPP() const {
 785     return HasDPP;
 786   }
 787
 788   bool enableSIScheduler() const {
 789     return EnableSIScheduler;
 790   }
 791
 792   bool debuggerSupported() const {
 793     return debuggerInsertNops() && debuggerEmitPrologue();
 794   }
 795
 796   bool debuggerInsertNops() const {
 797     return DebuggerInsertNops;
 798   }
 799
 800   bool debuggerEmitPrologue() const {
 801     return DebuggerEmitPrologue;
 802   }
 803
 804   bool loadStoreOptEnabled() const {
 805     return EnableLoadStoreOpt;
 806   }
 807
 808   bool hasSGPRInitBug() const {
 809     return SGPRInitBug;
 810   }
 811
 812   bool has12DWordStoreHazard() const {
 813     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
 814   }
 815
 816   bool hasSMovFedHazard() const {
 817     return getGeneration() >= AMDGPUSubtarget::GFX9;
 818   }
 819
 820   bool hasReadM0MovRelInterpHazard() const {
 821     return getGeneration() >= AMDGPUSubtarget::GFX9;
 822   }
 823
 824   bool hasReadM0SendMsgHazard() const {
 825     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
 826   }
 827
 828   uint64_t getExplicitKernArgSize(const Function &F) const;
 829   unsigned getKernArgSegmentSize(const Function &F,
 830                                  int64_t ExplicitArgBytes = -1) const;
 831
 832   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
 833   /// SGPRs
 834   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 835
 836   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
 837   /// VGPRs
 838   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
 839
 840   /// \returns true if the flat_scratch register should be initialized with the
 841   /// pointer to the wave's scratch memory rather than a size and offset.
 842   bool flatScratchIsPointer() const {
 843     return getGeneration() >= AMDGPUSubtarget::GFX9;
 844   }
 845
 846   /// \returns true if the machine has merged shaders in which s0-s7 are
 847   /// reserved by the hardware and user SGPRs start at s8
 848   bool hasMergedShaders() const {
 849     return getGeneration() >= GFX9;
 850   }
 851
 852   /// \returns SGPR allocation granularity supported by the subtarget.
 853   unsigned getSGPRAllocGranule() const {
 854     return AMDGPU::IsaInfo::getSGPRAllocGranule(
 855         MCSubtargetInfo::getFeatureBits());
 856   }
 857
 858   /// \returns SGPR encoding granularity supported by the subtarget.
 859   unsigned getSGPREncodingGranule() const {
 860     return AMDGPU::IsaInfo::getSGPREncodingGranule(
 861         MCSubtargetInfo::getFeatureBits());
 862   }
 863
 864   /// \returns Total number of SGPRs supported by the subtarget.
 865   unsigned getTotalNumSGPRs() const {
 866     return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
 867   }
 868
 869   /// \returns Addressable number of SGPRs supported by the subtarget.
 870   unsigned getAddressableNumSGPRs() const {
 871     return AMDGPU::IsaInfo::getAddressableNumSGPRs(
 872         MCSubtargetInfo::getFeatureBits());
 873   }
 874
 875   /// \returns Minimum number of SGPRs that meets the given number of waves per
 876   /// execution unit requirement supported by the subtarget.
 877   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
 878     return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
 879                                            WavesPerEU);
 880   }
 881
 882   /// \returns Maximum number of SGPRs that meets the given number of waves per
 883   /// execution unit requirement supported by the subtarget.
 884   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
 885     return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
 886                                            WavesPerEU, Addressable);
 887   }
 888
 889   /// \returns Reserved number of SGPRs for given function \p MF.
 890   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
 891
 892   /// \returns Maximum number of SGPRs that meets number of waves per execution
 893   /// unit requirement for function \p MF, or number of SGPRs explicitly
 894   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
 895   ///
 896   /// \returns Value that meets number of waves per execution unit requirement
 897   /// if explicitly requested value cannot be converted to integer, violates
 898   /// subtarget's specifications, or does not meet number of waves per execution
 899   /// unit requirement.
 900   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
 901
 902   /// \returns VGPR allocation granularity supported by the subtarget.
 903   unsigned getVGPRAllocGranule() const {
 904     return AMDGPU::IsaInfo::getVGPRAllocGranule(
 905         MCSubtargetInfo::getFeatureBits());
 906   }
 907
 908   /// \returns VGPR encoding granularity supported by the subtarget.
 909   unsigned getVGPREncodingGranule() const {
 910     return AMDGPU::IsaInfo::getVGPREncodingGranule(
 911         MCSubtargetInfo::getFeatureBits());
 912   }
 913
 914   /// \returns Total number of VGPRs supported by the subtarget.
 915   unsigned getTotalNumVGPRs() const {
 916     return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
 917   }
 918
 919   /// \returns Addressable number of VGPRs supported by the subtarget.
 920   unsigned getAddressableNumVGPRs() const {
 921     return AMDGPU::IsaInfo::getAddressableNumVGPRs(
 922         MCSubtargetInfo::getFeatureBits());
 923   }
 924
 925   /// \returns Minimum number of VGPRs that meets given number of waves per
 926   /// execution unit requirement supported by the subtarget.
 927   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
 928     return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
 929                                            WavesPerEU);
 930   }
 931
 932   /// \returns Maximum number of VGPRs that meets given number of waves per
 933   /// execution unit requirement supported by the subtarget.
 934   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
 935     return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
 936                                            WavesPerEU);
 937   }
 938
 939   /// \returns Maximum number of VGPRs that meets number of waves per execution
 940   /// unit requirement for function \p MF, or number of VGPRs explicitly
 941   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
 942   ///
 943   /// \returns Value that meets number of waves per execution unit requirement
 944   /// if explicitly requested value cannot be converted to integer, violates
 945   /// subtarget's specifications, or does not meet number of waves per execution
 946   /// unit requirement.
 947   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 948
 949   void getPostRAMutations(
 950       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
 951       const override;
 952 };
 953
 954 class R600Subtarget final : public R600GenSubtargetInfo,
 955                             public AMDGPUSubtarget {
 956 private:
 957   R600InstrInfo InstrInfo;
 958   R600FrameLowering FrameLowering;
 959   bool FMA;
 960   bool CaymanISA;
 961   bool CFALUBug;
 962   bool DX10Clamp;
 963   bool HasVertexCache;
 964   bool R600ALUInst;
 965   bool FP64;
 966   short TexVTXClauseSize;
 967   Generation Gen;
 968   R600TargetLowering TLInfo;
 969   InstrItineraryData InstrItins;
 970   SelectionDAGTargetInfo TSInfo;
 971   AMDGPUAS AS;
 972
 973 public:
 974   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
 975                 const TargetMachine &TM);
 976
 977   const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
 978
 979   const R600FrameLowering *getFrameLowering() const override {
 980     return &FrameLowering;
 981   }
 982
 983   const R600TargetLowering *getTargetLowering() const override {
 984     return &TLInfo;
 985   }
 986
 987   const R600RegisterInfo *getRegisterInfo() const override {
 988     return &InstrInfo.getRegisterInfo();
 989   }
 990
 991   const InstrItineraryData *getInstrItineraryData() const override {
 992     return &InstrItins;
 993   }
 994
 995   // Nothing implemented, just prevent crashes on use.
 996   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 997     return &TSInfo;
 998   }
 999
1000   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1001
1002   Generation getGeneration() const {
1003     return Gen;
1004   }
1005
1006   unsigned getStackAlignment() const {
1007     return 4;
1008   }
1009
1010   R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1011                                                  StringRef GPU, StringRef FS);
1012
1013   bool hasBFE() const {
1014     return (getGeneration() >= EVERGREEN);
1015   }
1016
1017   bool hasBFI() const {
1018     return (getGeneration() >= EVERGREEN);
1019   }
1020
1021   bool hasBCNT(unsigned Size) const {
1022     if (Size == 32)
1023       return (getGeneration() >= EVERGREEN);
1024
1025     return false;
1026   }
1027
1028   bool hasBORROW() const {
1029     return (getGeneration() >= EVERGREEN);
1030   }
1031
1032   bool hasCARRY() const {
1033     return (getGeneration() >= EVERGREEN);
1034   }
1035
1036   bool hasCaymanISA() const {
1037     return CaymanISA;
1038   }
1039
1040   bool hasFFBL() const {
1041     return (getGeneration() >= EVERGREEN);
1042   }
1043
1044   bool hasFFBH() const {
1045     return (getGeneration() >= EVERGREEN);
1046   }
1047
1048   bool hasFMA() const { return FMA; }
1049
1050   bool hasCFAluBug() const { return CFALUBug; }
1051
1052   bool hasVertexCache() const { return HasVertexCache; }
1053
1054   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1055
1056   AMDGPUAS getAMDGPUAS() const { return AS; }
1057
1058   bool enableMachineScheduler() const override {
1059     return true;
1060   }
1061
1062   bool enableSubRegLiveness() const override {
1063     return true;
1064   }
1065 };
1066
1067 } // end namespace llvm
1068
1069 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H