lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

   1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //==-----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// Defines an instruction selector for the AMDGPU target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "AMDGPU.h"
  15 #include "AMDGPUArgumentUsageInfo.h"
  16 #include "AMDGPUISelLowering.h" // For AMDGPUISD
  17 #include "AMDGPUInstrInfo.h"
  18 #include "AMDGPUPerfHintAnalysis.h"
  19 #include "AMDGPURegisterInfo.h"
  20 #include "AMDGPUSubtarget.h"
  21 #include "AMDGPUTargetMachine.h"
  22 #include "SIDefines.h"
  23 #include "SIISelLowering.h"
  24 #include "SIInstrInfo.h"
  25 #include "SIMachineFunctionInfo.h"
  26 #include "SIRegisterInfo.h"
  27 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  28 #include "llvm/ADT/APInt.h"
  29 #include "llvm/ADT/SmallVector.h"
  30 #include "llvm/ADT/StringRef.h"
  31 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  32 #include "llvm/Analysis/ValueTracking.h"
  33 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  34 #include "llvm/CodeGen/ISDOpcodes.h"
  35 #include "llvm/CodeGen/MachineFunction.h"
  36 #include "llvm/CodeGen/MachineRegisterInfo.h"
  37 #include "llvm/CodeGen/SelectionDAG.h"
  38 #include "llvm/CodeGen/SelectionDAGISel.h"
  39 #include "llvm/CodeGen/SelectionDAGNodes.h"
  40 #include "llvm/CodeGen/ValueTypes.h"
  41 #include "llvm/IR/BasicBlock.h"
  42 #include "llvm/IR/Instruction.h"
  43 #include "llvm/MC/MCInstrDesc.h"
  44 #include "llvm/Support/Casting.h"
  45 #include "llvm/Support/CodeGen.h"
  46 #include "llvm/Support/ErrorHandling.h"
  47 #include "llvm/Support/MachineValueType.h"
  48 #include "llvm/Support/MathExtras.h"
  49 #include <cassert>
  50 #include <cstdint>
  51 #include <new>
  52 #include <vector>
  53
  54 using namespace llvm;
  55
  56 namespace llvm {
  57
  58 class R600InstrInfo;
  59
  60 } // end namespace llvm
  61
  62 //===----------------------------------------------------------------------===//
  63 // Instruction Selector Implementation
  64 //===----------------------------------------------------------------------===//
  65
  66 namespace {
  67
  68 /// AMDGPU specific code to select AMDGPU machine instructions for
  69 /// SelectionDAG operations.
  70 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  71   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  72   // make the right decision when generating code for different targets.
  73   const GCNSubtarget *Subtarget;
  74   bool EnableLateStructurizeCFG;
  75
  76 public:
  77   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
  78                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
  79     : SelectionDAGISel(*TM, OptLevel) {
  80     EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  81   }
  82   ~AMDGPUDAGToDAGISel() override = default;
  83
  84   void getAnalysisUsage(AnalysisUsage &AU) const override {
  85     AU.addRequired<AMDGPUArgumentUsageInfo>();
  86     AU.addRequired<AMDGPUPerfHintAnalysis>();
  87     AU.addRequired<LegacyDivergenceAnalysis>();
  88     SelectionDAGISel::getAnalysisUsage(AU);
  89   }
  90
  91   bool runOnMachineFunction(MachineFunction &MF) override;
  92   void Select(SDNode *N) override;
  93   StringRef getPassName() const override;
  94   void PostprocessISelDAG() override;
  95
  96 protected:
  97   void SelectBuildVector(SDNode *N, unsigned RegClassID);
  98
  99 private:
 100   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
 101   bool isNoNanSrc(SDValue N) const;
 102   bool isInlineImmediate(const SDNode *N) const;
 103   bool isVGPRImm(const SDNode *N) const;
 104   bool isUniformLoad(const SDNode *N) const;
 105   bool isUniformBr(const SDNode *N) const;
 106
 107   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
 108
 109   SDNode *glueCopyToM0LDSInit(SDNode *N) const;
 110   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
 111
 112   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
 113   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
 114   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
 115   bool isDSOffsetLegal(SDValue Base, unsigned Offset,
 116                        unsigned OffsetBits) const;
 117   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
 118   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
 119                                  SDValue &Offset1) const;
 120   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
 121                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
 122                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
 123                    SDValue &TFE) const;
 124   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
 125                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
 126                          SDValue &SLC, SDValue &TFE) const;
 127   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
 128                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
 129                          SDValue &SLC) const;
 130   bool SelectMUBUFScratchOffen(SDNode *Parent,
 131                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
 132                                SDValue &SOffset, SDValue &ImmOffset) const;
 133   bool SelectMUBUFScratchOffset(SDNode *Parent,
 134                                 SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
 135                                 SDValue &Offset) const;
 136
 137   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
 138                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
 139                          SDValue &TFE) const;
 140   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
 141                          SDValue &Offset, SDValue &SLC) const;
 142   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
 143                          SDValue &Offset) const;
 144
 145   bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
 146                         SDValue &Offset, SDValue &SLC) const;
 147   bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
 148                               SDValue &Offset, SDValue &SLC) const;
 149
 150   template <bool IsSigned>
 151   bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
 152                         SDValue &Offset, SDValue &SLC) const;
 153
 154   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
 155                         bool &Imm) const;
 156   SDValue Expand32BitAddress(SDValue Addr) const;
 157   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
 158                   bool &Imm) const;
 159   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
 160   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
 161   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
 162   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
 163   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
 164   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
 165
 166   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 167   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
 168   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 169   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
 170   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
 171                        SDValue &Clamp, SDValue &Omod) const;
 172   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
 173                          SDValue &Clamp, SDValue &Omod) const;
 174
 175   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
 176                                  SDValue &Clamp,
 177                                  SDValue &Omod) const;
 178
 179   bool SelectVOP3OMods(SDValue In, SDValue &Src,
 180                        SDValue &Clamp, SDValue &Omod) const;
 181
 182   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 183   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
 184                         SDValue &Clamp) const;
 185
 186   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 187   bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
 188                         SDValue &Clamp) const;
 189
 190   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 191   bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
 192                             SDValue &Clamp) const;
 193   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
 194   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
 195
 196   bool SelectHi16Elt(SDValue In, SDValue &Src) const;
 197
 198   void SelectADD_SUB_I64(SDNode *N);
 199   void SelectUADDO_USUBO(SDNode *N);
 200   void SelectDIV_SCALE(SDNode *N);
 201   void SelectMAD_64_32(SDNode *N);
 202   void SelectFMA_W_CHAIN(SDNode *N);
 203   void SelectFMUL_W_CHAIN(SDNode *N);
 204
 205   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
 206                    uint32_t Offset, uint32_t Width);
 207   void SelectS_BFEFromShifts(SDNode *N);
 208   void SelectS_BFE(SDNode *N);
 209   bool isCBranchSCC(const SDNode *N) const;
 210   void SelectBRCOND(SDNode *N);
 211   void SelectFMAD_FMA(SDNode *N);
 212   void SelectATOMIC_CMP_SWAP(SDNode *N);
 213   void SelectINTRINSIC_W_CHAIN(SDNode *N);
 214
 215 protected:
 216   // Include the pieces autogenerated from the target description.
 217 #include "AMDGPUGenDAGISel.inc"
 218 };
 219
 220 class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
 221   const R600Subtarget *Subtarget;
 222
 223   bool isConstantLoad(const MemSDNode *N, int cbID) const;
 224   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
 225   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
 226                                        SDValue& Offset);
 227 public:
 228   explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
 229       AMDGPUDAGToDAGISel(TM, OptLevel) {}
 230
 231   void Select(SDNode *N) override;
 232
 233   bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
 234                           SDValue &Offset) override;
 235   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 236                           SDValue &Offset) override;
 237
 238   bool runOnMachineFunction(MachineFunction &MF) override;
 239 protected:
 240   // Include the pieces autogenerated from the target description.
 241 #include "R600GenDAGISel.inc"
 242 };
 243
 244 }  // end anonymous namespace
 245
// Pass registration for AMDGPUDAGToDAGISel under the name "amdgpu-isel".
// The dependencies listed here are the same three analyses that
// AMDGPUDAGToDAGISel::getAnalysisUsage() marks as required.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
 253
 254 /// This pass converts a legalized DAG into a AMDGPU-specific
 255 // DAG, ready for instruction scheduling.
 256 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
 257                                         CodeGenOpt::Level OptLevel) {
 258   return new AMDGPUDAGToDAGISel(TM, OptLevel);
 259 }
 260
 261 /// This pass converts a legalized DAG into a R600-specific
 262 // DAG, ready for instruction scheduling.
 263 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
 264                                       CodeGenOpt::Level OptLevel) {
 265   return new R600DAGToDAGISel(TM, OptLevel);
 266 }
 267
 268 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
 269   Subtarget = &MF.getSubtarget<GCNSubtarget>();
 270   return SelectionDAGISel::runOnMachineFunction(MF);
 271 }
 272
 273 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
 274   if (TM.Options.NoNaNsFPMath)
 275     return true;
 276
 277   // TODO: Move into isKnownNeverNaN
 278   if (N->getFlags().isDefined())
 279     return N->getFlags().hasNoNaNs();
 280
 281   return CurDAG->isKnownNeverNaN(N);
 282 }
 283
 284 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
 285   const SIInstrInfo *TII = Subtarget->getInstrInfo();
 286
 287   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
 288     return TII->isInlineConstant(C->getAPIntValue());
 289
 290   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
 291     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
 292
 293   return false;
 294 }
 295
 296 /// Determine the register class for \p OpNo
 297 /// \returns The register class of the virtual register that will be used for
 298 /// the given operand number \OpNo or NULL if the register class cannot be
 299 /// determined.
 300 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
 301                                                           unsigned OpNo) const {
 302   if (!N->isMachineOpcode()) {
 303     if (N->getOpcode() == ISD::CopyToReg) {
 304       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
 305       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
 306         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
 307         return MRI.getRegClass(Reg);
 308       }
 309
 310       const SIRegisterInfo *TRI
 311         = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
 312       return TRI->getPhysRegClass(Reg);
 313     }
 314
 315     return nullptr;
 316   }
 317
 318   switch (N->getMachineOpcode()) {
 319   default: {
 320     const MCInstrDesc &Desc =
 321         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
 322     unsigned OpIdx = Desc.getNumDefs() + OpNo;
 323     if (OpIdx >= Desc.getNumOperands())
 324       return nullptr;
 325     int RegClass = Desc.OpInfo[OpIdx].RegClass;
 326     if (RegClass == -1)
 327       return nullptr;
 328
 329     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
 330   }
 331   case AMDGPU::REG_SEQUENCE: {
 332     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
 333     const TargetRegisterClass *SuperRC =
 334         Subtarget->getRegisterInfo()->getRegClass(RCID);
 335
 336     SDValue SubRegOp = N->getOperand(OpNo + 1);
 337     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
 338     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
 339                                                               SubRegIdx);
 340   }
 341   }
 342 }
 343
 344 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
 345   const SITargetLowering& Lowering =
 346     *static_cast<const SITargetLowering*>(getTargetLowering());
 347
 348   // Write max value to m0 before each load operation
 349
 350   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
 351                                  Val);
 352
 353   SDValue Glue = M0.getValue(1);
 354
 355   SmallVector <SDValue, 8> Ops;
 356   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
 357     Ops.push_back(N->getOperand(i));
 358
 359   Ops.push_back(Glue);
 360   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
 361 }
 362
 363 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
 364   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
 365       !Subtarget->ldsRequiresM0Init())
 366     return N;
 367   return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
 368 }
 369
 370 MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
 371                                                   EVT VT) const {
 372   SDNode *Lo = CurDAG->getMachineNode(
 373       AMDGPU::S_MOV_B32, DL, MVT::i32,
 374       CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
 375   SDNode *Hi =
 376       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
 377                              CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
 378   const SDValue Ops[] = {
 379       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
 380       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
 381       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
 382
 383   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
 384 }
 385
 386 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
 387   switch (NumVectorElts) {
 388   case 1:
 389     return AMDGPU::SReg_32_XM0RegClassID;
 390   case 2:
 391     return AMDGPU::SReg_64RegClassID;
 392   case 4:
 393     return AMDGPU::SReg_128RegClassID;
 394   case 8:
 395     return AMDGPU::SReg_256RegClassID;
 396   case 16:
 397     return AMDGPU::SReg_512RegClassID;
 398   }
 399
 400   llvm_unreachable("invalid vector size");
 401 }
 402
 403 static bool getConstantValue(SDValue N, uint32_t &Out) {
 404   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
 405     Out = C->getAPIntValue().getZExtValue();
 406     return true;
 407   }
 408
 409   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
 410     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
 411     return true;
 412   }
 413
 414   return false;
 415 }
 416
 417 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
 418   EVT VT = N->getValueType(0);
 419   unsigned NumVectorElts = VT.getVectorNumElements();
 420   EVT EltVT = VT.getVectorElementType();
 421   SDLoc DL(N);
 422   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
 423
 424   if (NumVectorElts == 1) {
 425     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
 426                          RegClass);
 427     return;
 428   }
 429
 430   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
 431                                   "supported yet");
 432   // 16 = Max Num Vector Elements
 433   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
 434   // 1 = Vector Register Class
 435   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
 436
 437   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
 438   bool IsRegSeq = true;
 439   unsigned NOps = N->getNumOperands();
 440   for (unsigned i = 0; i < NOps; i++) {
 441     // XXX: Why is this here?
 442     if (isa<RegisterSDNode>(N->getOperand(i))) {
 443       IsRegSeq = false;
 444       break;
 445     }
 446     unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
 447     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
 448     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
 449   }
 450   if (NOps != NumVectorElts) {
 451     // Fill in the missing undef elements if this was a scalar_to_vector.
 452     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
 453     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
 454                                                    DL, EltVT);
 455     for (unsigned i = NOps; i < NumVectorElts; ++i) {
 456       unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
 457       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
 458       RegSeqArgs[1 + (2 * i) + 1] =
 459           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
 460     }
 461   }
 462
 463   if (!IsRegSeq)
 464     SelectCode(N);
 465   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
 466 }
 467
 468 void AMDGPUDAGToDAGISel::Select(SDNode *N) {
 469   unsigned int Opc = N->getOpcode();
 470   if (N->isMachineOpcode()) {
 471     N->setNodeId(-1);
 472     return;   // Already selected.
 473   }
 474
 475   if (isa<AtomicSDNode>(N) ||
 476       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
 477        Opc == ISD::ATOMIC_LOAD_FADD ||
 478        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
 479        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
 480     N = glueCopyToM0LDSInit(N);
 481
 482   switch (Opc) {
 483   default:
 484     break;
 485   // We are selecting i64 ADD here instead of custom lower it during
 486   // DAG legalization, so we can fold some i64 ADDs used for address
 487   // calculation into the LOAD and STORE instructions.
 488   case ISD::ADDC:
 489   case ISD::ADDE:
 490   case ISD::SUBC:
 491   case ISD::SUBE: {
 492     if (N->getValueType(0) != MVT::i64)
 493       break;
 494
 495     SelectADD_SUB_I64(N);
 496     return;
 497   }
 498   case ISD::UADDO:
 499   case ISD::USUBO: {
 500     SelectUADDO_USUBO(N);
 501     return;
 502   }
 503   case AMDGPUISD::FMUL_W_CHAIN: {
 504     SelectFMUL_W_CHAIN(N);
 505     return;
 506   }
 507   case AMDGPUISD::FMA_W_CHAIN: {
 508     SelectFMA_W_CHAIN(N);
 509     return;
 510   }
 511
 512   case ISD::SCALAR_TO_VECTOR:
 513   case ISD::BUILD_VECTOR: {
 514     EVT VT = N->getValueType(0);
 515     unsigned NumVectorElts = VT.getVectorNumElements();
 516     if (VT.getScalarSizeInBits() == 16) {
 517       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
 518         uint32_t LHSVal, RHSVal;
 519         if (getConstantValue(N->getOperand(0), LHSVal) &&
 520             getConstantValue(N->getOperand(1), RHSVal)) {
 521           uint32_t K = LHSVal | (RHSVal << 16);
 522           CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
 523                                CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
 524           return;
 525         }
 526       }
 527
 528       break;
 529     }
 530
 531     assert(VT.getVectorElementType().bitsEq(MVT::i32));
 532     unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
 533     SelectBuildVector(N, RegClassID);
 534     return;
 535   }
 536   case ISD::BUILD_PAIR: {
 537     SDValue RC, SubReg0, SubReg1;
 538     SDLoc DL(N);
 539     if (N->getValueType(0) == MVT::i128) {
 540       RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
 541       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
 542       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
 543     } else if (N->getValueType(0) == MVT::i64) {
 544       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
 545       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
 546       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
 547     } else {
 548       llvm_unreachable("Unhandled value type for BUILD_PAIR");
 549     }
 550     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
 551                             N->getOperand(1), SubReg1 };
 552     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
 553                                           N->getValueType(0), Ops));
 554     return;
 555   }
 556
 557   case ISD::Constant:
 558   case ISD::ConstantFP: {
 559     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
 560       break;
 561
 562     uint64_t Imm;
 563     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
 564       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
 565     else {
 566       ConstantSDNode *C = cast<ConstantSDNode>(N);
 567       Imm = C->getZExtValue();
 568     }
 569
 570     SDLoc DL(N);
 571     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
 572     return;
 573   }
 574   case ISD::LOAD:
 575   case ISD::STORE:
 576   case ISD::ATOMIC_LOAD:
 577   case ISD::ATOMIC_STORE: {
 578     N = glueCopyToM0LDSInit(N);
 579     break;
 580   }
 581
 582   case AMDGPUISD::BFE_I32:
 583   case AMDGPUISD::BFE_U32: {
 584     // There is a scalar version available, but unlike the vector version which
 585     // has a separate operand for the offset and width, the scalar version packs
 586     // the width and offset into a single operand. Try to move to the scalar
 587     // version if the offsets are constant, so that we can try to keep extended
 588     // loads of kernel arguments in SGPRs.
 589
 590     // TODO: Technically we could try to pattern match scalar bitshifts of
 591     // dynamic values, but it's probably not useful.
 592     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
 593     if (!Offset)
 594       break;
 595
 596     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
 597     if (!Width)
 598       break;
 599
 600     bool Signed = Opc == AMDGPUISD::BFE_I32;
 601
 602     uint32_t OffsetVal = Offset->getZExtValue();
 603     uint32_t WidthVal = Width->getZExtValue();
 604
 605     ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
 606                             SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
 607     return;
 608   }
 609   case AMDGPUISD::DIV_SCALE: {
 610     SelectDIV_SCALE(N);
 611     return;
 612   }
 613   case AMDGPUISD::MAD_I64_I32:
 614   case AMDGPUISD::MAD_U64_U32: {
 615     SelectMAD_64_32(N);
 616     return;
 617   }
 618   case ISD::CopyToReg: {
 619     const SITargetLowering& Lowering =
 620       *static_cast<const SITargetLowering*>(getTargetLowering());
 621     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
 622     break;
 623   }
 624   case ISD::AND:
 625   case ISD::SRL:
 626   case ISD::SRA:
 627   case ISD::SIGN_EXTEND_INREG:
 628     if (N->getValueType(0) != MVT::i32)
 629       break;
 630
 631     SelectS_BFE(N);
 632     return;
 633   case ISD::BRCOND:
 634     SelectBRCOND(N);
 635     return;
 636   case ISD::FMAD:
 637   case ISD::FMA:
 638     SelectFMAD_FMA(N);
 639     return;
 640   case AMDGPUISD::ATOMIC_CMP_SWAP:
 641     SelectATOMIC_CMP_SWAP(N);
 642     return;
 643   case AMDGPUISD::CVT_PKRTZ_F16_F32:
 644   case AMDGPUISD::CVT_PKNORM_I16_F32:
 645   case AMDGPUISD::CVT_PKNORM_U16_F32:
 646   case AMDGPUISD::CVT_PK_U16_U32:
 647   case AMDGPUISD::CVT_PK_I16_I32: {
 648     // Hack around using a legal type if f16 is illegal.
 649     if (N->getValueType(0) == MVT::i32) {
 650       MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
 651       N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
 652                               { N->getOperand(0), N->getOperand(1) });
 653       SelectCode(N);
 654       return;
 655     }
 656
 657     break;
 658   }
 659   case ISD::INTRINSIC_W_CHAIN: {
 660     SelectINTRINSIC_W_CHAIN(N);
 661     return;
 662   }
 663   }
 664
 665   SelectCode(N);
 666 }
 667
 668 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
 669   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
 670   const Instruction *Term = BB->getTerminator();
 671   return Term->getMetadata("amdgpu.uniform") ||
 672          Term->getMetadata("structurizecfg.uniform");
 673 }
 674
 675 StringRef AMDGPUDAGToDAGISel::getPassName() const {
 676   return "AMDGPU DAG->DAG Pattern Instruction Selection";
 677 }
 678
 679 //===----------------------------------------------------------------------===//
 680 // Complex Patterns
 681 //===----------------------------------------------------------------------===//
 682
 683 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
 684                                             SDValue &Offset) {
 685   return false;
 686 }
 687
 688 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
 689                                             SDValue &Offset) {
 690   ConstantSDNode *C;
 691   SDLoc DL(Addr);
 692
 693   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
 694     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
 695     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
 696   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
 697              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
 698     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
 699     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
 700   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
 701             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
 702     Base = Addr.getOperand(0);
 703     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
 704   } else {
 705     Base = Addr;
 706     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
 707   }
 708
 709   return true;
 710 }
 711
 712 // FIXME: Should only handle addcarry/subcarry
 713 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
 714   SDLoc DL(N);
 715   SDValue LHS = N->getOperand(0);
 716   SDValue RHS = N->getOperand(1);
 717
 718   unsigned Opcode = N->getOpcode();
 719   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
 720   bool ProduceCarry =
 721       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
 722   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
 723
 724   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
 725   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
 726
 727   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 728                                        DL, MVT::i32, LHS, Sub0);
 729   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 730                                        DL, MVT::i32, LHS, Sub1);
 731
 732   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 733                                        DL, MVT::i32, RHS, Sub0);
 734   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
 735                                        DL, MVT::i32, RHS, Sub1);
 736
 737   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
 738
 739   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
 740   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
 741
 742   SDNode *AddLo;
 743   if (!ConsumeCarry) {
 744     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
 745     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
 746   } else {
 747     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
 748     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
 749   }
 750   SDValue AddHiArgs[] = {
 751     SDValue(Hi0, 0),
 752     SDValue(Hi1, 0),
 753     SDValue(AddLo, 1)
 754   };
 755   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
 756
 757   SDValue RegSequenceArgs[] = {
 758     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
 759     SDValue(AddLo,0),
 760     Sub0,
 761     SDValue(AddHi,0),
 762     Sub1,
 763   };
 764   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
 765                                                MVT::i64, RegSequenceArgs);
 766
 767   if (ProduceCarry) {
 768     // Replace the carry-use
 769     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
 770   }
 771
 772   // Replace the remaining uses.
 773   ReplaceNode(N, RegSequence);
 774 }
 775
 776 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
 777   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
 778   // carry out despite the _i32 name. These were renamed in VI to _U32.
 779   // FIXME: We should probably rename the opcodes here.
 780   unsigned Opc = N->getOpcode() == ISD::UADDO ?
 781     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
 782
 783   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
 784                        { N->getOperand(0), N->getOperand(1) });
 785 }
 786
 787 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
 788   SDLoc SL(N);
 789   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
 790   SDValue Ops[10];
 791
 792   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
 793   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
 794   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
 795   Ops[8] = N->getOperand(0);
 796   Ops[9] = N->getOperand(4);
 797
 798   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
 799 }
 800
 801 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
 802   SDLoc SL(N);
 803   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
 804   SDValue Ops[8];
 805
 806   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
 807   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
 808   Ops[6] = N->getOperand(0);
 809   Ops[7] = N->getOperand(3);
 810
 811   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
 812 }
 813
 814 // We need to handle this here because tablegen doesn't support matching
 815 // instructions with multiple outputs.
 816 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
 817   SDLoc SL(N);
 818   EVT VT = N->getValueType(0);
 819
 820   assert(VT == MVT::f32 || VT == MVT::f64);
 821
 822   unsigned Opc
 823     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
 824
 825   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
 826   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 827 }
 828
 829 // We need to handle this here because tablegen doesn't support matching
 830 // instructions with multiple outputs.
 831 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
 832   SDLoc SL(N);
 833   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
 834   unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;
 835
 836   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
 837   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
 838                     Clamp };
 839   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
 840 }
 841
 842 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
 843                                          unsigned OffsetBits) const {
 844   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
 845       (OffsetBits == 8 && !isUInt<8>(Offset)))
 846     return false;
 847
 848   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
 849       Subtarget->unsafeDSOffsetFoldingEnabled())
 850     return true;
 851
 852   // On Southern Islands instruction with a negative base value and an offset
 853   // don't seem to work.
 854   return CurDAG->SignBitIsZero(Base);
 855 }
 856
 857 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
 858                                               SDValue &Offset) const {
 859   SDLoc DL(Addr);
 860   if (CurDAG->isBaseWithConstantOffset(Addr)) {
 861     SDValue N0 = Addr.getOperand(0);
 862     SDValue N1 = Addr.getOperand(1);
 863     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
 864     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
 865       // (add n0, c0)
 866       Base = N0;
 867       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
 868       return true;
 869     }
 870   } else if (Addr.getOpcode() == ISD::SUB) {
 871     // sub C, x -> add (sub 0, x), C
 872     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
 873       int64_t ByteOffset = C->getSExtValue();
 874       if (isUInt<16>(ByteOffset)) {
 875         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
 876
 877         // XXX - This is kind of hacky. Create a dummy sub node so we can check
 878         // the known bits in isDSOffsetLegal. We need to emit the selected node
 879         // here, so this is thrown away.
 880         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
 881                                       Zero, Addr.getOperand(1));
 882
 883         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
 884           // FIXME: Select to VOP3 version for with-carry.
 885           unsigned SubOp = Subtarget->hasAddNoCarry() ?
 886             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
 887
 888           MachineSDNode *MachineSub
 889             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
 890                                      Zero, Addr.getOperand(1));
 891
 892           Base = SDValue(MachineSub, 0);
 893           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
 894           return true;
 895         }
 896       }
 897     }
 898   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
 899     // If we have a constant address, prefer to put the constant into the
 900     // offset. This can save moves to load the constant address since multiple
 901     // operations can share the zero base address register, and enables merging
 902     // into read2 / write2 instructions.
 903
 904     SDLoc DL(Addr);
 905
 906     if (isUInt<16>(CAddr->getZExtValue())) {
 907       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
 908       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
 909                                  DL, MVT::i32, Zero);
 910       Base = SDValue(MovZero, 0);
 911       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
 912       return true;
 913     }
 914   }
 915
 916   // default case
 917   Base = Addr;
 918   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
 919   return true;
 920 }
 921
 922 // TODO: If offset is too big, put low 16-bit into offset.
 923 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
 924                                                    SDValue &Offset0,
 925                                                    SDValue &Offset1) const {
 926   SDLoc DL(Addr);
 927
 928   if (CurDAG->isBaseWithConstantOffset(Addr)) {
 929     SDValue N0 = Addr.getOperand(0);
 930     SDValue N1 = Addr.getOperand(1);
 931     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
 932     unsigned DWordOffset0 = C1->getZExtValue() / 4;
 933     unsigned DWordOffset1 = DWordOffset0 + 1;
 934     // (add n0, c0)
 935     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
 936       Base = N0;
 937       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
 938       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
 939       return true;
 940     }
 941   } else if (Addr.getOpcode() == ISD::SUB) {
 942     // sub C, x -> add (sub 0, x), C
 943     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
 944       unsigned DWordOffset0 = C->getZExtValue() / 4;
 945       unsigned DWordOffset1 = DWordOffset0 + 1;
 946
 947       if (isUInt<8>(DWordOffset0)) {
 948         SDLoc DL(Addr);
 949         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
 950
 951         // XXX - This is kind of hacky. Create a dummy sub node so we can check
 952         // the known bits in isDSOffsetLegal. We need to emit the selected node
 953         // here, so this is thrown away.
 954         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
 955                                       Zero, Addr.getOperand(1));
 956
 957         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
 958           unsigned SubOp = Subtarget->hasAddNoCarry() ?
 959             AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
 960
 961           MachineSDNode *MachineSub
 962             = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
 963                                      Zero, Addr.getOperand(1));
 964
 965           Base = SDValue(MachineSub, 0);
 966           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
 967           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
 968           return true;
 969         }
 970       }
 971     }
 972   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
 973     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
 974     unsigned DWordOffset1 = DWordOffset0 + 1;
 975     assert(4 * DWordOffset0 == CAddr->getZExtValue());
 976
 977     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
 978       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
 979       MachineSDNode *MovZero
 980         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
 981                                  DL, MVT::i32, Zero);
 982       Base = SDValue(MovZero, 0);
 983       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
 984       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
 985       return true;
 986     }
 987   }
 988
 989   // default case
 990
 991   Base = Addr;
 992   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
 993   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
 994   return true;
 995 }
 996
 997 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 998                                      SDValue &VAddr, SDValue &SOffset,
 999                                      SDValue &Offset, SDValue &Offen,
1000                                      SDValue &Idxen, SDValue &Addr64,
1001                                      SDValue &GLC, SDValue &SLC,
1002                                      SDValue &TFE) const {
1003   // Subtarget prefers to use flat instruction
1004   if (Subtarget->useFlatForGlobal())
1005     return false;
1006
1007   SDLoc DL(Addr);
1008
1009   if (!GLC.getNode())
1010     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1011   if (!SLC.getNode())
1012     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
1013   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1014
1015   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1016   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1017   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1018   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1019
1020   ConstantSDNode *C1 = nullptr;
1021   SDValue N0 = Addr;
1022   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1023     C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1024     if (isUInt<32>(C1->getZExtValue()))
1025       N0 = Addr.getOperand(0);
1026     else
1027       C1 = nullptr;
1028   }
1029
1030   if (N0.getOpcode() == ISD::ADD) {
1031     // (add N2, N3) -> addr64, or
1032     // (add (add N2, N3), C1) -> addr64
1033     SDValue N2 = N0.getOperand(0);
1034     SDValue N3 = N0.getOperand(1);
1035     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1036
1037     if (N2->isDivergent()) {
1038       if (N3->isDivergent()) {
1039         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1040         // addr64, and construct the resource from a 0 address.
1041         Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1042         VAddr = N0;
1043       } else {
1044         // N2 is divergent, N3 is not.
1045         Ptr = N3;
1046         VAddr = N2;
1047       }
1048     } else {
1049       // N2 is not divergent.
1050       Ptr = N2;
1051       VAddr = N3;
1052     }
1053     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1054   } else if (N0->isDivergent()) {
1055     // N0 is divergent. Use it as the addr64, and construct the resource from a
1056     // 0 address.
1057     Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1058     VAddr = N0;
1059     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1060   } else {
1061     // N0 -> offset, or
1062     // (N0 + C1) -> offset
1063     VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1064     Ptr = N0;
1065   }
1066
1067   if (!C1) {
1068     // No offset.
1069     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1070     return true;
1071   }
1072
1073   if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1074     // Legal offset for instruction.
1075     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1076     return true;
1077   }
1078
1079   // Illegal offset, store it in soffset.
1080   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1081   SOffset =
1082       SDValue(CurDAG->getMachineNode(
1083                   AMDGPU::S_MOV_B32, DL, MVT::i32,
1084                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1085               0);
1086   return true;
1087 }
1088
1089 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1090                                            SDValue &VAddr, SDValue &SOffset,
1091                                            SDValue &Offset, SDValue &GLC,
1092                                            SDValue &SLC, SDValue &TFE) const {
1093   SDValue Ptr, Offen, Idxen, Addr64;
1094
1095   // addr64 bit was removed for volcanic islands.
1096   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1097     return false;
1098
1099   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1100               GLC, SLC, TFE))
1101     return false;
1102
1103   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1104   if (C->getSExtValue()) {
1105     SDLoc DL(Addr);
1106
1107     const SITargetLowering& Lowering =
1108       *static_cast<const SITargetLowering*>(getTargetLowering());
1109
1110     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1111     return true;
1112   }
1113
1114   return false;
1115 }
1116
1117 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1118                                            SDValue &VAddr, SDValue &SOffset,
1119                                            SDValue &Offset,
1120                                            SDValue &SLC) const {
1121   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1122   SDValue GLC, TFE;
1123
1124   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1125 }
1126
1127 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1128   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1129   return PSV && PSV->isStack();
1130 }
1131
1132 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1133   const MachineFunction &MF = CurDAG->getMachineFunction();
1134   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1135
1136   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1137     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1138                                               FI->getValueType(0));
1139
1140     // If we can resolve this to a frame index access, this is relative to the
1141     // frame pointer SGPR.
1142     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
1143                                                    MVT::i32));
1144   }
1145
1146   // If we don't know this private access is a local stack object, it needs to
1147   // be relative to the entry point's scratch wave offset register.
1148   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
1149                                                MVT::i32));
1150 }
1151
1152 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1153                                                  SDValue Addr, SDValue &Rsrc,
1154                                                  SDValue &VAddr, SDValue &SOffset,
1155                                                  SDValue &ImmOffset) const {
1156
1157   SDLoc DL(Addr);
1158   MachineFunction &MF = CurDAG->getMachineFunction();
1159   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1160
1161   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1162
1163   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1164     unsigned Imm = CAddr->getZExtValue();
1165
1166     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1167     MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1168                                                         DL, MVT::i32, HighBits);
1169     VAddr = SDValue(MovHighBits, 0);
1170
1171     // In a call sequence, stores to the argument stack area are relative to the
1172     // stack pointer.
1173     const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1174     unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1175       Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1176
1177     SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1178     ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
1179     return true;
1180   }
1181
1182   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1183     // (add n0, c1)
1184
1185     SDValue N0 = Addr.getOperand(0);
1186     SDValue N1 = Addr.getOperand(1);
1187
1188     // Offsets in vaddr must be positive if range checking is enabled.
1189     //
1190     // The total computation of vaddr + soffset + offset must not overflow.  If
1191     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1192     // overflowing.
1193     //
1194     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1195     // always perform a range check. If a negative vaddr base index was used,
1196     // this would fail the range check. The overall address computation would
1197     // compute a valid address, but this doesn't happen due to the range
1198     // check. For out-of-bounds MUBUF loads, a 0 is returned.
1199     //
1200     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1201     // MUBUF vaddr, but not on older subtargets which can only do this if the
1202     // sign bit is known 0.
1203     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1204     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1205         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1206          CurDAG->SignBitIsZero(N0))) {
1207       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1208       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1209       return true;
1210     }
1211   }
1212
1213   // (node)
1214   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1215   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1216   return true;
1217 }
1218
1219 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1220                                                   SDValue Addr,
1221                                                   SDValue &SRsrc,
1222                                                   SDValue &SOffset,
1223                                                   SDValue &Offset) const {
1224   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1225   if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1226     return false;
1227
1228   SDLoc DL(Addr);
1229   MachineFunction &MF = CurDAG->getMachineFunction();
1230   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1231
1232   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1233
1234   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1235   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1236     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1237
1238   // FIXME: Get from MachinePointerInfo? We should only be using the frame
1239   // offset if we know this is in a call sequence.
1240   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1241
1242   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1243   return true;
1244 }
1245
1246 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1247                                            SDValue &SOffset, SDValue &Offset,
1248                                            SDValue &GLC, SDValue &SLC,
1249                                            SDValue &TFE) const {
1250   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1251   const SIInstrInfo *TII =
1252     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1253
1254   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1255               GLC, SLC, TFE))
1256     return false;
1257
1258   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1259       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1260       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1261     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1262                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1263     SDLoc DL(Addr);
1264
1265     const SITargetLowering& Lowering =
1266       *static_cast<const SITargetLowering*>(getTargetLowering());
1267
1268     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1269     return true;
1270   }
1271   return false;
1272 }
1273
1274 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1275                                            SDValue &Soffset, SDValue &Offset
1276                                            ) const {
1277   SDValue GLC, SLC, TFE;
1278
1279   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1280 }
1281 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1282                                            SDValue &Soffset, SDValue &Offset,
1283                                            SDValue &SLC) const {
1284   SDValue GLC, TFE;
1285
1286   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1287 }
1288
1289 template <bool IsSigned>
1290 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
1291                                           SDValue &VAddr,
1292                                           SDValue &Offset,
1293                                           SDValue &SLC) const {
1294   int64_t OffsetVal = 0;
1295
1296   if (Subtarget->hasFlatInstOffsets() &&
1297       CurDAG->isBaseWithConstantOffset(Addr)) {
1298     SDValue N0 = Addr.getOperand(0);
1299     SDValue N1 = Addr.getOperand(1);
1300     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1301
1302     if ((IsSigned && isInt<13>(COffsetVal)) ||
1303         (!IsSigned && isUInt<12>(COffsetVal))) {
1304       Addr = N0;
1305       OffsetVal = COffsetVal;
1306     }
1307   }
1308
1309   VAddr = Addr;
1310   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1311   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1312
1313   return true;
1314 }
1315
1316 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
1317                                           SDValue &VAddr,
1318                                           SDValue &Offset,
1319                                           SDValue &SLC) const {
1320   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
1321 }
1322
1323 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
1324                                           SDValue &VAddr,
1325                                           SDValue &Offset,
1326                                           SDValue &SLC) const {
1327   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
1328 }
1329
1330 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1331                                           SDValue &Offset, bool &Imm) const {
1332
1333   // FIXME: Handle non-constant offsets.
1334   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1335   if (!C)
1336     return false;
1337
1338   SDLoc SL(ByteOffsetNode);
1339   GCNSubtarget::Generation Gen = Subtarget->getGeneration();
1340   int64_t ByteOffset = C->getSExtValue();
1341   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1342
1343   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1344     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1345     Imm = true;
1346     return true;
1347   }
1348
1349   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1350     return false;
1351
1352   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1353     // 32-bit Immediates are supported on Sea Islands.
1354     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1355   } else {
1356     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1357     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1358                                             C32Bit), 0);
1359   }
1360   Imm = false;
1361   return true;
1362 }
1363
1364 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1365   if (Addr.getValueType() != MVT::i32)
1366     return Addr;
1367
1368   // Zero-extend a 32-bit address.
1369   SDLoc SL(Addr);
1370
1371   const MachineFunction &MF = CurDAG->getMachineFunction();
1372   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1373   unsigned AddrHiVal = Info->get32BitAddressHighBits();
1374   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1375
1376   const SDValue Ops[] = {
1377     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1378     Addr,
1379     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1380     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1381             0),
1382     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1383   };
1384
1385   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1386                                         Ops), 0);
1387 }
1388
1389 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1390                                      SDValue &Offset, bool &Imm) const {
1391   SDLoc SL(Addr);
1392
1393   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1394   // wraparound, because s_load instructions perform the addition in 64 bits.
1395   if ((Addr.getValueType() != MVT::i32 ||
1396        Addr->getFlags().hasNoUnsignedWrap()) &&
1397       CurDAG->isBaseWithConstantOffset(Addr)) {
1398     SDValue N0 = Addr.getOperand(0);
1399     SDValue N1 = Addr.getOperand(1);
1400
1401     if (SelectSMRDOffset(N1, Offset, Imm)) {
1402       SBase = Expand32BitAddress(N0);
1403       return true;
1404     }
1405   }
1406   SBase = Expand32BitAddress(Addr);
1407   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1408   Imm = true;
1409   return true;
1410 }
1411
1412 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1413                                        SDValue &Offset) const {
1414   bool Imm;
1415   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1416 }
1417
1418 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1419                                          SDValue &Offset) const {
1420
1421   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1422     return false;
1423
1424   bool Imm;
1425   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1426     return false;
1427
1428   return !Imm && isa<ConstantSDNode>(Offset);
1429 }
1430
1431 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1432                                         SDValue &Offset) const {
1433   bool Imm;
1434   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1435          !isa<ConstantSDNode>(Offset);
1436 }
1437
1438 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1439                                              SDValue &Offset) const {
1440   bool Imm;
1441   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1442 }
1443
1444 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1445                                                SDValue &Offset) const {
1446   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1447     return false;
1448
1449   bool Imm;
1450   if (!SelectSMRDOffset(Addr, Offset, Imm))
1451     return false;
1452
1453   return !Imm && isa<ConstantSDNode>(Offset);
1454 }
1455
1456 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1457                                             SDValue &Base,
1458                                             SDValue &Offset) const {
1459   SDLoc DL(Index);
1460
1461   if (CurDAG->isBaseWithConstantOffset(Index)) {
1462     SDValue N0 = Index.getOperand(0);
1463     SDValue N1 = Index.getOperand(1);
1464     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1465
1466     // (add n0, c0)
1467     // Don't peel off the offset (c0) if doing so could possibly lead
1468     // the base (n0) to be negative.
1469     if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)) {
1470       Base = N0;
1471       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1472       return true;
1473     }
1474   }
1475
1476   if (isa<ConstantSDNode>(Index))
1477     return false;
1478
1479   Base = Index;
1480   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1481   return true;
1482 }
1483
1484 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1485                                      SDValue Val, uint32_t Offset,
1486                                      uint32_t Width) {
1487   // Transformation function, pack the offset and width of a BFE into
1488   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1489   // source, bits [5:0] contain the offset and bits [22:16] the width.
1490   uint32_t PackedVal = Offset | (Width << 16);
1491   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1492
1493   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1494 }
1495
1496 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1497   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1498   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1499   // Predicate: 0 < b <= c < 32
1500
1501   const SDValue &Shl = N->getOperand(0);
1502   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1503   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1504
1505   if (B && C) {
1506     uint32_t BVal = B->getZExtValue();
1507     uint32_t CVal = C->getZExtValue();
1508
1509     if (0 < BVal && BVal <= CVal && CVal < 32) {
1510       bool Signed = N->getOpcode() == ISD::SRA;
1511       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1512
1513       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1514                               32 - CVal));
1515       return;
1516     }
1517   }
1518   SelectCode(N);
1519 }
1520
1521 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1522   switch (N->getOpcode()) {
1523   case ISD::AND:
1524     if (N->getOperand(0).getOpcode() == ISD::SRL) {
1525       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1526       // Predicate: isMask(mask)
1527       const SDValue &Srl = N->getOperand(0);
1528       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1529       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1530
1531       if (Shift && Mask) {
1532         uint32_t ShiftVal = Shift->getZExtValue();
1533         uint32_t MaskVal = Mask->getZExtValue();
1534
1535         if (isMask_32(MaskVal)) {
1536           uint32_t WidthVal = countPopulation(MaskVal);
1537
1538           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1539                                   Srl.getOperand(0), ShiftVal, WidthVal));
1540           return;
1541         }
1542       }
1543     }
1544     break;
1545   case ISD::SRL:
1546     if (N->getOperand(0).getOpcode() == ISD::AND) {
1547       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1548       // Predicate: isMask(mask >> b)
1549       const SDValue &And = N->getOperand(0);
1550       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1551       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1552
1553       if (Shift && Mask) {
1554         uint32_t ShiftVal = Shift->getZExtValue();
1555         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1556
1557         if (isMask_32(MaskVal)) {
1558           uint32_t WidthVal = countPopulation(MaskVal);
1559
1560           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1561                                   And.getOperand(0), ShiftVal, WidthVal));
1562           return;
1563         }
1564       }
1565     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1566       SelectS_BFEFromShifts(N);
1567       return;
1568     }
1569     break;
1570   case ISD::SRA:
1571     if (N->getOperand(0).getOpcode() == ISD::SHL) {
1572       SelectS_BFEFromShifts(N);
1573       return;
1574     }
1575     break;
1576
1577   case ISD::SIGN_EXTEND_INREG: {
1578     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1579     SDValue Src = N->getOperand(0);
1580     if (Src.getOpcode() != ISD::SRL)
1581       break;
1582
1583     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1584     if (!Amt)
1585       break;
1586
1587     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1588     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1589                             Amt->getZExtValue(), Width));
1590     return;
1591   }
1592   }
1593
1594   SelectCode(N);
1595 }
1596
1597 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1598   assert(N->getOpcode() == ISD::BRCOND);
1599   if (!N->hasOneUse())
1600     return false;
1601
1602   SDValue Cond = N->getOperand(1);
1603   if (Cond.getOpcode() == ISD::CopyToReg)
1604     Cond = Cond.getOperand(2);
1605
1606   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1607     return false;
1608
1609   MVT VT = Cond.getOperand(0).getSimpleValueType();
1610   if (VT == MVT::i32)
1611     return true;
1612
1613   if (VT == MVT::i64) {
1614     auto ST = static_cast<const GCNSubtarget *>(Subtarget);
1615
1616     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1617     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1618   }
1619
1620   return false;
1621 }
1622
1623 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1624   SDValue Cond = N->getOperand(1);
1625
1626   if (Cond.isUndef()) {
1627     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1628                          N->getOperand(2), N->getOperand(0));
1629     return;
1630   }
1631
1632   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1633   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1634   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1635   SDLoc SL(N);
1636
1637   if (!UseSCCBr) {
1638     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
1639     // analyzed what generates the vcc value, so we do not know whether vcc
1640     // bits for disabled lanes are 0.  Thus we need to mask out bits for
1641     // disabled lanes.
1642     //
1643     // For the case that we select S_CBRANCH_SCC1 and it gets
1644     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1645     // SIInstrInfo::moveToVALU which inserts the S_AND).
1646     //
1647     // We could add an analysis of what generates the vcc value here and omit
1648     // the S_AND when is unnecessary. But it would be better to add a separate
1649     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1650     // catches both cases.
1651     Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1652                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1653                                Cond),
1654                    0);
1655   }
1656
1657   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1658   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1659                        N->getOperand(2), // Basic Block
1660                        VCC.getValue(0));
1661 }
1662
1663 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
1664   MVT VT = N->getSimpleValueType(0);
1665   bool IsFMA = N->getOpcode() == ISD::FMA;
1666   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
1667                          !Subtarget->hasFmaMixInsts()) ||
1668       ((IsFMA && Subtarget->hasMadMixInsts()) ||
1669        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
1670     SelectCode(N);
1671     return;
1672   }
1673
1674   SDValue Src0 = N->getOperand(0);
1675   SDValue Src1 = N->getOperand(1);
1676   SDValue Src2 = N->getOperand(2);
1677   unsigned Src0Mods, Src1Mods, Src2Mods;
1678
1679   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1680   // using the conversion from f16.
1681   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1682   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1683   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1684
1685   assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
1686          "fmad selected with denormals enabled");
1687   // TODO: We can select this with f32 denormals enabled if all the sources are
1688   // converted from f16 (in which case fmad isn't legal).
1689
1690   if (Sel0 || Sel1 || Sel2) {
1691     // For dummy operands.
1692     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1693     SDValue Ops[] = {
1694       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1695       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1696       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1697       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1698       Zero, Zero
1699     };
1700
1701     CurDAG->SelectNodeTo(N,
1702                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
1703                          MVT::f32, Ops);
1704   } else {
1705     SelectCode(N);
1706   }
1707 }
1708
1709 // This is here because there isn't a way to use the generated sub0_sub1 as the
1710 // subreg index to EXTRACT_SUBREG in tablegen.
1711 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1712   MemSDNode *Mem = cast<MemSDNode>(N);
1713   unsigned AS = Mem->getAddressSpace();
1714   if (AS == AMDGPUAS::FLAT_ADDRESS) {
1715     SelectCode(N);
1716     return;
1717   }
1718
1719   MVT VT = N->getSimpleValueType(0);
1720   bool Is32 = (VT == MVT::i32);
1721   SDLoc SL(N);
1722
1723   MachineSDNode *CmpSwap = nullptr;
1724   if (Subtarget->hasAddr64()) {
1725     SDValue SRsrc, VAddr, SOffset, Offset, SLC;
1726
1727     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1728       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1729         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1730       SDValue CmpVal = Mem->getOperand(2);
1731
1732       // XXX - Do we care about glue operands?
1733
1734       SDValue Ops[] = {
1735         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1736       };
1737
1738       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1739     }
1740   }
1741
1742   if (!CmpSwap) {
1743     SDValue SRsrc, SOffset, Offset, SLC;
1744     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1745       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1746         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1747
1748       SDValue CmpVal = Mem->getOperand(2);
1749       SDValue Ops[] = {
1750         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1751       };
1752
1753       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1754     }
1755   }
1756
1757   if (!CmpSwap) {
1758     SelectCode(N);
1759     return;
1760   }
1761
1762   MachineMemOperand *MMO = Mem->getMemOperand();
1763   CurDAG->setNodeMemRefs(CmpSwap, {MMO});
1764
1765   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1766   SDValue Extract
1767     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1768
1769   ReplaceUses(SDValue(N, 0), Extract);
1770   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1771   CurDAG->RemoveDeadNode(N);
1772 }
1773
1774 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
1775   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1776   if ((IntrID != Intrinsic::amdgcn_ds_append &&
1777        IntrID != Intrinsic::amdgcn_ds_consume) ||
1778       N->getValueType(0) != MVT::i32) {
1779     SelectCode(N);
1780     return;
1781   }
1782
1783   // The address is assumed to be uniform, so if it ends up in a VGPR, it will
1784   // be copied to an SGPR with readfirstlane.
1785   unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
1786     AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
1787
1788   SDValue Chain = N->getOperand(0);
1789   SDValue Ptr = N->getOperand(2);
1790   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
1791   bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
1792
1793   SDValue Offset;
1794   if (CurDAG->isBaseWithConstantOffset(Ptr)) {
1795     SDValue PtrBase = Ptr.getOperand(0);
1796     SDValue PtrOffset = Ptr.getOperand(1);
1797
1798     const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
1799     if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
1800       N = glueCopyToM0(N, PtrBase);
1801       Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1802     }
1803   }
1804
1805   if (!Offset) {
1806     N = glueCopyToM0(N, Ptr);
1807     Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1808   }
1809
1810   SDValue Ops[] = {
1811     Offset,
1812     CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
1813     Chain,
1814     N->getOperand(N->getNumOperands() - 1) // New glue
1815   };
1816
1817   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
1818 }
1819
1820 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1821                                             unsigned &Mods) const {
1822   Mods = 0;
1823   Src = In;
1824
1825   if (Src.getOpcode() == ISD::FNEG) {
1826     Mods |= SISrcMods::NEG;
1827     Src = Src.getOperand(0);
1828   }
1829
1830   if (Src.getOpcode() == ISD::FABS) {
1831     Mods |= SISrcMods::ABS;
1832     Src = Src.getOperand(0);
1833   }
1834
1835   return true;
1836 }
1837
1838 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1839                                         SDValue &SrcMods) const {
1840   unsigned Mods;
1841   if (SelectVOP3ModsImpl(In, Src, Mods)) {
1842     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1843     return true;
1844   }
1845
1846   return false;
1847 }
1848
1849 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1850                                              SDValue &SrcMods) const {
1851   SelectVOP3Mods(In, Src, SrcMods);
1852   return isNoNanSrc(Src);
1853 }
1854
1855 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1856   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1857     return false;
1858
1859   Src = In;
1860   return true;
1861 }
1862
1863 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1864                                          SDValue &SrcMods, SDValue &Clamp,
1865                                          SDValue &Omod) const {
1866   SDLoc DL(In);
1867   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1868   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1869
1870   return SelectVOP3Mods(In, Src, SrcMods);
1871 }
1872
1873 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1874                                                    SDValue &SrcMods,
1875                                                    SDValue &Clamp,
1876                                                    SDValue &Omod) const {
1877   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1878   return SelectVOP3Mods(In, Src, SrcMods);
1879 }
1880
1881 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1882                                          SDValue &Clamp, SDValue &Omod) const {
1883   Src = In;
1884
1885   SDLoc DL(In);
1886   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1887   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1888
1889   return true;
1890 }
1891
1892 static SDValue stripBitcast(SDValue Val) {
1893   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1894 }
1895
1896 // Figure out if this is really an extract of the high 16-bits of a dword.
1897 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1898   In = stripBitcast(In);
1899   if (In.getOpcode() != ISD::TRUNCATE)
1900     return false;
1901
1902   SDValue Srl = In.getOperand(0);
1903   if (Srl.getOpcode() == ISD::SRL) {
1904     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1905       if (ShiftAmt->getZExtValue() == 16) {
1906         Out = stripBitcast(Srl.getOperand(0));
1907         return true;
1908       }
1909     }
1910   }
1911
1912   return false;
1913 }
1914
1915 // Look through operations that obscure just looking at the low 16-bits of the
1916 // same register.
1917 static SDValue stripExtractLoElt(SDValue In) {
1918   if (In.getOpcode() == ISD::TRUNCATE) {
1919     SDValue Src = In.getOperand(0);
1920     if (Src.getValueType().getSizeInBits() == 32)
1921       return stripBitcast(Src);
1922   }
1923
1924   return In;
1925 }
1926
1927 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
1928                                          SDValue &SrcMods) const {
1929   unsigned Mods = 0;
1930   Src = In;
1931
1932   if (Src.getOpcode() == ISD::FNEG) {
1933     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
1934     Src = Src.getOperand(0);
1935   }
1936
1937   if (Src.getOpcode() == ISD::BUILD_VECTOR) {
1938     unsigned VecMods = Mods;
1939
1940     SDValue Lo = stripBitcast(Src.getOperand(0));
1941     SDValue Hi = stripBitcast(Src.getOperand(1));
1942
1943     if (Lo.getOpcode() == ISD::FNEG) {
1944       Lo = stripBitcast(Lo.getOperand(0));
1945       Mods ^= SISrcMods::NEG;
1946     }
1947
1948     if (Hi.getOpcode() == ISD::FNEG) {
1949       Hi = stripBitcast(Hi.getOperand(0));
1950       Mods ^= SISrcMods::NEG_HI;
1951     }
1952
1953     if (isExtractHiElt(Lo, Lo))
1954       Mods |= SISrcMods::OP_SEL_0;
1955
1956     if (isExtractHiElt(Hi, Hi))
1957       Mods |= SISrcMods::OP_SEL_1;
1958
1959     Lo = stripExtractLoElt(Lo);
1960     Hi = stripExtractLoElt(Hi);
1961
1962     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
1963       // Really a scalar input. Just select from the low half of the register to
1964       // avoid packing.
1965
1966       Src = Lo;
1967       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1968       return true;
1969     }
1970
1971     Mods = VecMods;
1972   }
1973
1974   // Packed instructions do not have abs modifiers.
1975   Mods |= SISrcMods::OP_SEL_1;
1976
1977   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1978   return true;
1979 }
1980
1981 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1982                                           SDValue &SrcMods,
1983                                           SDValue &Clamp) const {
1984   SDLoc SL(In);
1985
1986   // FIXME: Handle clamp and op_sel
1987   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1988
1989   return SelectVOP3PMods(In, Src, SrcMods);
1990 }
1991
1992 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1993                                          SDValue &SrcMods) const {
1994   Src = In;
1995   // FIXME: Handle op_sel
1996   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1997   return true;
1998 }
1999
2000 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
2001                                           SDValue &SrcMods,
2002                                           SDValue &Clamp) const {
2003   SDLoc SL(In);
2004
2005   // FIXME: Handle clamp
2006   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2007
2008   return SelectVOP3OpSel(In, Src, SrcMods);
2009 }
2010
2011 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2012                                              SDValue &SrcMods) const {
2013   // FIXME: Handle op_sel
2014   return SelectVOP3Mods(In, Src, SrcMods);
2015 }
2016
2017 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
2018                                               SDValue &SrcMods,
2019                                               SDValue &Clamp) const {
2020   SDLoc SL(In);
2021
2022   // FIXME: Handle clamp
2023   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
2024
2025   return SelectVOP3OpSelMods(In, Src, SrcMods);
2026 }
2027
2028 // The return value is not whether the match is possible (which it always is),
2029 // but whether or not it a conversion is really used.
2030 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2031                                                    unsigned &Mods) const {
2032   Mods = 0;
2033   SelectVOP3ModsImpl(In, Src, Mods);
2034
2035   if (Src.getOpcode() == ISD::FP_EXTEND) {
2036     Src = Src.getOperand(0);
2037     assert(Src.getValueType() == MVT::f16);
2038     Src = stripBitcast(Src);
2039
2040     // Be careful about folding modifiers if we already have an abs. fneg is
2041     // applied last, so we don't want to apply an earlier fneg.
2042     if ((Mods & SISrcMods::ABS) == 0) {
2043       unsigned ModsTmp;
2044       SelectVOP3ModsImpl(Src, Src, ModsTmp);
2045
2046       if ((ModsTmp & SISrcMods::NEG) != 0)
2047         Mods ^= SISrcMods::NEG;
2048
2049       if ((ModsTmp & SISrcMods::ABS) != 0)
2050         Mods |= SISrcMods::ABS;
2051     }
2052
2053     // op_sel/op_sel_hi decide the source type and source.
2054     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2055     // If the sources's op_sel is set, it picks the high half of the source
2056     // register.
2057
2058     Mods |= SISrcMods::OP_SEL_1;
2059     if (isExtractHiElt(Src, Src)) {
2060       Mods |= SISrcMods::OP_SEL_0;
2061
2062       // TODO: Should we try to look for neg/abs here?
2063     }
2064
2065     return true;
2066   }
2067
2068   return false;
2069 }
2070
2071 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2072                                                SDValue &SrcMods) const {
2073   unsigned Mods = 0;
2074   SelectVOP3PMadMixModsImpl(In, Src, Mods);
2075   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2076   return true;
2077 }
2078
2079 // TODO: Can we identify things like v_mad_mixhi_f16?
2080 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
2081   if (In.isUndef()) {
2082     Src = In;
2083     return true;
2084   }
2085
2086   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2087     SDLoc SL(In);
2088     SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
2089     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2090                                                  SL, MVT::i32, K);
2091     Src = SDValue(MovK, 0);
2092     return true;
2093   }
2094
2095   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2096     SDLoc SL(In);
2097     SDValue K = CurDAG->getTargetConstant(
2098       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2099     MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
2100                                                  SL, MVT::i32, K);
2101     Src = SDValue(MovK, 0);
2102     return true;
2103   }
2104
2105   return isExtractHiElt(In, Src);
2106 }
2107
2108 bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2109   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
2110     return false;
2111   }
2112   const SIRegisterInfo *SIRI =
2113     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2114   const SIInstrInfo * SII =
2115     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2116
2117   unsigned Limit = 0;
2118   bool AllUsesAcceptSReg = true;
2119   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2120     Limit < 10 && U != E; ++U, ++Limit) {
2121     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2122
2123     // If the register class is unknown, it could be an unknown
2124     // register class that needs to be an SGPR, e.g. an inline asm
2125     // constraint
2126     if (!RC || SIRI->isSGPRClass(RC))
2127       return false;
2128
2129     if (RC != &AMDGPU::VS_32RegClass) {
2130       AllUsesAcceptSReg = false;
2131       SDNode * User = *U;
2132       if (User->isMachineOpcode()) {
2133         unsigned Opc = User->getMachineOpcode();
2134         MCInstrDesc Desc = SII->get(Opc);
2135         if (Desc.isCommutable()) {
2136           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2137           unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2138           if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2139             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2140             const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2141             if (CommutedRC == &AMDGPU::VS_32RegClass)
2142               AllUsesAcceptSReg = true;
2143           }
2144         }
2145       }
2146       // If "AllUsesAcceptSReg == false" so far we haven't suceeded
2147       // commuting current user. This means have at least one use
2148       // that strictly require VGPR. Thus, we will not attempt to commute
2149       // other user instructions.
2150       if (!AllUsesAcceptSReg)
2151         break;
2152     }
2153   }
2154   return !AllUsesAcceptSReg && (Limit < 10);
2155 }
2156
2157 bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
2158   auto Ld = cast<LoadSDNode>(N);
2159
2160   return Ld->getAlignment() >= 4 &&
2161         (
2162           (
2163             (
2164               Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS       ||
2165               Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
2166             )
2167             &&
2168             !N->isDivergent()
2169           )
2170           ||
2171           (
2172             Subtarget->getScalarizeGlobalBehavior() &&
2173             Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2174             !Ld->isVolatile() &&
2175             !N->isDivergent() &&
2176             static_cast<const SITargetLowering *>(
2177               getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)
2178           )
2179         );
2180 }
2181
2182 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
2183   const AMDGPUTargetLowering& Lowering =
2184     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
2185   bool IsModified = false;
2186   do {
2187     IsModified = false;
2188
2189     // Go over all selected nodes and try to fold them a bit more
2190     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
2191     while (Position != CurDAG->allnodes_end()) {
2192       SDNode *Node = &*Position++;
2193       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
2194       if (!MachineNode)
2195         continue;
2196
2197       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2198       if (ResNode != Node) {
2199         if (ResNode)
2200           ReplaceUses(Node, ResNode);
2201         IsModified = true;
2202       }
2203     }
2204     CurDAG->RemoveDeadNodes();
2205   } while (IsModified);
2206 }
2207
2208 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
2209   Subtarget = &MF.getSubtarget<R600Subtarget>();
2210   return SelectionDAGISel::runOnMachineFunction(MF);
2211 }
2212
2213 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
2214   if (!N->readMem())
2215     return false;
2216   if (CbId == -1)
2217     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2218            N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
2219
2220   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
2221 }
2222
2223 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
2224                                                          SDValue& IntPtr) {
2225   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
2226     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
2227                                        true);
2228     return true;
2229   }
2230   return false;
2231 }
2232
2233 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
2234     SDValue& BaseReg, SDValue &Offset) {
2235   if (!isa<ConstantSDNode>(Addr)) {
2236     BaseReg = Addr;
2237     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
2238     return true;
2239   }
2240   return false;
2241 }
2242
2243 void R600DAGToDAGISel::Select(SDNode *N) {
2244   unsigned int Opc = N->getOpcode();
2245   if (N->isMachineOpcode()) {
2246     N->setNodeId(-1);
2247     return;   // Already selected.
2248   }
2249
2250   switch (Opc) {
2251   default: break;
2252   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2253   case ISD::SCALAR_TO_VECTOR:
2254   case ISD::BUILD_VECTOR: {
2255     EVT VT = N->getValueType(0);
2256     unsigned NumVectorElts = VT.getVectorNumElements();
2257     unsigned RegClassID;
2258     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2259     // that adds a 128 bits reg copy when going through TwoAddressInstructions
2260     // pass. We want to avoid 128 bits copies as much as possible because they
2261     // can't be bundled by our scheduler.
2262     switch(NumVectorElts) {
2263     case 2: RegClassID = R600::R600_Reg64RegClassID; break;
2264     case 4:
2265       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2266         RegClassID = R600::R600_Reg128VerticalRegClassID;
2267       else
2268         RegClassID = R600::R600_Reg128RegClassID;
2269       break;
2270     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2271     }
2272     SelectBuildVector(N, RegClassID);
2273     return;
2274   }
2275   }
2276
2277   SelectCode(N);
2278 }
2279
2280 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2281                                           SDValue &Offset) {
2282   ConstantSDNode *C;
2283   SDLoc DL(Addr);
2284
2285   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2286     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2287     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2288   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2289              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2290     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
2291     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2292   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2293             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2294     Base = Addr.getOperand(0);
2295     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2296   } else {
2297     Base = Addr;
2298     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2299   }
2300
2301   return true;
2302 }
2303
2304 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2305                                           SDValue &Offset) {
2306   ConstantSDNode *IMMOffset;
2307
2308   if (Addr.getOpcode() == ISD::ADD
2309       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2310       && isInt<16>(IMMOffset->getZExtValue())) {
2311
2312       Base = Addr.getOperand(0);
2313       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2314                                          MVT::i32);
2315       return true;
2316   // If the pointer address is constant, we can move it to the offset field.
2317   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2318              && isInt<16>(IMMOffset->getZExtValue())) {
2319     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2320                                   SDLoc(CurDAG->getEntryNode()),
2321                                   R600::ZERO, MVT::i32);
2322     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2323                                        MVT::i32);
2324     return true;
2325   }
2326
2327   // Default case, no offset
2328   Base = Addr;
2329   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2330   return true;
2331 }