//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
                                        CodeGenCoverage &CoverageInfo) {
  MRI = &MF.getRegInfo();
  InstructionSelector::setupMF(MF, KB, CoverageInfo);
}

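// Return true if \p Reg carries an SCC-bank value: either the physical SCC
// register itself, or a virtual register whose class/bank marks it as a
// scalar condition (an SGPR_32-classed register with an s1 type, or a
// register assigned to the SCC register bank).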
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    // As a hack, getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which
    // won't ever be constrained any further.
    if (RC != &AMDGPU::SGPR_32RegClass)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

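// Select a plain COPY. The interesting case is a copy of a non-VCC boolean
// into a VCC-bank register, which cannot remain a copy: it is lowered to a
// compare so every lane gets a well-defined mask bit. A rough sketch of the
// resulting MIR (register names illustrative):
//
//   %dst:vcc(s1) = COPY %src:sgpr(s1)
//     -->
//   %dst = V_CMP_NE_U32_e64 0, %src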
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to specifically
    // know that the result is VCCRegBank, and not just an SGPR with size 1.
    // An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }

  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  Register DstReg = MRI->createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("don't know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32 if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
      MRI->setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
      MRI->setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC
  // for the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}

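// Select G_ADD/G_SUB. 32-bit values map directly onto the SALU or VALU
// add/sub; 64-bit adds are decomposed into a low half that defines a carry
// and a high half that consumes it, recombined with a REG_SEQUENCE. A rough
// sketch of the 64-bit SALU expansion (register names illustrative):
//
//   %dst:sgpr(s64) = G_ADD %a, %b
//     -->
//   %lo:sreg_32 = S_ADD_U32 %a.sub0, %b.sub0    ; defines SCC
//   %hi:sreg_32 = S_ADDC_U32 %a.sub1, %b.sub1   ; consumes SCC
//   %dst = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1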
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;

  if (!isSCC(Dst1Reg, MRI)) {
    // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
    // unsigned carry out despite the _i32 name. These were renamed in VI to
    // _U32.
    // FIXME: We should probably rename the opcodes here.
    unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
    I.setDesc(TII.get(NewOpc));
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
    I.addOperand(*MF, MachineOperand::CreateImm(0));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
  unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
    .add(I.getOperand(2))
    .add(I.getOperand(3));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    .addReg(AMDGPU::SCC);

  if (!MRI.getRegClassOrNull(Dst1Reg))
    MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }
  I.eraseFromParent();
  return true;
}

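// G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS all select to a single
// REG_SEQUENCE over 32-bit (or larger) pieces, e.g. (illustrative):
//
//   %dst:sgpr(s64) = G_MERGE_VALUES %x:sgpr(s32), %y:sgpr(s32)
//     -->
//   %dst:sreg_64 = REG_SEQUENCE %x, %subreg.sub0, %y, %subreg.sub1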
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  unsigned InsSize = Src1Ty.getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)
    return false;

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
  if (!DstRC)
    return false;

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);

  // Deal with weird cases where the class only partially supports the subreg
  // index.
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC)
    return false;

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
    .addReg(Src0Reg)
    .addReg(Src1Reg)
    .addImm(SubReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

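// Select G_ICMP. An SCC-bank result uses the scalar S_CMP_* forms, which
// implicitly define SCC (then copied into the result register); a VCC-bank
// result uses the corresponding V_CMP_*_e64 writing a lane mask.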
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), *MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
  int64_t C;
  if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
    return true;

  // FIXME: matcher should ignore copies
  return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
}

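// The buffer intrinsics pack their cache-policy operand into a small
// bitfield; the helpers below extract the individual bits:
// bit 0 = glc, bit 1 = slc, bit 2 = dlc, bit 3 = swz.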
static unsigned extractGLC(unsigned AuxiliaryData) {
  return AuxiliaryData & 1;
}

static unsigned extractSLC(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 1) & 1;
}

static unsigned extractDLC(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 2) & 1;
}

static unsigned extractSWZ(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 3) & 1;
}

// Returns base register, constant offset, and offset def point.
static std::tuple<Register, unsigned, MachineInstr *>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (!Def)
    return std::make_tuple(Reg, 0, nullptr);

  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::make_tuple(Register(), Offset, Def);
  }

  int64_t Offset;
  if (Def->getOpcode() == AMDGPU::G_ADD) {
    // Match a constant on the add's second source operand and return the
    // first source as the base.
    // TODO: Handle G_OR used for add case
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);

    // FIXME: matcher should ignore copies
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
  }

  return std::make_tuple(Reg, 0, Def);
}

static int getBufferStoreOpcode(LLT Ty,
                                const unsigned MemSize,
                                const bool Offen) {
  const int Size = Ty.getSizeInBits();
  switch (8 * MemSize) {
  case 8:
    return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
  case 16:
    return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
  default:
    unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
                           AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
    if (Size > 32)
      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
    return Opc;
  }
}

static int getBufferStoreFormatOpcode(LLT Ty,
                                      const unsigned MemSize,
                                      const bool Offen) {
  bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
  bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
  int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;

  if (IsD16Packed) {
    switch (NumElts) {
    case 1:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
    case 2:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
    case 3:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
    case 4:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
    default:
      return -1;
    }
  }

  if (IsD16Unpacked) {
    switch (NumElts) {
    case 1:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
    case 2:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
    case 3:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
    case 4:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
    default:
      return -1;
    }
  }

  switch (NumElts) {
  case 1:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
  case 2:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
  case 3:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
  case 4:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
  default:
    return -1;
  }

  llvm_unreachable("unhandled buffer store");
}

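// Worked example for the splitting below, assuming the 12-bit MUBUF
// immediate field (MaxImm = 4095): a constant offset of 4100 becomes
// Overflow = 4096 (materialized into the voffset register) and ImmOffset = 4,
// keeping the immediate in range while the 4096 add remains CSE-able across
// similar accesses.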
// TODO: Move this to combiner
// Returns base register, imm offset, total constant offset.
std::tuple<Register, unsigned, unsigned>
AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
                                              Register OrigOffset) const {
  const unsigned MaxImm = 4095;
  Register BaseReg;
  unsigned TotalConstOffset;
  MachineInstr *OffsetDef;

  std::tie(BaseReg, TotalConstOffset, OffsetDef)
    = getBaseWithConstantOffset(*MRI, OrigOffset);

  unsigned ImmOffset = TotalConstOffset;

  // If the immediate value is too big for the immoffset field, put the value
  // and -4096 into the immoffset field so that the value that is copied/added
  // for the voffset field is a multiple of 4096, and it stands more chance
  // of being CSEd with the copy/add for another similar load/store.
  // However, do not do that rounding down to a multiple of 4096 if that is a
  // negative number, as it appears to be illegal to have a negative offset
  // in the vgpr, even if adding the immediate offset makes it positive.
  unsigned Overflow = ImmOffset & ~MaxImm;
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) {
    Overflow += ImmOffset;
    ImmOffset = 0;
  }

  if (Overflow != 0) {
    // In case this is in a waterfall loop, insert offset code at the def point
    // of the offset, not inside the loop.
    MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
    MachineBasicBlock &OldMBB = B.getMBB();
    B.setInstr(*OffsetDef);

    if (!BaseReg) {
      BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      B.buildInstr(AMDGPU::V_MOV_B32_e32)
        .addDef(BaseReg)
        .addImm(Overflow);
    } else {
      Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      B.buildInstr(AMDGPU::V_MOV_B32_e32)
        .addDef(OverflowVal)
        .addImm(Overflow);

      Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
        .addReg(BaseReg)
        .addReg(OverflowVal, RegState::Kill)
        .addImm(0);
      BaseReg = NewBaseReg;
    }

    B.setInsertPt(OldMBB, OldInsPt);
  }

  return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
}

bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
                                                     bool IsFormat) const {
  MachineIRBuilder B(MI);
  MachineFunction &MF = B.getMF();
  Register VData = MI.getOperand(1).getReg();
  LLT Ty = MRI->getType(VData);

  int Size = Ty.getSizeInBits();
  if (Size % 32 != 0)
    return false;

  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
  MachineMemOperand *MMO = *MI.memoperands_begin();
  const int MemSize = MMO->getSize();

  Register RSrc = MI.getOperand(2).getReg();
  Register VOffset = MI.getOperand(3).getReg();
  Register SOffset = MI.getOperand(4).getReg();
  unsigned AuxiliaryData = MI.getOperand(5).getImm();
  unsigned ImmOffset;
  unsigned TotalOffset;

  std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
  if (TotalOffset != 0)
    MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);

  const bool Offen = !isZero(VOffset, *MRI);

  int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
                       getBufferStoreOpcode(Ty, MemSize, Offen);
  if (Opc == -1)
    return false;

  MachineInstrBuilder MIB = B.buildInstr(Opc)
    .addUse(VData);

  if (Offen)
    MIB.addUse(VOffset);

  MIB.addUse(RSrc)
     .addUse(SOffset)
     .addImm(ImmOffset)
     .addImm(extractGLC(AuxiliaryData))
     .addImm(extractSLC(AuxiliaryData))
     .addImm(0) // tfe: FIXME: Remove from inst
     .addImm(extractDLC(AuxiliaryData))
     .addImm(extractSWZ(AuxiliaryData))
     .addMemOperand(MMO);

  MI.eraseFromParent();

  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = I.getOperand(1).getImm();
    int64_t Enabled = I.getOperand(2).getImm();
    int64_t Done = I.getOperand(7).getImm();
    int64_t VM = I.getOperand(8).getImm();

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = I.getOperand(1).getImm();
    int64_t Enabled = I.getOperand(2).getImm();
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = I.getOperand(5).getImm();
    int64_t VM = I.getOperand(6).getImm();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI->getRegClassOrNull(Reg))
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  case Intrinsic::amdgcn_raw_buffer_store:
    return selectStoreIntrinsic(I, false);
  case Intrinsic::amdgcn_raw_buffer_store_format:
    return selectStoreIntrinsic(I, true);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to manually set the register class here.
    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)
      .add(I.getOperand(3))
      .addImm(0)
      .add(I.getOperand(2))
      .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

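// Map a value size in bits to the subregister index spanning its low bits,
// e.g. 32 -> sub0 and 64 -> sub0_sub1; in-range sizes without an exact match
// round up to the next power of two, and sizes above 256 are rejected.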
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

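// Select G_SEXT/G_ZEXT/G_ANYEXT. Depending on the source bank this becomes a
// conditional select (for s1 condition sources), a V_BFE/S_BFE bitfield
// extract, or a plain AND when the mask is an inline immediate: e.g. a zext
// from s1..s6 uses masks 1..63, which encode inline, while a zext from s16
// would need 0xffff and takes the BFE path instead.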
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  }

  return false;
}

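// IEEE bit patterns for +/-1.0 at each width (0x3C00 is half 1.0, 0x3f800000
// is float 1.0, 0x3ff0000000000000 is double 1.0), used to turn a boolean to
// float conversion into a conditional move of the constant.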
static int64_t getFPTrueImmVal(unsigned Size, bool Signed) {
  switch (Size) {
  case 16:
    return Signed ? 0xBC00 : 0x3C00;
  case 32:
    return Signed ? 0xbf800000 : 0x3f800000;
  case 64:
    return Signed ? 0xbff0000000000000 : 0x3ff0000000000000;
  default:
    llvm_unreachable("Invalid FP type size");
  }
}

bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const {
  MachineBasicBlock *MBB = I.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register Src = I.getOperand(1).getReg();
  if (!isSCC(Src, MRI))
    return selectImpl(I, *CoverageInfo);

  bool Signed = I.getOpcode() == AMDGPU::G_SITOFP;
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned DstSize = DstTy.getSizeInBits();
  const DebugLoc &DL = I.getDebugLoc();

  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
    .addReg(Src);

  unsigned NewOpc =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
  auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg)
    .addImm(0)
    .addImm(getFPTrueImmVal(DstSize, Signed));

  if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI->getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  const DebugLoc &DL = I.getDebugLoc();

  APInt Imm(Size, I.getOperand(1).getImm());

  MachineInstr *ResInst;
  if (IsSgpr && TII.isInlineConstant(Imm)) {
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
      .addImm(I.getOperand(1).getImm());
  } else {
    const TargetRegisterClass *RC = IsSgpr ?
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    Register LoReg = MRI->createVirtualRegister(RC);
    Register HiReg = MRI->createVirtualRegister(RC);

    BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

    BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);
  }

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

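// On subtargets where DS instructions check addresses against M0, M0 must be
// initialized before any local/region access; writing an all-ones limit
// effectively disables the clamping.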
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();

  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    // FIXME: Hack for isSCC tests
    ConstrainRC = &AMDGPU::SGPR_32RegClass;
  } else if (isVCC(CondReg, *MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}

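// Select G_PTR_MASK, which clears the low N bits of a pointer. For example,
// N = 4 gives Mask = 0xfffffffffffffff0, i.e. align down to 16 bytes. For
// 64-bit pointers only the low half needs the AND, since for these small
// alignments the high mask bits are all ones.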
1612 bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
1613 uint64_t Align = I.getOperand(2).getImm();
1614 const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);
1616 MachineBasicBlock *BB = I.getParent();
1618 Register DstReg = I.getOperand(0).getReg();
1619 Register SrcReg = I.getOperand(1).getReg();
1621 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1622 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
1623 const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
1624 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
1625 unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
1626 const TargetRegisterClass &RegRC
1627 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
1629 LLT Ty = MRI->getType(DstReg);
1631 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
1632 *MRI);
1633 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
1634 *MRI);
1635 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
1636 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
1637 return false;
1639 const DebugLoc &DL = I.getDebugLoc();
1640 Register ImmReg = MRI->createVirtualRegister(&RegRC);
1641 BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
1642 .addImm(Mask);
1644 if (Ty.getSizeInBits() == 32) {
1645 BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
1646 .addReg(SrcReg)
1647 .addReg(ImmReg);
1648 I.eraseFromParent();
1649 return true;
1652 Register HiReg = MRI->createVirtualRegister(&RegRC);
1653 Register LoReg = MRI->createVirtualRegister(&RegRC);
1654 Register MaskLo = MRI->createVirtualRegister(&RegRC);
1656 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
1657 .addReg(SrcReg, 0, AMDGPU::sub0);
1658 BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
1659 .addReg(SrcReg, 0, AMDGPU::sub1);
1661 BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
1662 .addReg(LoReg)
1663 .addReg(ImmReg);
1664 BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1665 .addReg(MaskLo)
1666 .addImm(AMDGPU::sub0)
1667 .addReg(HiReg)
1668 .addImm(AMDGPU::sub1);
1669 I.eraseFromParent();
1670 return true;
1671 }
1673 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
1674 if (I.isPHI())
1675 return selectPHI(I);
1677 if (!I.isPreISelOpcode()) {
1678 if (I.isCopy())
1679 return selectCOPY(I);
1680 return true;
1681 }
1683 switch (I.getOpcode()) {
1684 case TargetOpcode::G_AND:
1685 case TargetOpcode::G_OR:
1686 case TargetOpcode::G_XOR:
1687 if (selectG_AND_OR_XOR(I))
1688 return true;
1689 return selectImpl(I, *CoverageInfo);
1690 case TargetOpcode::G_ADD:
1691 case TargetOpcode::G_SUB:
1692 if (selectImpl(I, *CoverageInfo))
1693 return true;
1694 return selectG_ADD_SUB(I);
1695 case TargetOpcode::G_UADDO:
1696 case TargetOpcode::G_USUBO:
1697 return selectG_UADDO_USUBO(I);
1698 case TargetOpcode::G_INTTOPTR:
1699 case TargetOpcode::G_BITCAST:
1700 case TargetOpcode::G_PTRTOINT:
1701 return selectCOPY(I);
1702 case TargetOpcode::G_CONSTANT:
1703 case TargetOpcode::G_FCONSTANT:
1704 return selectG_CONSTANT(I);
1705 case TargetOpcode::G_EXTRACT:
1706 return selectG_EXTRACT(I);
1707 case TargetOpcode::G_MERGE_VALUES:
1708 case TargetOpcode::G_BUILD_VECTOR:
1709 case TargetOpcode::G_CONCAT_VECTORS:
1710 return selectG_MERGE_VALUES(I);
1711 case TargetOpcode::G_UNMERGE_VALUES:
1712 return selectG_UNMERGE_VALUES(I);
1713 case TargetOpcode::G_GEP:
1714 return selectG_GEP(I);
1715 case TargetOpcode::G_IMPLICIT_DEF:
1716 return selectG_IMPLICIT_DEF(I);
1717 case TargetOpcode::G_INSERT:
1718 return selectG_INSERT(I);
1719 case TargetOpcode::G_INTRINSIC:
1720 return selectG_INTRINSIC(I);
1721 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
1722 return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
1723 case TargetOpcode::G_ICMP:
1724 if (selectG_ICMP(I))
1725 return true;
1726 return selectImpl(I, *CoverageInfo);
1727 case TargetOpcode::G_LOAD:
1728 case TargetOpcode::G_ATOMIC_CMPXCHG:
1729 case TargetOpcode::G_ATOMICRMW_XCHG:
1730 case TargetOpcode::G_ATOMICRMW_ADD:
1731 case TargetOpcode::G_ATOMICRMW_SUB:
1732 case TargetOpcode::G_ATOMICRMW_AND:
1733 case TargetOpcode::G_ATOMICRMW_OR:
1734 case TargetOpcode::G_ATOMICRMW_XOR:
1735 case TargetOpcode::G_ATOMICRMW_MIN:
1736 case TargetOpcode::G_ATOMICRMW_MAX:
1737 case TargetOpcode::G_ATOMICRMW_UMIN:
1738 case TargetOpcode::G_ATOMICRMW_UMAX:
1739 case TargetOpcode::G_ATOMICRMW_FADD:
1740 return selectG_LOAD_ATOMICRMW(I);
1741 case TargetOpcode::G_SELECT:
1742 return selectG_SELECT(I);
1743 case TargetOpcode::G_STORE:
1744 return selectG_STORE(I);
1745 case TargetOpcode::G_TRUNC:
1746 return selectG_TRUNC(I);
1747 case TargetOpcode::G_SEXT:
1748 case TargetOpcode::G_ZEXT:
1749 case TargetOpcode::G_ANYEXT:
1750 return selectG_SZA_EXT(I);
1751 case TargetOpcode::G_SITOFP:
1752 case TargetOpcode::G_UITOFP:
1753 return selectG_SITOFP_UITOFP(I);
1754 case TargetOpcode::G_BRCOND:
1755 return selectG_BRCOND(I);
1756 case TargetOpcode::G_FRAME_INDEX:
1757 return selectG_FRAME_INDEX(I);
1758 case TargetOpcode::G_FENCE:
1759 // FIXME: The TableGen importer doesn't handle the imm operands correctly and
1760 // checks for G_CONSTANT instead
1761 I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
1762 return true;
1763 case TargetOpcode::G_PTR_MASK:
1764 return selectG_PTR_MASK(I);
1765 default:
1766 return selectImpl(I, *CoverageInfo);
1767 }
1768 return false;
1769 }
1771 InstructionSelector::ComplexRendererFns
1772 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
1773 return {{
1774 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1775 }};
1777 }
1779 std::pair<Register, unsigned>
1780 AMDGPUInstructionSelector::selectVOP3ModsImpl(
1781 Register Src) const {
1782 unsigned Mods = 0;
1783 MachineInstr *MI = MRI->getVRegDef(Src);
1785 if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
1786 Src = MI->getOperand(1).getReg();
1787 Mods |= SISrcMods::NEG;
1788 MI = MRI->getVRegDef(Src);
1789 }
1791 if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
1792 Src = MI->getOperand(1).getReg();
1793 Mods |= SISrcMods::ABS;
1794 }
1796 return std::make_pair(Src, Mods);
1797 }
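// Illustrative folding performed above (value numbers are assumptions):
//   %1 = G_FABS %0
//   %2 = G_FNEG %1
// selectVOP3ModsImpl(%2) peels the fneg, then the fabs, and returns
// {%0, SISrcMods::NEG | SISrcMods::ABS}, so both modifiers are encoded in
// the VOP3 source operand rather than as separate instructions.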
1800 /// This will select either an SGPR or VGPR operand and will save us from
1801 /// having to write an extra tablegen pattern.
1802 InstructionSelector::ComplexRendererFns
1803 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
1804 return {{
1805 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
1806 }};
1807 }
1809 InstructionSelector::ComplexRendererFns
1810 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
1811 Register Src;
1812 unsigned Mods;
1813 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
1815 return {{
1816 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1817 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1818 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1819 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1820 }};
1821 }
1823 InstructionSelector::ComplexRendererFns
1824 AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
1825 Register Src;
1826 unsigned Mods;
1827 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
1829 return {{
1830 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1831 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
1832 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1833 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1834 }};
1835 }
1837 InstructionSelector::ComplexRendererFns
1838 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
1839 return {{
1840 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
1841 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
1842 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
1843 }};
1844 }
1846 InstructionSelector::ComplexRendererFns
1847 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
1848 Register Src;
1849 unsigned Mods;
1850 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
1852 return {{
1853 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
1854 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
1855 }};
1856 }
1858 InstructionSelector::ComplexRendererFns
1859 AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
1860 // FIXME: Handle clamp and op_sel
1861 return {{
1862 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1863 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
1864 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp
1865 }};
1866 }
1868 InstructionSelector::ComplexRendererFns
1869 AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
1870 // FIXME: Handle op_sel
1871 return {{
1872 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1873 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
1874 }};
1875 }
1877 InstructionSelector::ComplexRendererFns
1878 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
1879 SmallVector<GEPInfo, 4> AddrInfo;
1880 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
1882 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1883 return None;
1885 const GEPInfo &GEPInfo = AddrInfo[0];
1887 if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
1888 return None;
1890 unsigned PtrReg = GEPInfo.SgprParts[0];
1891 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1892 return {{
1893 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1894 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1895 }};
1896 }
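// Sketch of the case this handles (operands are illustrative): a uniform load
//   %off = G_GEP %sgprbase, <imm>
//   %val = G_LOAD %off
// can select to an S_LOAD_*_IMM form, with the byte offset run through
// getSMRDEncodedOffset to match the target's SMRD immediate encoding.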
1898 InstructionSelector::ComplexRendererFns
1899 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
1900 SmallVector<GEPInfo, 4> AddrInfo;
1901 getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
1903 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1904 return None;
1906 const GEPInfo &GEPInfo = AddrInfo[0];
1907 unsigned PtrReg = GEPInfo.SgprParts[0];
1908 int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
1909 if (!isUInt<32>(EncodedImm))
1910 return None;
1912 return {{
1913 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1914 [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
1915 }};
1916 }
1918 InstructionSelector::ComplexRendererFns
1919 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
1920 MachineInstr *MI = Root.getParent();
1921 MachineBasicBlock *MBB = MI->getParent();
1923 SmallVector<GEPInfo, 4> AddrInfo;
1924 getAddrModeInfo(*MI, *MRI, AddrInfo);
1926 // FIXME: We should shrink the GEP if the offset is known to fit in 32 bits;
1927 // then we can select all ptr + 32-bit offsets, not just immediate offsets.
1928 if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
1929 return None;
1931 const GEPInfo &GEPInfo = AddrInfo[0];
1932 if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
1933 return None;
1935 // If we make it this far we have a load with a 32-bit immediate offset.
1936 // It is OK to select this using an SGPR offset, because we have already
1937 // failed trying to select this load into one of the _IMM variants since
1938 // the _IMM patterns are considered before the _SGPR patterns.
1939 unsigned PtrReg = GEPInfo.SgprParts[0];
1940 Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1941 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
1942 .addImm(GEPInfo.Imm);
1943 return {{
1944 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
1945 [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
1946 }};
1947 }
1949 template <bool Signed>
1950 InstructionSelector::ComplexRendererFns
1951 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
1952 MachineInstr *MI = Root.getParent();
1954 InstructionSelector::ComplexRendererFns Default = {{
1955 [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
1956 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
1957 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1958 }};
1960 if (!STI.hasFlatInstOffsets())
1961 return Default;
1963 const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
1964 if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
1965 return Default;
1967 Optional<int64_t> Offset =
1968 getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI);
1969 if (!Offset.hasValue())
1970 return Default;
1972 unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
1973 if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
1974 return Default;
1976 Register BasePtr = OpDef->getOperand(1).getReg();
1978 return {{
1979 [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
1980 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
1981 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
1982 }};
1983 }
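// Illustrative result (assuming the offset is legal for the address space):
//   %gep = G_GEP %ptr, 16
//   G_LOAD %gep
// yields {%ptr, 16, 0}, folding the 16 into the FLAT instruction's offset
// field; any other case falls back to the unsplit address with offset 0.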
1985 InstructionSelector::ComplexRendererFns
1986 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
1987 return selectFlatOffsetImpl<false>(Root);
1988 }
1990 InstructionSelector::ComplexRendererFns
1991 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
1992 return selectFlatOffsetImpl<true>(Root);
1993 }
1995 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1996 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1997 return PSV && PSV->isStack();
1998 }
2000 InstructionSelector::ComplexRendererFns
2001 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
2002 MachineInstr *MI = Root.getParent();
2003 MachineBasicBlock *MBB = MI->getParent();
2004 MachineFunction *MF = MBB->getParent();
2005 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
2007 int64_t Offset = 0;
2008 if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
2009 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2011 // TODO: Should this be inside the render function? The iterator seems to
2012 // move.
2013 BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
2014 HighBits)
2015 .addImm(Offset & ~4095);
2017 return {{[=](MachineInstrBuilder &MIB) { // rsrc
2018 MIB.addReg(Info->getScratchRSrcReg());
2019 },
2020 [=](MachineInstrBuilder &MIB) { // vaddr
2021 MIB.addReg(HighBits);
2022 },
2023 [=](MachineInstrBuilder &MIB) { // soffset
2024 const MachineMemOperand *MMO = *MI->memoperands_begin();
2025 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
2027 Register SOffsetReg = isStackPtrRelative(PtrInfo)
2028 ? Info->getStackPtrOffsetReg()
2029 : Info->getScratchWaveOffsetReg();
2030 MIB.addReg(SOffsetReg);
2031 },
2032 [=](MachineInstrBuilder &MIB) { // offset
2033 MIB.addImm(Offset & 4095);
2034 }}};
2035 }
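// Worked example of the split above: Offset = 0x12345 puts
// 0x12345 & ~4095 = 0x12000 into the VGPR (HighBits) and
// 0x12345 & 4095 = 0x345 into the 12-bit immediate offset field.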
2037 assert(Offset == 0);
2039 // Try to fold a frame index directly into the MUBUF vaddr field, along
2040 // with any offset.
2041 Optional<int> FI;
2042 Register VAddr = Root.getReg();
2043 if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
2044 if (isBaseWithConstantOffset(Root, *MRI)) {
2045 const MachineOperand &LHS = RootDef->getOperand(1);
2046 const MachineOperand &RHS = RootDef->getOperand(2);
2047 const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
2048 const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
2049 if (LHSDef && RHSDef) {
2050 int64_t PossibleOffset =
2051 RHSDef->getOperand(1).getCImm()->getSExtValue();
2052 if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
2053 (!STI.privateMemoryResourceIsRangeChecked() ||
2054 KnownBits->signBitIsZero(LHS.getReg()))) {
2055 if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
2056 FI = LHSDef->getOperand(1).getIndex();
2057 else
2058 VAddr = LHS.getReg();
2059 Offset = PossibleOffset;
2060 }
2061 }
2062 } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
2063 FI = RootDef->getOperand(1).getIndex();
2064 }
2065 }
2067 // If we don't know this private access is a local stack object, it needs to
2068 // be relative to the entry point's scratch wave offset register.
2069 // TODO: Should split large offsets that don't fit, as above.
2070 // TODO: Don't use scratch wave offset just because the offset didn't fit.
2071 Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
2072 : Info->getScratchWaveOffsetReg();
2074 return {{[=](MachineInstrBuilder &MIB) { // rsrc
2075 MIB.addReg(Info->getScratchRSrcReg());
2076 },
2077 [=](MachineInstrBuilder &MIB) { // vaddr
2078 if (FI.hasValue())
2079 MIB.addFrameIndex(FI.getValue());
2080 else
2081 MIB.addReg(VAddr);
2082 },
2083 [=](MachineInstrBuilder &MIB) { // soffset
2084 MIB.addReg(SOffset);
2085 },
2086 [=](MachineInstrBuilder &MIB) { // offset
2087 MIB.addImm(Offset);
2088 }}};
2089 }
2091 bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
2092 const MachineOperand &Base,
2093 int64_t Offset,
2094 unsigned OffsetBits) const {
2095 if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
2096 (OffsetBits == 8 && !isUInt<8>(Offset)))
2097 return false;
2099 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
2100 return true;
2102 // On Southern Islands, instructions with a negative base value and an offset
2103 // don't seem to work.
2104 return KnownBits->signBitIsZero(Base.getReg());
2105 }
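// Example: a regular DS access has a 16-bit unsigned offset field, so an
// offset of 65532 is encodable while 65540 is not; on Southern Islands the
// base must additionally be provably non-negative, as checked above.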
2107 InstructionSelector::ComplexRendererFns
2108 AMDGPUInstructionSelector::selectMUBUFScratchOffset(
2109 MachineOperand &Root) const {
2110 MachineInstr *MI = Root.getParent();
2111 MachineBasicBlock *MBB = MI->getParent();
2113 int64_t Offset = 0;
2114 if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
2115 !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
2116 return {};
2118 const MachineFunction *MF = MBB->getParent();
2119 const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
2120 const MachineMemOperand *MMO = *MI->memoperands_begin();
2121 const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
2123 Register SOffsetReg = isStackPtrRelative(PtrInfo)
2124 ? Info->getStackPtrOffsetReg()
2125 : Info->getScratchWaveOffsetReg();
2126 return {{
2127 [=](MachineInstrBuilder &MIB) {
2128 MIB.addReg(Info->getScratchRSrcReg());
2129 }, // rsrc
2130 [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
2131 [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
2132 }};
2133 }
2135 InstructionSelector::ComplexRendererFns
2136 AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
2137 const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
2138 if (!RootDef) {
2139 return {{
2140 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
2141 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
2142 }};
2143 }
2145 int64_t ConstAddr = 0;
2146 if (isBaseWithConstantOffset(Root, *MRI)) {
2147 const MachineOperand &LHS = RootDef->getOperand(1);
2148 const MachineOperand &RHS = RootDef->getOperand(2);
2149 const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
2150 const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
2151 if (LHSDef && RHSDef) {
2152 int64_t PossibleOffset =
2153 RHSDef->getOperand(1).getCImm()->getSExtValue();
2154 if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
2155 // (add n0, c0)
2156 return {{
2157 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
2158 [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
2159 }};
2160 }
2161 }
2162 } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
2166 } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
2170 }
2171 return {{
2172 [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
2173 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
2174 }};
2175 }
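// Illustrative result (a sketch): %gep = G_GEP %base, 16 yields base = %base
// with offset = 16 when the offset fits the 16-bit DS field; anything
// unmatched falls through to base = root operand, offset = 0.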
2177 void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
2178 const MachineInstr &MI) const {
2179 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2180 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
2181 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
2182 assert(CstVal && "Expected constant value");
2183 MIB.addImm(CstVal.getValue());
2184 }