//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));
  for (const MachineOperand &MO : I.operands()) {
    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}
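
// Return a MachineOperand for the 32-bit half of a 64-bit operand selected by
// SubIdx. Register operands are split by copying the composed subregister
// into a fresh SGPR; immediate operands are split arithmetically.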

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}
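
// Select a 64-bit scalar G_ADD as S_ADD_U32 on the low halves followed by
// S_ADDC_U32 on the high halves, recombined with a REG_SEQUENCE. G_GEP is
// selected the same way.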

bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (Size != 64)
    return false;

  DebugLoc DL = I.getDebugLoc();

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
          .add(Lo1)
          .add(Lo2);

  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
          .add(Hi1)
          .add(Hi2);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
          .addReg(DstLo)
          .addImm(AMDGPU::sub0)
          .addReg(DstHi)
          .addImm(AMDGPU::sub1);

  for (MachineOperand &MO : I.explicit_operands()) {
    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
  }

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);
  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
  if (RC)
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
                                          CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();

  switch (IntrinsicID) {
  default:
    break;
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::amdgcn_cvt_pkrtz:
    return selectImpl(I, CoverageInfo);

  case Intrinsic::amdgcn_kernarg_segment_ptr: {
    MachineFunction *MF = I.getParent()->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
    const ArgDescriptor *InputPtrReg;
    const TargetRegisterClass *RC;
    const DebugLoc &DL = I.getDebugLoc();

    std::tie(InputPtrReg, RC)
      = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
    if (!InputPtrReg)
      report_fatal_error("missing kernarg segment ptr");

    BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
      .add(I.getOperand(0))
      .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
    I.eraseFromParent();
    return true;
  }
  }
  return false;
}
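
// Emit an EXP or EXP_DONE export instruction from the given target, source
// registers and control bits.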

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  }
  return false;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
  unsigned Opcode;

  // FIXME: Select store instruction based on address space
  switch (StoreSize) {
  default:
    return false;
  case 32:
    Opcode = AMDGPU::FLAT_STORE_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_STORE_DWORDX2;
    break;
  case 96:
    Opcode = AMDGPU::FLAT_STORE_DWORDX3;
    break;
  case 128:
    Opcode = AMDGPU::FLAT_STORE_DWORDX4;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
          .add(I.getOperand(1))
          .add(I.getOperand(0))
          .addImm(0)  // offset
          .addImm(0)  // glc
          .addImm(0); // slc

  // Now that we selected an opcode, we need to constrain the register
  // operands to use appropriate classes.
  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

  I.eraseFromParent();
  return Ret;
}
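
// Materialize a 32-bit constant with a single S_MOV_B32 or V_MOV_B32_e32; a
// 64-bit constant is built from two 32-bit moves joined by a REG_SEQUENCE.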

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
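
// Walk the chain of G_GEPs feeding a load and record the constant offset and
// the SGPR/VGPR address components for later addressing-mode selection.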

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

static bool isInstrUniform(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
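
// Map a 32-bit base SMRD opcode to the variant matching LoadSize (in bits).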

static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {

  if (LoadSize == 32)
    return BaseOpcode;

  switch (BaseOpcode) {
  case AMDGPU::S_LOAD_DWORD_IMM:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_IMM_ci:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
    }
    break;
  case AMDGPU::S_LOAD_DWORD_SGPR:
    switch (LoadSize) {
    case 64:
      return AMDGPU::S_LOAD_DWORDX2_SGPR;
    case 128:
      return AMDGPU::S_LOAD_DWORDX4_SGPR;
    case 256:
      return AMDGPU::S_LOAD_DWORDX8_SGPR;
    case 512:
      return AMDGPU::S_LOAD_DWORDX16_SGPR;
    }
    break;
  }
  llvm_unreachable("Invalid base smrd opcode or size");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}
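
// Try to select a uniform, constant-address load as a scalar (SMRD) load.
// Returns false when the load cannot use the scalar path.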

bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
                                           ArrayRef<GEPInfo> AddrInfo) const {

  if (!I.hasOneMemOperand())
    return false;

  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
      (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;

  if (!isInstrUniform(I))
    return false;

  if (hasVgprParts(AddrInfo))
    return false;

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Opcode;
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

    const GEPInfo &GEPInfo = AddrInfo[0];

    unsigned PtrReg = GEPInfo.SgprParts[0];
    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                 .addReg(PtrReg)
                                 .addImm(EncodedImm)
                                 .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
        isUInt<32>(EncodedImm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addImm(EncodedImm)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }

    if (isUInt<32>(GEPInfo.Imm)) {
      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
              .addImm(GEPInfo.Imm);

      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                                   .addReg(PtrReg)
                                   .addReg(OffsetReg)
                                   .addImm(0); // glc
      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
    }
  }

  unsigned PtrReg = I.getOperand(1).getReg();
  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
                               .addReg(PtrReg)
                               .addImm(0)
                               .addImm(0); // glc
  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
}
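
// Select a G_LOAD: prefer the scalar SMRD form when possible, otherwise fall
// back to a FLAT load.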

bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  DebugLoc DL = I.getDebugLoc();
  unsigned DstReg = I.getOperand(0).getReg();
  unsigned PtrReg = I.getOperand(1).getReg();
  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned Opcode;

  SmallVector<GEPInfo, 4> AddrInfo;

  getAddrModeInfo(I, MRI, AddrInfo);

  if (selectSMRD(I, AddrInfo)) {
    I.eraseFromParent();
    return true;
  }

  switch (LoadSize) {
  default:
    llvm_unreachable("Load size not supported\n");
  case 32:
    Opcode = AMDGPU::FLAT_LOAD_DWORD;
    break;
  case 64:
    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
    break;
  }

  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
                               .add(I.getOperand(0))
                               .addReg(PtrReg)
                               .addImm(0)  // offset
                               .addImm(0)  // glc
                               .addImm(0); // slc

  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
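
// Top-level entry point: dispatch each generic opcode to its selection
// routine, deferring to the TableGen-generated selectImpl() by default.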

bool AMDGPUInstructionSelector::select(MachineInstr &I,
                                       CodeGenCoverage &CoverageInfo) const {

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  default:
    return selectImpl(I, CoverageInfo);
  case TargetOpcode::G_ADD:
    return selectG_ADD(I);
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I, CoverageInfo);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
  case TargetOpcode::G_LOAD:
    return selectG_LOAD(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
  }};
}