1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This file implements the targeting of the InstructionSelector class for
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "AMDGPUInstructionSelector.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "AMDGPUTargetMachine.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
27 #include "llvm/CodeGen/GlobalISel/Utils.h"
28 #include "llvm/CodeGen/MachineBasicBlock.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
37 #define DEBUG_TYPE "amdgpu-isel"
40 using namespace MIPatternMatch
;
42 #define GET_GLOBALISEL_IMPL
43 #define AMDGPUSubtarget GCNSubtarget
44 #include "AMDGPUGenGlobalISel.inc"
45 #undef GET_GLOBALISEL_IMPL
46 #undef AMDGPUSubtarget
48 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
49 const GCNSubtarget
&STI
, const AMDGPURegisterBankInfo
&RBI
,
50 const AMDGPUTargetMachine
&TM
)
51 : InstructionSelector(), TII(*STI
.getInstrInfo()),
52 TRI(*STI
.getRegisterInfo()), RBI(RBI
), TM(TM
),
54 EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG
),
55 #define GET_GLOBALISEL_PREDICATES_INIT
56 #include "AMDGPUGenGlobalISel.inc"
57 #undef GET_GLOBALISEL_PREDICATES_INIT
58 #define GET_GLOBALISEL_TEMPORARIES_INIT
59 #include "AMDGPUGenGlobalISel.inc"
60 #undef GET_GLOBALISEL_TEMPORARIES_INIT
64 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE
; }
66 static bool isSCC(Register Reg
, const MachineRegisterInfo
&MRI
) {
67 if (Register::isPhysicalRegister(Reg
))
68 return Reg
== AMDGPU::SCC
;
70 auto &RegClassOrBank
= MRI
.getRegClassOrRegBank(Reg
);
71 const TargetRegisterClass
*RC
=
72 RegClassOrBank
.dyn_cast
<const TargetRegisterClass
*>();
74 // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
75 // context of the register bank has been lost.
76 if (RC
->getID() != AMDGPU::SReg_32_XM0RegClassID
)
78 const LLT Ty
= MRI
.getType(Reg
);
79 return Ty
.isValid() && Ty
.getSizeInBits() == 1;
82 const RegisterBank
*RB
= RegClassOrBank
.get
<const RegisterBank
*>();
83 return RB
->getID() == AMDGPU::SCCRegBankID
;
86 bool AMDGPUInstructionSelector::isVCC(Register Reg
,
87 const MachineRegisterInfo
&MRI
) const {
88 if (Register::isPhysicalRegister(Reg
))
89 return Reg
== TRI
.getVCC();
91 auto &RegClassOrBank
= MRI
.getRegClassOrRegBank(Reg
);
92 const TargetRegisterClass
*RC
=
93 RegClassOrBank
.dyn_cast
<const TargetRegisterClass
*>();
95 const LLT Ty
= MRI
.getType(Reg
);
96 return RC
->hasSuperClassEq(TRI
.getBoolRC()) &&
97 Ty
.isValid() && Ty
.getSizeInBits() == 1;
100 const RegisterBank
*RB
= RegClassOrBank
.get
<const RegisterBank
*>();
101 return RB
->getID() == AMDGPU::VCCRegBankID
;
104 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr
&I
) const {
105 const DebugLoc
&DL
= I
.getDebugLoc();
106 MachineBasicBlock
*BB
= I
.getParent();
107 MachineFunction
*MF
= BB
->getParent();
108 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
109 I
.setDesc(TII
.get(TargetOpcode::COPY
));
111 const MachineOperand
&Src
= I
.getOperand(1);
112 MachineOperand
&Dst
= I
.getOperand(0);
113 Register DstReg
= Dst
.getReg();
114 Register SrcReg
= Src
.getReg();
116 if (isVCC(DstReg
, MRI
)) {
117 if (SrcReg
== AMDGPU::SCC
) {
118 const TargetRegisterClass
*RC
119 = TRI
.getConstrainedRegClassForOperand(Dst
, MRI
);
122 return RBI
.constrainGenericRegister(DstReg
, *RC
, MRI
);
125 if (!isVCC(SrcReg
, MRI
)) {
126 // TODO: Should probably leave the copy and let copyPhysReg expand it.
127 if (!RBI
.constrainGenericRegister(DstReg
, *TRI
.getBoolRC(), MRI
))
130 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::V_CMP_NE_U32_e64
), DstReg
)
134 if (!MRI
.getRegClassOrNull(SrcReg
))
135 MRI
.setRegClass(SrcReg
, TRI
.getConstrainedRegClassForOperand(Src
, MRI
));
140 const TargetRegisterClass
*RC
=
141 TRI
.getConstrainedRegClassForOperand(Dst
, MRI
);
142 if (RC
&& !RBI
.constrainGenericRegister(DstReg
, *RC
, MRI
))
145 // Don't constrain the source register to a class so the def instruction
146 // handles it (unless it's undef).
148 // FIXME: This is a hack. When selecting the def, we neeed to know
149 // specifically know that the result is VCCRegBank, and not just an SGPR
150 // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
152 const TargetRegisterClass
*SrcRC
=
153 TRI
.getConstrainedRegClassForOperand(Src
, MRI
);
154 if (SrcRC
&& !RBI
.constrainGenericRegister(SrcReg
, *SrcRC
, MRI
))
161 for (const MachineOperand
&MO
: I
.operands()) {
162 if (Register::isPhysicalRegister(MO
.getReg()))
165 const TargetRegisterClass
*RC
=
166 TRI
.getConstrainedRegClassForOperand(MO
, MRI
);
169 RBI
.constrainGenericRegister(MO
.getReg(), *RC
, MRI
);
174 bool AMDGPUInstructionSelector::selectPHI(MachineInstr
&I
) const {
175 MachineBasicBlock
*BB
= I
.getParent();
176 MachineFunction
*MF
= BB
->getParent();
177 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
179 const Register DefReg
= I
.getOperand(0).getReg();
180 const LLT DefTy
= MRI
.getType(DefReg
);
182 // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
184 const RegClassOrRegBank
&RegClassOrBank
=
185 MRI
.getRegClassOrRegBank(DefReg
);
187 const TargetRegisterClass
*DefRC
188 = RegClassOrBank
.dyn_cast
<const TargetRegisterClass
*>();
190 if (!DefTy
.isValid()) {
191 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
195 const RegisterBank
&RB
= *RegClassOrBank
.get
<const RegisterBank
*>();
196 if (RB
.getID() == AMDGPU::SCCRegBankID
) {
197 LLVM_DEBUG(dbgs() << "illegal scc phi\n");
201 DefRC
= TRI
.getRegClassForTypeOnBank(DefTy
, RB
, MRI
);
203 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
208 I
.setDesc(TII
.get(TargetOpcode::PHI
));
209 return RBI
.constrainGenericRegister(DefReg
, *DefRC
, MRI
);
213 AMDGPUInstructionSelector::getSubOperand64(MachineOperand
&MO
,
214 const TargetRegisterClass
&SubRC
,
215 unsigned SubIdx
) const {
217 MachineInstr
*MI
= MO
.getParent();
218 MachineBasicBlock
*BB
= MO
.getParent()->getParent();
219 MachineFunction
*MF
= BB
->getParent();
220 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
221 Register DstReg
= MRI
.createVirtualRegister(&SubRC
);
224 unsigned ComposedSubIdx
= TRI
.composeSubRegIndices(MO
.getSubReg(), SubIdx
);
225 Register Reg
= MO
.getReg();
226 BuildMI(*BB
, MI
, MI
->getDebugLoc(), TII
.get(AMDGPU::COPY
), DstReg
)
227 .addReg(Reg
, 0, ComposedSubIdx
);
229 return MachineOperand::CreateReg(DstReg
, MO
.isDef(), MO
.isImplicit(),
230 MO
.isKill(), MO
.isDead(), MO
.isUndef(),
231 MO
.isEarlyClobber(), 0, MO
.isDebug(),
232 MO
.isInternalRead());
237 APInt
Imm(64, MO
.getImm());
241 llvm_unreachable("do not know to split immediate with this sub index.");
243 return MachineOperand::CreateImm(Imm
.getLoBits(32).getSExtValue());
245 return MachineOperand::CreateImm(Imm
.getHiBits(32).getSExtValue());
249 static unsigned getLogicalBitOpcode(unsigned Opc
, bool Is64
) {
252 return Is64
? AMDGPU::S_AND_B64
: AMDGPU::S_AND_B32
;
254 return Is64
? AMDGPU::S_OR_B64
: AMDGPU::S_OR_B32
;
256 return Is64
? AMDGPU::S_XOR_B64
: AMDGPU::S_XOR_B32
;
258 llvm_unreachable("not a bit op");
262 bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr
&I
) const {
263 MachineBasicBlock
*BB
= I
.getParent();
264 MachineFunction
*MF
= BB
->getParent();
265 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
266 MachineOperand
&Dst
= I
.getOperand(0);
267 MachineOperand
&Src0
= I
.getOperand(1);
268 MachineOperand
&Src1
= I
.getOperand(2);
269 Register DstReg
= Dst
.getReg();
270 unsigned Size
= RBI
.getSizeInBits(DstReg
, MRI
, TRI
);
272 const RegisterBank
*DstRB
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
273 if (DstRB
->getID() == AMDGPU::VCCRegBankID
) {
274 const TargetRegisterClass
*RC
= TRI
.getBoolRC();
275 unsigned InstOpc
= getLogicalBitOpcode(I
.getOpcode(),
276 RC
== &AMDGPU::SReg_64RegClass
);
277 I
.setDesc(TII
.get(InstOpc
));
279 // FIXME: Hack to avoid turning the register bank into a register class.
280 // The selector for G_ICMP relies on seeing the register bank for the result
281 // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
282 // be ambiguous whether it's a scalar or vector bool.
283 if (Src0
.isUndef() && !MRI
.getRegClassOrNull(Src0
.getReg()))
284 MRI
.setRegClass(Src0
.getReg(), RC
);
285 if (Src1
.isUndef() && !MRI
.getRegClassOrNull(Src1
.getReg()))
286 MRI
.setRegClass(Src1
.getReg(), RC
);
288 return RBI
.constrainGenericRegister(DstReg
, *RC
, MRI
);
291 // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
293 if (DstRB
->getID() == AMDGPU::SGPRRegBankID
) {
294 unsigned InstOpc
= getLogicalBitOpcode(I
.getOpcode(), Size
> 32);
295 I
.setDesc(TII
.get(InstOpc
));
296 return constrainSelectedInstRegOperands(I
, TII
, TRI
, RBI
);
302 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr
&I
) const {
303 MachineBasicBlock
*BB
= I
.getParent();
304 MachineFunction
*MF
= BB
->getParent();
305 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
306 Register DstReg
= I
.getOperand(0).getReg();
307 const DebugLoc
&DL
= I
.getDebugLoc();
308 unsigned Size
= RBI
.getSizeInBits(DstReg
, MRI
, TRI
);
309 const RegisterBank
*DstRB
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
310 const bool IsSALU
= DstRB
->getID() == AMDGPU::SGPRRegBankID
;
311 const bool Sub
= I
.getOpcode() == TargetOpcode::G_SUB
;
315 const unsigned Opc
= Sub
? AMDGPU::S_SUB_U32
: AMDGPU::S_ADD_U32
;
317 BuildMI(*BB
, &I
, DL
, TII
.get(Opc
), DstReg
)
318 .add(I
.getOperand(1))
319 .add(I
.getOperand(2));
321 return constrainSelectedInstRegOperands(*Add
, TII
, TRI
, RBI
);
324 if (STI
.hasAddNoCarry()) {
325 const unsigned Opc
= Sub
? AMDGPU::V_SUB_U32_e64
: AMDGPU::V_ADD_U32_e64
;
326 I
.setDesc(TII
.get(Opc
));
327 I
.addOperand(*MF
, MachineOperand::CreateImm(0));
328 I
.addOperand(*MF
, MachineOperand::CreateReg(AMDGPU::EXEC
, false, true));
329 return constrainSelectedInstRegOperands(I
, TII
, TRI
, RBI
);
332 const unsigned Opc
= Sub
? AMDGPU::V_SUB_I32_e64
: AMDGPU::V_ADD_I32_e64
;
334 Register UnusedCarry
= MRI
.createVirtualRegister(TRI
.getWaveMaskRegClass());
336 = BuildMI(*BB
, &I
, DL
, TII
.get(Opc
), DstReg
)
337 .addDef(UnusedCarry
, RegState::Dead
)
338 .add(I
.getOperand(1))
339 .add(I
.getOperand(2))
342 return constrainSelectedInstRegOperands(*Add
, TII
, TRI
, RBI
);
345 assert(!Sub
&& "illegal sub should not reach here");
347 const TargetRegisterClass
&RC
348 = IsSALU
? AMDGPU::SReg_64_XEXECRegClass
: AMDGPU::VReg_64RegClass
;
349 const TargetRegisterClass
&HalfRC
350 = IsSALU
? AMDGPU::SReg_32RegClass
: AMDGPU::VGPR_32RegClass
;
352 MachineOperand
Lo1(getSubOperand64(I
.getOperand(1), HalfRC
, AMDGPU::sub0
));
353 MachineOperand
Lo2(getSubOperand64(I
.getOperand(2), HalfRC
, AMDGPU::sub0
));
354 MachineOperand
Hi1(getSubOperand64(I
.getOperand(1), HalfRC
, AMDGPU::sub1
));
355 MachineOperand
Hi2(getSubOperand64(I
.getOperand(2), HalfRC
, AMDGPU::sub1
));
357 Register DstLo
= MRI
.createVirtualRegister(&HalfRC
);
358 Register DstHi
= MRI
.createVirtualRegister(&HalfRC
);
361 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::S_ADD_U32
), DstLo
)
364 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::S_ADDC_U32
), DstHi
)
368 const TargetRegisterClass
*CarryRC
= TRI
.getWaveMaskRegClass();
369 Register CarryReg
= MRI
.createVirtualRegister(CarryRC
);
370 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::V_ADD_I32_e64
), DstLo
)
375 MachineInstr
*Addc
= BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::V_ADDC_U32_e64
), DstHi
)
376 .addDef(MRI
.createVirtualRegister(CarryRC
), RegState::Dead
)
379 .addReg(CarryReg
, RegState::Kill
)
382 if (!constrainSelectedInstRegOperands(*Addc
, TII
, TRI
, RBI
))
386 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::REG_SEQUENCE
), DstReg
)
388 .addImm(AMDGPU::sub0
)
390 .addImm(AMDGPU::sub1
);
393 if (!RBI
.constrainGenericRegister(DstReg
, RC
, MRI
))
400 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr
&I
) const {
401 MachineBasicBlock
*BB
= I
.getParent();
402 MachineFunction
*MF
= BB
->getParent();
403 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
404 assert(I
.getOperand(2).getImm() % 32 == 0);
405 unsigned SubReg
= TRI
.getSubRegFromChannel(I
.getOperand(2).getImm() / 32);
406 const DebugLoc
&DL
= I
.getDebugLoc();
407 MachineInstr
*Copy
= BuildMI(*BB
, &I
, DL
, TII
.get(TargetOpcode::COPY
),
408 I
.getOperand(0).getReg())
409 .addReg(I
.getOperand(1).getReg(), 0, SubReg
);
411 for (const MachineOperand
&MO
: Copy
->operands()) {
412 const TargetRegisterClass
*RC
=
413 TRI
.getConstrainedRegClassForOperand(MO
, MRI
);
416 RBI
.constrainGenericRegister(MO
.getReg(), *RC
, MRI
);
422 bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr
&MI
) const {
423 MachineBasicBlock
*BB
= MI
.getParent();
424 MachineFunction
*MF
= BB
->getParent();
425 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
426 Register DstReg
= MI
.getOperand(0).getReg();
427 LLT DstTy
= MRI
.getType(DstReg
);
428 LLT SrcTy
= MRI
.getType(MI
.getOperand(1).getReg());
430 const unsigned SrcSize
= SrcTy
.getSizeInBits();
434 const DebugLoc
&DL
= MI
.getDebugLoc();
435 const RegisterBank
*DstBank
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
436 const unsigned DstSize
= DstTy
.getSizeInBits();
437 const TargetRegisterClass
*DstRC
=
438 TRI
.getRegClassForSizeOnBank(DstSize
, *DstBank
, MRI
);
442 ArrayRef
<int16_t> SubRegs
= TRI
.getRegSplitParts(DstRC
, SrcSize
/ 8);
443 MachineInstrBuilder MIB
=
444 BuildMI(*BB
, &MI
, DL
, TII
.get(TargetOpcode::REG_SEQUENCE
), DstReg
);
445 for (int I
= 0, E
= MI
.getNumOperands() - 1; I
!= E
; ++I
) {
446 MachineOperand
&Src
= MI
.getOperand(I
+ 1);
447 MIB
.addReg(Src
.getReg(), getUndefRegState(Src
.isUndef()));
448 MIB
.addImm(SubRegs
[I
]);
450 const TargetRegisterClass
*SrcRC
451 = TRI
.getConstrainedRegClassForOperand(Src
, MRI
);
452 if (SrcRC
&& !RBI
.constrainGenericRegister(Src
.getReg(), *SrcRC
, MRI
))
456 if (!RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
))
459 MI
.eraseFromParent();
463 bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr
&MI
) const {
464 MachineBasicBlock
*BB
= MI
.getParent();
465 MachineFunction
*MF
= BB
->getParent();
466 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
467 const int NumDst
= MI
.getNumOperands() - 1;
469 MachineOperand
&Src
= MI
.getOperand(NumDst
);
471 Register SrcReg
= Src
.getReg();
472 Register DstReg0
= MI
.getOperand(0).getReg();
473 LLT DstTy
= MRI
.getType(DstReg0
);
474 LLT SrcTy
= MRI
.getType(SrcReg
);
476 const unsigned DstSize
= DstTy
.getSizeInBits();
477 const unsigned SrcSize
= SrcTy
.getSizeInBits();
478 const DebugLoc
&DL
= MI
.getDebugLoc();
479 const RegisterBank
*SrcBank
= RBI
.getRegBank(SrcReg
, MRI
, TRI
);
481 const TargetRegisterClass
*SrcRC
=
482 TRI
.getRegClassForSizeOnBank(SrcSize
, *SrcBank
, MRI
);
483 if (!SrcRC
|| !RBI
.constrainGenericRegister(SrcReg
, *SrcRC
, MRI
))
486 const unsigned SrcFlags
= getUndefRegState(Src
.isUndef());
488 // Note we could have mixed SGPR and VGPR destination banks for an SGPR
489 // source, and this relies on the fact that the same subregister indices are
491 ArrayRef
<int16_t> SubRegs
= TRI
.getRegSplitParts(SrcRC
, DstSize
/ 8);
492 for (int I
= 0, E
= NumDst
; I
!= E
; ++I
) {
493 MachineOperand
&Dst
= MI
.getOperand(I
);
494 BuildMI(*BB
, &MI
, DL
, TII
.get(TargetOpcode::COPY
), Dst
.getReg())
495 .addReg(SrcReg
, SrcFlags
, SubRegs
[I
]);
497 const TargetRegisterClass
*DstRC
=
498 TRI
.getConstrainedRegClassForOperand(Dst
, MRI
);
499 if (DstRC
&& !RBI
.constrainGenericRegister(Dst
.getReg(), *DstRC
, MRI
))
503 MI
.eraseFromParent();
507 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr
&I
) const {
508 return selectG_ADD_SUB(I
);
511 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr
&I
) const {
512 MachineBasicBlock
*BB
= I
.getParent();
513 MachineFunction
*MF
= BB
->getParent();
514 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
515 const MachineOperand
&MO
= I
.getOperand(0);
517 // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
518 // regbank check here is to know why getConstrainedRegClassForOperand failed.
519 const TargetRegisterClass
*RC
= TRI
.getConstrainedRegClassForOperand(MO
, MRI
);
520 if ((!RC
&& !MRI
.getRegBankOrNull(MO
.getReg())) ||
521 (RC
&& RBI
.constrainGenericRegister(MO
.getReg(), *RC
, MRI
))) {
522 I
.setDesc(TII
.get(TargetOpcode::IMPLICIT_DEF
));
529 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr
&I
) const {
530 MachineBasicBlock
*BB
= I
.getParent();
531 MachineFunction
*MF
= BB
->getParent();
532 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
534 Register Src0Reg
= I
.getOperand(1).getReg();
535 Register Src1Reg
= I
.getOperand(2).getReg();
536 LLT Src1Ty
= MRI
.getType(Src1Reg
);
537 if (Src1Ty
.getSizeInBits() != 32)
540 int64_t Offset
= I
.getOperand(3).getImm();
541 if (Offset
% 32 != 0)
544 unsigned SubReg
= TRI
.getSubRegFromChannel(Offset
/ 32);
545 const DebugLoc
&DL
= I
.getDebugLoc();
547 MachineInstr
*Ins
= BuildMI(*BB
, &I
, DL
, TII
.get(TargetOpcode::INSERT_SUBREG
))
548 .addDef(I
.getOperand(0).getReg())
553 for (const MachineOperand
&MO
: Ins
->operands()) {
556 if (Register::isPhysicalRegister(MO
.getReg()))
559 const TargetRegisterClass
*RC
=
560 TRI
.getConstrainedRegClassForOperand(MO
, MRI
);
563 RBI
.constrainGenericRegister(MO
.getReg(), *RC
, MRI
);
569 bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr
&I
) const {
570 unsigned IntrinsicID
= I
.getOperand(I
.getNumExplicitDefs()).getIntrinsicID();
571 switch (IntrinsicID
) {
572 case Intrinsic::amdgcn_if_break
: {
573 MachineBasicBlock
*BB
= I
.getParent();
574 MachineFunction
*MF
= BB
->getParent();
575 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
577 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
578 // SelectionDAG uses for wave32 vs wave64.
579 BuildMI(*BB
, &I
, I
.getDebugLoc(), TII
.get(AMDGPU::SI_IF_BREAK
))
580 .add(I
.getOperand(0))
581 .add(I
.getOperand(2))
582 .add(I
.getOperand(3));
584 Register DstReg
= I
.getOperand(0).getReg();
585 Register Src0Reg
= I
.getOperand(2).getReg();
586 Register Src1Reg
= I
.getOperand(3).getReg();
590 for (Register Reg
: { DstReg
, Src0Reg
, Src1Reg
}) {
591 if (!MRI
.getRegClassOrNull(Reg
))
592 MRI
.setRegClass(Reg
, TRI
.getWaveMaskRegClass());
598 return selectImpl(I
, *CoverageInfo
);
602 static int getV_CMPOpcode(CmpInst::Predicate P
, unsigned Size
) {
603 if (Size
!= 32 && Size
!= 64)
607 llvm_unreachable("Unknown condition code!");
608 case CmpInst::ICMP_NE
:
609 return Size
== 32 ? AMDGPU::V_CMP_NE_U32_e64
: AMDGPU::V_CMP_NE_U64_e64
;
610 case CmpInst::ICMP_EQ
:
611 return Size
== 32 ? AMDGPU::V_CMP_EQ_U32_e64
: AMDGPU::V_CMP_EQ_U64_e64
;
612 case CmpInst::ICMP_SGT
:
613 return Size
== 32 ? AMDGPU::V_CMP_GT_I32_e64
: AMDGPU::V_CMP_GT_I64_e64
;
614 case CmpInst::ICMP_SGE
:
615 return Size
== 32 ? AMDGPU::V_CMP_GE_I32_e64
: AMDGPU::V_CMP_GE_I64_e64
;
616 case CmpInst::ICMP_SLT
:
617 return Size
== 32 ? AMDGPU::V_CMP_LT_I32_e64
: AMDGPU::V_CMP_LT_I64_e64
;
618 case CmpInst::ICMP_SLE
:
619 return Size
== 32 ? AMDGPU::V_CMP_LE_I32_e64
: AMDGPU::V_CMP_LE_I64_e64
;
620 case CmpInst::ICMP_UGT
:
621 return Size
== 32 ? AMDGPU::V_CMP_GT_U32_e64
: AMDGPU::V_CMP_GT_U64_e64
;
622 case CmpInst::ICMP_UGE
:
623 return Size
== 32 ? AMDGPU::V_CMP_GE_U32_e64
: AMDGPU::V_CMP_GE_U64_e64
;
624 case CmpInst::ICMP_ULT
:
625 return Size
== 32 ? AMDGPU::V_CMP_LT_U32_e64
: AMDGPU::V_CMP_LT_U64_e64
;
626 case CmpInst::ICMP_ULE
:
627 return Size
== 32 ? AMDGPU::V_CMP_LE_U32_e64
: AMDGPU::V_CMP_LE_U64_e64
;
631 int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P
,
632 unsigned Size
) const {
634 if (!STI
.hasScalarCompareEq64())
638 case CmpInst::ICMP_NE
:
639 return AMDGPU::S_CMP_LG_U64
;
640 case CmpInst::ICMP_EQ
:
641 return AMDGPU::S_CMP_EQ_U64
;
651 case CmpInst::ICMP_NE
:
652 return AMDGPU::S_CMP_LG_U32
;
653 case CmpInst::ICMP_EQ
:
654 return AMDGPU::S_CMP_EQ_U32
;
655 case CmpInst::ICMP_SGT
:
656 return AMDGPU::S_CMP_GT_I32
;
657 case CmpInst::ICMP_SGE
:
658 return AMDGPU::S_CMP_GE_I32
;
659 case CmpInst::ICMP_SLT
:
660 return AMDGPU::S_CMP_LT_I32
;
661 case CmpInst::ICMP_SLE
:
662 return AMDGPU::S_CMP_LE_I32
;
663 case CmpInst::ICMP_UGT
:
664 return AMDGPU::S_CMP_GT_U32
;
665 case CmpInst::ICMP_UGE
:
666 return AMDGPU::S_CMP_GE_U32
;
667 case CmpInst::ICMP_ULT
:
668 return AMDGPU::S_CMP_LT_U32
;
669 case CmpInst::ICMP_ULE
:
670 return AMDGPU::S_CMP_LE_U32
;
672 llvm_unreachable("Unknown condition code!");
676 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr
&I
) const {
677 MachineBasicBlock
*BB
= I
.getParent();
678 MachineFunction
*MF
= BB
->getParent();
679 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
680 const DebugLoc
&DL
= I
.getDebugLoc();
682 Register SrcReg
= I
.getOperand(2).getReg();
683 unsigned Size
= RBI
.getSizeInBits(SrcReg
, MRI
, TRI
);
685 auto Pred
= (CmpInst::Predicate
)I
.getOperand(1).getPredicate();
687 Register CCReg
= I
.getOperand(0).getReg();
688 if (isSCC(CCReg
, MRI
)) {
689 int Opcode
= getS_CMPOpcode(Pred
, Size
);
692 MachineInstr
*ICmp
= BuildMI(*BB
, &I
, DL
, TII
.get(Opcode
))
693 .add(I
.getOperand(2))
694 .add(I
.getOperand(3));
695 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::COPY
), CCReg
)
696 .addReg(AMDGPU::SCC
);
698 constrainSelectedInstRegOperands(*ICmp
, TII
, TRI
, RBI
) &&
699 RBI
.constrainGenericRegister(CCReg
, AMDGPU::SReg_32RegClass
, MRI
);
704 int Opcode
= getV_CMPOpcode(Pred
, Size
);
708 MachineInstr
*ICmp
= BuildMI(*BB
, &I
, DL
, TII
.get(Opcode
),
709 I
.getOperand(0).getReg())
710 .add(I
.getOperand(2))
711 .add(I
.getOperand(3));
712 RBI
.constrainGenericRegister(ICmp
->getOperand(0).getReg(),
713 *TRI
.getBoolRC(), MRI
);
714 bool Ret
= constrainSelectedInstRegOperands(*ICmp
, TII
, TRI
, RBI
);
719 static MachineInstr
*
720 buildEXP(const TargetInstrInfo
&TII
, MachineInstr
*Insert
, unsigned Tgt
,
721 unsigned Reg0
, unsigned Reg1
, unsigned Reg2
, unsigned Reg3
,
722 unsigned VM
, bool Compr
, unsigned Enabled
, bool Done
) {
723 const DebugLoc
&DL
= Insert
->getDebugLoc();
724 MachineBasicBlock
&BB
= *Insert
->getParent();
725 unsigned Opcode
= Done
? AMDGPU::EXP_DONE
: AMDGPU::EXP
;
726 return BuildMI(BB
, Insert
, DL
, TII
.get(Opcode
))
737 static bool isZero(Register Reg
, MachineRegisterInfo
&MRI
) {
739 if (mi_match(Reg
, MRI
, m_ICst(C
)) && C
== 0)
742 // FIXME: matcher should ignore copies
743 return mi_match(Reg
, MRI
, m_Copy(m_ICst(C
))) && C
== 0;
746 static unsigned extractGLC(unsigned CachePolicy
) {
747 return CachePolicy
& 1;
750 static unsigned extractSLC(unsigned CachePolicy
) {
751 return (CachePolicy
>> 1) & 1;
754 static unsigned extractDLC(unsigned CachePolicy
) {
755 return (CachePolicy
>> 2) & 1;
758 // Returns Base register, constant offset, and offset def point.
759 static std::tuple
<Register
, unsigned, MachineInstr
*>
760 getBaseWithConstantOffset(MachineRegisterInfo
&MRI
, Register Reg
) {
761 MachineInstr
*Def
= getDefIgnoringCopies(Reg
, MRI
);
763 return std::make_tuple(Reg
, 0, nullptr);
765 if (Def
->getOpcode() == AMDGPU::G_CONSTANT
) {
767 const MachineOperand
&Op
= Def
->getOperand(1);
769 Offset
= Op
.getImm();
771 Offset
= Op
.getCImm()->getZExtValue();
773 return std::make_tuple(Register(), Offset
, Def
);
777 if (Def
->getOpcode() == AMDGPU::G_ADD
) {
778 // TODO: Handle G_OR used for add case
779 if (mi_match(Def
->getOperand(1).getReg(), MRI
, m_ICst(Offset
)))
780 return std::make_tuple(Def
->getOperand(0).getReg(), Offset
, Def
);
782 // FIXME: matcher should ignore copies
783 if (mi_match(Def
->getOperand(1).getReg(), MRI
, m_Copy(m_ICst(Offset
))))
784 return std::make_tuple(Def
->getOperand(0).getReg(), Offset
, Def
);
787 return std::make_tuple(Reg
, 0, Def
);
790 static unsigned getBufferStoreOpcode(LLT Ty
,
791 const unsigned MemSize
,
793 const int Size
= Ty
.getSizeInBits();
794 switch (8 * MemSize
) {
796 return Offen
? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact
:
797 AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact
;
799 return Offen
? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact
:
800 AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact
;
802 unsigned Opc
= Offen
? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact
:
803 AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact
;
805 Opc
= AMDGPU::getMUBUFOpcode(Opc
, Size
/ 32);
810 static unsigned getBufferStoreFormatOpcode(LLT Ty
,
811 const unsigned MemSize
,
813 bool IsD16Packed
= Ty
.getScalarSizeInBits() == 16;
814 bool IsD16Unpacked
= 8 * MemSize
< Ty
.getSizeInBits();
815 int NumElts
= Ty
.isVector() ? Ty
.getNumElements() : 1;
820 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact
:
821 AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact
;
823 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact
:
824 AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact
;
826 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact
:
827 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact
;
829 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact
:
830 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact
;
839 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact
:
840 AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact
;
842 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact
:
843 AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact
;
845 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact
:
846 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact
;
848 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact
:
849 AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact
;
857 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact
:
858 AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact
;
860 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact
:
861 AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact
;
863 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact
:
864 AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact
;
866 return Offen
? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact
:
867 AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact
;
872 llvm_unreachable("unhandled buffer store");
875 // TODO: Move this to combiner
876 // Returns base register, imm offset, total constant offset.
877 std::tuple
<Register
, unsigned, unsigned>
878 AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder
&B
,
879 Register OrigOffset
) const {
880 const unsigned MaxImm
= 4095;
882 unsigned TotalConstOffset
;
883 MachineInstr
*OffsetDef
;
884 MachineRegisterInfo
&MRI
= *B
.getMRI();
886 std::tie(BaseReg
, TotalConstOffset
, OffsetDef
)
887 = getBaseWithConstantOffset(MRI
, OrigOffset
);
889 unsigned ImmOffset
= TotalConstOffset
;
891 // If the immediate value is too big for the immoffset field, put the value
892 // and -4096 into the immoffset field so that the value that is copied/added
893 // for the voffset field is a multiple of 4096, and it stands more chance
894 // of being CSEd with the copy/add for another similar load/store.f
895 // However, do not do that rounding down to a multiple of 4096 if that is a
896 // negative number, as it appears to be illegal to have a negative offset
897 // in the vgpr, even if adding the immediate offset makes it positive.
898 unsigned Overflow
= ImmOffset
& ~MaxImm
;
899 ImmOffset
-= Overflow
;
900 if ((int32_t)Overflow
< 0) {
901 Overflow
+= ImmOffset
;
906 // In case this is in a waterfall loop, insert offset code at the def point
907 // of the offset, not inside the loop.
908 MachineBasicBlock::iterator OldInsPt
= B
.getInsertPt();
909 MachineBasicBlock
&OldMBB
= B
.getMBB();
910 B
.setInstr(*OffsetDef
);
913 BaseReg
= MRI
.createVirtualRegister(&AMDGPU::VGPR_32RegClass
);
914 B
.buildInstr(AMDGPU::V_MOV_B32_e32
)
918 Register OverflowVal
= MRI
.createVirtualRegister(&AMDGPU::VGPR_32RegClass
);
919 B
.buildInstr(AMDGPU::V_MOV_B32_e32
)
923 Register NewBaseReg
= MRI
.createVirtualRegister(&AMDGPU::VGPR_32RegClass
);
924 TII
.getAddNoCarry(B
.getMBB(), B
.getInsertPt(), B
.getDebugLoc(), NewBaseReg
)
926 .addReg(OverflowVal
, RegState::Kill
)
928 BaseReg
= NewBaseReg
;
931 B
.setInsertPt(OldMBB
, OldInsPt
);
934 return std::make_tuple(BaseReg
, ImmOffset
, TotalConstOffset
);
937 bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr
&MI
,
938 bool IsFormat
) const {
939 MachineIRBuilder
B(MI
);
940 MachineRegisterInfo
&MRI
= *B
.getMRI();
941 MachineFunction
&MF
= B
.getMF();
942 Register VData
= MI
.getOperand(1).getReg();
943 LLT Ty
= MRI
.getType(VData
);
945 int Size
= Ty
.getSizeInBits();
949 // FIXME: Verifier should enforce 1 MMO for these intrinsics.
950 MachineMemOperand
*MMO
= *MI
.memoperands_begin();
951 const int MemSize
= MMO
->getSize();
953 Register RSrc
= MI
.getOperand(2).getReg();
954 Register VOffset
= MI
.getOperand(3).getReg();
955 Register SOffset
= MI
.getOperand(4).getReg();
956 unsigned CachePolicy
= MI
.getOperand(5).getImm();
958 unsigned TotalOffset
;
960 std::tie(VOffset
, ImmOffset
, TotalOffset
) = splitBufferOffsets(B
, VOffset
);
961 if (TotalOffset
!= 0)
962 MMO
= MF
.getMachineMemOperand(MMO
, TotalOffset
, MemSize
);
964 const bool Offen
= !isZero(VOffset
, MRI
);
966 int Opc
= IsFormat
? getBufferStoreFormatOpcode(Ty
, MemSize
, Offen
) :
967 getBufferStoreOpcode(Ty
, MemSize
, Offen
);
971 MachineInstrBuilder MIB
= B
.buildInstr(Opc
)
980 .addImm(extractGLC(CachePolicy
))
981 .addImm(extractSLC(CachePolicy
))
982 .addImm(0) // tfe: FIXME: Remove from inst
983 .addImm(extractDLC(CachePolicy
))
986 MI
.eraseFromParent();
988 return constrainSelectedInstRegOperands(*MIB
, TII
, TRI
, RBI
);
991 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
992 MachineInstr
&I
) const {
993 MachineBasicBlock
*BB
= I
.getParent();
994 MachineFunction
*MF
= BB
->getParent();
995 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
997 unsigned IntrinsicID
= I
.getIntrinsicID();
998 switch (IntrinsicID
) {
999 case Intrinsic::amdgcn_exp
: {
1000 int64_t Tgt
= I
.getOperand(1).getImm();
1001 int64_t Enabled
= I
.getOperand(2).getImm();
1002 int64_t Done
= I
.getOperand(7).getImm();
1003 int64_t VM
= I
.getOperand(8).getImm();
1005 MachineInstr
*Exp
= buildEXP(TII
, &I
, Tgt
, I
.getOperand(3).getReg(),
1006 I
.getOperand(4).getReg(),
1007 I
.getOperand(5).getReg(),
1008 I
.getOperand(6).getReg(),
1009 VM
, false, Enabled
, Done
);
1011 I
.eraseFromParent();
1012 return constrainSelectedInstRegOperands(*Exp
, TII
, TRI
, RBI
);
1014 case Intrinsic::amdgcn_exp_compr
: {
1015 const DebugLoc
&DL
= I
.getDebugLoc();
1016 int64_t Tgt
= I
.getOperand(1).getImm();
1017 int64_t Enabled
= I
.getOperand(2).getImm();
1018 Register Reg0
= I
.getOperand(3).getReg();
1019 Register Reg1
= I
.getOperand(4).getReg();
1020 Register Undef
= MRI
.createVirtualRegister(&AMDGPU::VGPR_32RegClass
);
1021 int64_t Done
= I
.getOperand(5).getImm();
1022 int64_t VM
= I
.getOperand(6).getImm();
1024 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::IMPLICIT_DEF
), Undef
);
1025 MachineInstr
*Exp
= buildEXP(TII
, &I
, Tgt
, Reg0
, Reg1
, Undef
, Undef
, VM
,
1026 true, Enabled
, Done
);
1028 I
.eraseFromParent();
1029 return constrainSelectedInstRegOperands(*Exp
, TII
, TRI
, RBI
);
1031 case Intrinsic::amdgcn_end_cf
: {
1032 // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
1033 // SelectionDAG uses for wave32 vs wave64.
1034 BuildMI(*BB
, &I
, I
.getDebugLoc(),
1035 TII
.get(AMDGPU::SI_END_CF
))
1036 .add(I
.getOperand(1));
1038 Register Reg
= I
.getOperand(1).getReg();
1039 I
.eraseFromParent();
1041 if (!MRI
.getRegClassOrNull(Reg
))
1042 MRI
.setRegClass(Reg
, TRI
.getWaveMaskRegClass());
1045 case Intrinsic::amdgcn_raw_buffer_store
:
1046 return selectStoreIntrinsic(I
, false);
1047 case Intrinsic::amdgcn_raw_buffer_store_format
:
1048 return selectStoreIntrinsic(I
, true);
1050 return selectImpl(I
, *CoverageInfo
);
1054 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr
&I
) const {
1055 MachineBasicBlock
*BB
= I
.getParent();
1056 MachineFunction
*MF
= BB
->getParent();
1057 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1058 const DebugLoc
&DL
= I
.getDebugLoc();
1060 Register DstReg
= I
.getOperand(0).getReg();
1061 unsigned Size
= RBI
.getSizeInBits(DstReg
, MRI
, TRI
);
1062 assert(Size
<= 32 || Size
== 64);
1063 const MachineOperand
&CCOp
= I
.getOperand(1);
1064 Register CCReg
= CCOp
.getReg();
1065 if (isSCC(CCReg
, MRI
)) {
1066 unsigned SelectOpcode
= Size
== 64 ? AMDGPU::S_CSELECT_B64
:
1067 AMDGPU::S_CSELECT_B32
;
1068 MachineInstr
*CopySCC
= BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::COPY
), AMDGPU::SCC
)
1071 // The generic constrainSelectedInstRegOperands doesn't work for the scc register
1072 // bank, because it does not cover the register class that we used to represent
1073 // for it. So we need to manually set the register class here.
1074 if (!MRI
.getRegClassOrNull(CCReg
))
1075 MRI
.setRegClass(CCReg
, TRI
.getConstrainedRegClassForOperand(CCOp
, MRI
));
1076 MachineInstr
*Select
= BuildMI(*BB
, &I
, DL
, TII
.get(SelectOpcode
), DstReg
)
1077 .add(I
.getOperand(2))
1078 .add(I
.getOperand(3));
1080 bool Ret
= constrainSelectedInstRegOperands(*Select
, TII
, TRI
, RBI
) |
1081 constrainSelectedInstRegOperands(*CopySCC
, TII
, TRI
, RBI
);
1082 I
.eraseFromParent();
1086 // Wide VGPR select should have been split in RegBankSelect.
1090 MachineInstr
*Select
=
1091 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::V_CNDMASK_B32_e64
), DstReg
)
1093 .add(I
.getOperand(3))
1095 .add(I
.getOperand(2))
1096 .add(I
.getOperand(1));
1098 bool Ret
= constrainSelectedInstRegOperands(*Select
, TII
, TRI
, RBI
);
1099 I
.eraseFromParent();
1103 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr
&I
) const {
1105 return selectImpl(I
, *CoverageInfo
);
1108 static int sizeToSubRegIndex(unsigned Size
) {
1111 return AMDGPU::sub0
;
1113 return AMDGPU::sub0_sub1
;
1115 return AMDGPU::sub0_sub1_sub2
;
1117 return AMDGPU::sub0_sub1_sub2_sub3
;
1119 return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7
;
1122 return AMDGPU::sub0
;
1125 return sizeToSubRegIndex(PowerOf2Ceil(Size
));
1129 bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr
&I
) const {
1130 MachineBasicBlock
*BB
= I
.getParent();
1131 MachineFunction
*MF
= BB
->getParent();
1132 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1134 Register DstReg
= I
.getOperand(0).getReg();
1135 Register SrcReg
= I
.getOperand(1).getReg();
1136 const LLT DstTy
= MRI
.getType(DstReg
);
1137 const LLT SrcTy
= MRI
.getType(SrcReg
);
1138 if (!DstTy
.isScalar())
1141 const RegisterBank
*DstRB
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
1142 const RegisterBank
*SrcRB
= RBI
.getRegBank(SrcReg
, MRI
, TRI
);
1146 unsigned DstSize
= DstTy
.getSizeInBits();
1147 unsigned SrcSize
= SrcTy
.getSizeInBits();
1149 const TargetRegisterClass
*SrcRC
1150 = TRI
.getRegClassForSizeOnBank(SrcSize
, *SrcRB
, MRI
);
1151 const TargetRegisterClass
*DstRC
1152 = TRI
.getRegClassForSizeOnBank(DstSize
, *DstRB
, MRI
);
1155 int SubRegIdx
= sizeToSubRegIndex(DstSize
);
1156 if (SubRegIdx
== -1)
1159 // Deal with weird cases where the class only partially supports the subreg
1161 SrcRC
= TRI
.getSubClassWithSubReg(SrcRC
, SubRegIdx
);
1165 I
.getOperand(1).setSubReg(SubRegIdx
);
1168 if (!RBI
.constrainGenericRegister(SrcReg
, *SrcRC
, MRI
) ||
1169 !RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
)) {
1170 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
1174 I
.setDesc(TII
.get(TargetOpcode::COPY
));
1178 /// \returns true if a bitmask for \p Size bits will be an inline immediate.
1179 static bool shouldUseAndMask(unsigned Size
, unsigned &Mask
) {
1180 Mask
= maskTrailingOnes
<unsigned>(Size
);
1181 int SignedMask
= static_cast<int>(Mask
);
1182 return SignedMask
>= -16 && SignedMask
<= 64;
1185 bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr
&I
) const {
1186 bool Signed
= I
.getOpcode() == AMDGPU::G_SEXT
;
1187 const DebugLoc
&DL
= I
.getDebugLoc();
1188 MachineBasicBlock
&MBB
= *I
.getParent();
1189 MachineFunction
&MF
= *MBB
.getParent();
1190 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
1191 const Register DstReg
= I
.getOperand(0).getReg();
1192 const Register SrcReg
= I
.getOperand(1).getReg();
1194 const LLT DstTy
= MRI
.getType(DstReg
);
1195 const LLT SrcTy
= MRI
.getType(SrcReg
);
1196 const LLT S1
= LLT::scalar(1);
1197 const unsigned SrcSize
= SrcTy
.getSizeInBits();
1198 const unsigned DstSize
= DstTy
.getSizeInBits();
1199 if (!DstTy
.isScalar())
1202 const RegisterBank
*SrcBank
= RBI
.getRegBank(SrcReg
, MRI
, TRI
);
1204 if (SrcBank
->getID() == AMDGPU::SCCRegBankID
) {
1205 if (SrcTy
!= S1
|| DstSize
> 64) // Invalid
1209 DstSize
> 32 ? AMDGPU::S_CSELECT_B64
: AMDGPU::S_CSELECT_B32
;
1210 const TargetRegisterClass
*DstRC
=
1211 DstSize
> 32 ? &AMDGPU::SReg_64RegClass
: &AMDGPU::SReg_32RegClass
;
1213 // FIXME: Create an extra copy to avoid incorrectly constraining the result
1214 // of the scc producer.
1215 Register TmpReg
= MRI
.createVirtualRegister(&AMDGPU::SReg_32RegClass
);
1216 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::COPY
), TmpReg
)
1218 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::COPY
), AMDGPU::SCC
)
1221 // The instruction operands are backwards from what you would expect.
1222 BuildMI(MBB
, I
, DL
, TII
.get(Opcode
), DstReg
)
1224 .addImm(Signed
? -1 : 1);
1225 I
.eraseFromParent();
1226 return RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
);
1229 if (SrcBank
->getID() == AMDGPU::VCCRegBankID
&& DstSize
<= 32) {
1230 if (SrcTy
!= S1
) // Invalid
1233 MachineInstr
*ExtI
=
1234 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::V_CNDMASK_B32_e64
), DstReg
)
1235 .addImm(0) // src0_modifiers
1237 .addImm(0) // src1_modifiers
1238 .addImm(Signed
? -1 : 1) // src1
1240 I
.eraseFromParent();
1241 return constrainSelectedInstRegOperands(*ExtI
, TII
, TRI
, RBI
);
1244 if (I
.getOpcode() == AMDGPU::G_ANYEXT
)
1245 return selectCOPY(I
);
1247 if (SrcBank
->getID() == AMDGPU::VGPRRegBankID
&& DstSize
<= 32) {
1248 // 64-bit should have been split up in RegBankSelect
1250 // Try to use an and with a mask if it will save code size.
1252 if (!Signed
&& shouldUseAndMask(SrcSize
, Mask
)) {
1253 MachineInstr
*ExtI
=
1254 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::V_AND_B32_e32
), DstReg
)
1257 I
.eraseFromParent();
1258 return constrainSelectedInstRegOperands(*ExtI
, TII
, TRI
, RBI
);
1261 const unsigned BFE
= Signed
? AMDGPU::V_BFE_I32
: AMDGPU::V_BFE_U32
;
1262 MachineInstr
*ExtI
=
1263 BuildMI(MBB
, I
, DL
, TII
.get(BFE
), DstReg
)
1265 .addImm(0) // Offset
1266 .addImm(SrcSize
); // Width
1267 I
.eraseFromParent();
1268 return constrainSelectedInstRegOperands(*ExtI
, TII
, TRI
, RBI
);
1271 if (SrcBank
->getID() == AMDGPU::SGPRRegBankID
&& DstSize
<= 64) {
1272 if (!RBI
.constrainGenericRegister(SrcReg
, AMDGPU::SReg_32RegClass
, MRI
))
1275 if (Signed
&& DstSize
== 32 && (SrcSize
== 8 || SrcSize
== 16)) {
1276 const unsigned SextOpc
= SrcSize
== 8 ?
1277 AMDGPU::S_SEXT_I32_I8
: AMDGPU::S_SEXT_I32_I16
;
1278 BuildMI(MBB
, I
, DL
, TII
.get(SextOpc
), DstReg
)
1280 I
.eraseFromParent();
1281 return RBI
.constrainGenericRegister(DstReg
, AMDGPU::SReg_32RegClass
, MRI
);
1284 const unsigned BFE64
= Signed
? AMDGPU::S_BFE_I64
: AMDGPU::S_BFE_U64
;
1285 const unsigned BFE32
= Signed
? AMDGPU::S_BFE_I32
: AMDGPU::S_BFE_U32
;
1287 // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
1288 if (DstSize
> 32 && SrcSize
<= 32) {
1289 // We need a 64-bit register source, but the high bits don't matter.
1290 Register ExtReg
= MRI
.createVirtualRegister(&AMDGPU::SReg_64RegClass
);
1291 Register UndefReg
= MRI
.createVirtualRegister(&AMDGPU::SReg_32RegClass
);
1292 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::IMPLICIT_DEF
), UndefReg
);
1293 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::REG_SEQUENCE
), ExtReg
)
1295 .addImm(AMDGPU::sub0
)
1297 .addImm(AMDGPU::sub1
);
1299 BuildMI(MBB
, I
, DL
, TII
.get(BFE64
), DstReg
)
1301 .addImm(SrcSize
<< 16);
1303 I
.eraseFromParent();
1304 return RBI
.constrainGenericRegister(DstReg
, AMDGPU::SReg_64RegClass
, MRI
);
1308 if (!Signed
&& shouldUseAndMask(SrcSize
, Mask
)) {
1309 BuildMI(MBB
, I
, DL
, TII
.get(AMDGPU::S_AND_B32
), DstReg
)
1313 BuildMI(MBB
, I
, DL
, TII
.get(BFE32
), DstReg
)
1315 .addImm(SrcSize
<< 16);
1318 I
.eraseFromParent();
1319 return RBI
.constrainGenericRegister(DstReg
, AMDGPU::SReg_32RegClass
, MRI
);
1325 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr
&I
) const {
1326 MachineBasicBlock
*BB
= I
.getParent();
1327 MachineFunction
*MF
= BB
->getParent();
1328 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1329 MachineOperand
&ImmOp
= I
.getOperand(1);
1331 // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
1332 if (ImmOp
.isFPImm()) {
1333 const APInt
&Imm
= ImmOp
.getFPImm()->getValueAPF().bitcastToAPInt();
1334 ImmOp
.ChangeToImmediate(Imm
.getZExtValue());
1335 } else if (ImmOp
.isCImm()) {
1336 ImmOp
.ChangeToImmediate(ImmOp
.getCImm()->getZExtValue());
1339 Register DstReg
= I
.getOperand(0).getReg();
1342 const RegisterBank
*RB
= MRI
.getRegBankOrNull(I
.getOperand(0).getReg());
1344 IsSgpr
= RB
->getID() == AMDGPU::SGPRRegBankID
;
1345 Size
= MRI
.getType(DstReg
).getSizeInBits();
1347 const TargetRegisterClass
*RC
= TRI
.getRegClassForReg(MRI
, DstReg
);
1348 IsSgpr
= TRI
.isSGPRClass(RC
);
1349 Size
= TRI
.getRegSizeInBits(*RC
);
1352 if (Size
!= 32 && Size
!= 64)
1355 unsigned Opcode
= IsSgpr
? AMDGPU::S_MOV_B32
: AMDGPU::V_MOV_B32_e32
;
1357 I
.setDesc(TII
.get(Opcode
));
1358 I
.addImplicitDefUseOperands(*MF
);
1359 return constrainSelectedInstRegOperands(I
, TII
, TRI
, RBI
);
1362 DebugLoc DL
= I
.getDebugLoc();
1363 const TargetRegisterClass
*RC
= IsSgpr
? &AMDGPU::SReg_32_XM0RegClass
:
1364 &AMDGPU::VGPR_32RegClass
;
1365 Register LoReg
= MRI
.createVirtualRegister(RC
);
1366 Register HiReg
= MRI
.createVirtualRegister(RC
);
1367 const APInt
&Imm
= APInt(Size
, I
.getOperand(1).getImm());
1369 BuildMI(*BB
, &I
, DL
, TII
.get(Opcode
), LoReg
)
1370 .addImm(Imm
.trunc(32).getZExtValue());
1372 BuildMI(*BB
, &I
, DL
, TII
.get(Opcode
), HiReg
)
1373 .addImm(Imm
.ashr(32).getZExtValue());
1375 const MachineInstr
*RS
=
1376 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::REG_SEQUENCE
), DstReg
)
1378 .addImm(AMDGPU::sub0
)
1380 .addImm(AMDGPU::sub1
);
1382 // We can't call constrainSelectedInstRegOperands here, because it doesn't
1383 // work for target independent opcodes
1384 I
.eraseFromParent();
1385 const TargetRegisterClass
*DstRC
=
1386 TRI
.getConstrainedRegClassForOperand(RS
->getOperand(0), MRI
);
1389 return RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
);
1392 static bool isConstant(const MachineInstr
&MI
) {
1393 return MI
.getOpcode() == TargetOpcode::G_CONSTANT
;
1396 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr
&Load
,
1397 const MachineRegisterInfo
&MRI
, SmallVectorImpl
<GEPInfo
> &AddrInfo
) const {
1399 const MachineInstr
*PtrMI
= MRI
.getUniqueVRegDef(Load
.getOperand(1).getReg());
1403 if (PtrMI
->getOpcode() != TargetOpcode::G_GEP
)
1406 GEPInfo
GEPInfo(*PtrMI
);
1408 for (unsigned i
= 1; i
!= 3; ++i
) {
1409 const MachineOperand
&GEPOp
= PtrMI
->getOperand(i
);
1410 const MachineInstr
*OpDef
= MRI
.getUniqueVRegDef(GEPOp
.getReg());
1412 if (i
== 2 && isConstant(*OpDef
)) {
1413 // TODO: Could handle constant base + variable offset, but a combine
1414 // probably should have commuted it.
1415 assert(GEPInfo
.Imm
== 0);
1416 GEPInfo
.Imm
= OpDef
->getOperand(1).getCImm()->getSExtValue();
1419 const RegisterBank
*OpBank
= RBI
.getRegBank(GEPOp
.getReg(), MRI
, TRI
);
1420 if (OpBank
->getID() == AMDGPU::SGPRRegBankID
)
1421 GEPInfo
.SgprParts
.push_back(GEPOp
.getReg());
1423 GEPInfo
.VgprParts
.push_back(GEPOp
.getReg());
1426 AddrInfo
.push_back(GEPInfo
);
1427 getAddrModeInfo(*PtrMI
, MRI
, AddrInfo
);
1430 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr
&MI
) const {
1431 if (!MI
.hasOneMemOperand())
1434 const MachineMemOperand
*MMO
= *MI
.memoperands_begin();
1435 const Value
*Ptr
= MMO
->getValue();
1437 // UndefValue means this is a load of a kernel input. These are uniform.
1438 // Sometimes LDS instructions have constant pointers.
1439 // If Ptr is null, then that means this mem operand contains a
1440 // PseudoSourceValue like GOT.
1441 if (!Ptr
|| isa
<UndefValue
>(Ptr
) || isa
<Argument
>(Ptr
) ||
1442 isa
<Constant
>(Ptr
) || isa
<GlobalValue
>(Ptr
))
1445 if (MMO
->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT
)
1448 const Instruction
*I
= dyn_cast
<Instruction
>(Ptr
);
1449 return I
&& I
->getMetadata("amdgpu.uniform");
1452 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef
<GEPInfo
> AddrInfo
) const {
1453 for (const GEPInfo
&GEPInfo
: AddrInfo
) {
1454 if (!GEPInfo
.VgprParts
.empty())
1460 void AMDGPUInstructionSelector::initM0(MachineInstr
&I
) const {
1461 MachineBasicBlock
*BB
= I
.getParent();
1462 MachineFunction
*MF
= BB
->getParent();
1463 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1465 const LLT PtrTy
= MRI
.getType(I
.getOperand(1).getReg());
1466 unsigned AS
= PtrTy
.getAddressSpace();
1467 if ((AS
== AMDGPUAS::LOCAL_ADDRESS
|| AS
== AMDGPUAS::REGION_ADDRESS
) &&
1468 STI
.ldsRequiresM0Init()) {
1469 // If DS instructions require M0 initializtion, insert it before selecting.
1470 BuildMI(*BB
, &I
, I
.getDebugLoc(), TII
.get(AMDGPU::S_MOV_B32
), AMDGPU::M0
)
1475 bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr
&I
) const {
1477 return selectImpl(I
, *CoverageInfo
);
1480 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr
&I
) const {
1481 MachineBasicBlock
*BB
= I
.getParent();
1482 MachineFunction
*MF
= BB
->getParent();
1483 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1484 MachineOperand
&CondOp
= I
.getOperand(0);
1485 Register CondReg
= CondOp
.getReg();
1486 const DebugLoc
&DL
= I
.getDebugLoc();
1489 Register CondPhysReg
;
1490 const TargetRegisterClass
*ConstrainRC
;
1492 // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
1493 // whether the branch is uniform when selecting the instruction. In
1494 // GlobalISel, we should push that decision into RegBankSelect. Assume for now
1495 // RegBankSelect knows what it's doing if the branch condition is scc, even
1496 // though it currently does not.
1497 if (isSCC(CondReg
, MRI
)) {
1498 CondPhysReg
= AMDGPU::SCC
;
1499 BrOpcode
= AMDGPU::S_CBRANCH_SCC1
;
1500 ConstrainRC
= &AMDGPU::SReg_32_XM0RegClass
;
1501 } else if (isVCC(CondReg
, MRI
)) {
1502 // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
1503 // We sort of know that a VCC producer based on the register bank, that ands
1504 // inactive lanes with 0. What if there was a logical operation with vcc
1505 // producers in different blocks/with different exec masks?
1506 // FIXME: Should scc->vcc copies and with exec?
1507 CondPhysReg
= TRI
.getVCC();
1508 BrOpcode
= AMDGPU::S_CBRANCH_VCCNZ
;
1509 ConstrainRC
= TRI
.getBoolRC();
1513 if (!MRI
.getRegClassOrNull(CondReg
))
1514 MRI
.setRegClass(CondReg
, ConstrainRC
);
1516 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::COPY
), CondPhysReg
)
1518 BuildMI(*BB
, &I
, DL
, TII
.get(BrOpcode
))
1519 .addMBB(I
.getOperand(1).getMBB());
1521 I
.eraseFromParent();
1525 bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr
&I
) const {
1526 MachineBasicBlock
*BB
= I
.getParent();
1527 MachineFunction
*MF
= BB
->getParent();
1528 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1530 Register DstReg
= I
.getOperand(0).getReg();
1531 const RegisterBank
*DstRB
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
1532 const bool IsVGPR
= DstRB
->getID() == AMDGPU::VGPRRegBankID
;
1533 I
.setDesc(TII
.get(IsVGPR
? AMDGPU::V_MOV_B32_e32
: AMDGPU::S_MOV_B32
));
1535 I
.addOperand(*MF
, MachineOperand::CreateReg(AMDGPU::EXEC
, false, true));
1537 return RBI
.constrainGenericRegister(
1538 DstReg
, IsVGPR
? AMDGPU::VGPR_32RegClass
: AMDGPU::SReg_32RegClass
, MRI
);
1541 bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr
&I
) const {
1542 uint64_t Align
= I
.getOperand(2).getImm();
1543 const uint64_t Mask
= ~((UINT64_C(1) << Align
) - 1);
1545 MachineBasicBlock
*BB
= I
.getParent();
1546 MachineFunction
*MF
= BB
->getParent();
1547 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1549 Register DstReg
= I
.getOperand(0).getReg();
1550 Register SrcReg
= I
.getOperand(1).getReg();
1552 const RegisterBank
*DstRB
= RBI
.getRegBank(DstReg
, MRI
, TRI
);
1553 const RegisterBank
*SrcRB
= RBI
.getRegBank(SrcReg
, MRI
, TRI
);
1554 const bool IsVGPR
= DstRB
->getID() == AMDGPU::VGPRRegBankID
;
1555 unsigned NewOpc
= IsVGPR
? AMDGPU::V_AND_B32_e64
: AMDGPU::S_AND_B32
;
1556 unsigned MovOpc
= IsVGPR
? AMDGPU::V_MOV_B32_e32
: AMDGPU::S_MOV_B32
;
1557 const TargetRegisterClass
&RegRC
1558 = IsVGPR
? AMDGPU::VGPR_32RegClass
: AMDGPU::SReg_32RegClass
;
1560 LLT Ty
= MRI
.getType(DstReg
);
1562 const TargetRegisterClass
*DstRC
= TRI
.getRegClassForTypeOnBank(Ty
, *DstRB
,
1564 const TargetRegisterClass
*SrcRC
= TRI
.getRegClassForTypeOnBank(Ty
, *SrcRB
,
1566 if (!RBI
.constrainGenericRegister(DstReg
, *DstRC
, MRI
) ||
1567 !RBI
.constrainGenericRegister(SrcReg
, *SrcRC
, MRI
))
1570 const DebugLoc
&DL
= I
.getDebugLoc();
1571 Register ImmReg
= MRI
.createVirtualRegister(&RegRC
);
1572 BuildMI(*BB
, &I
, DL
, TII
.get(MovOpc
), ImmReg
)
1575 if (Ty
.getSizeInBits() == 32) {
1576 BuildMI(*BB
, &I
, DL
, TII
.get(NewOpc
), DstReg
)
1579 I
.eraseFromParent();
1583 Register HiReg
= MRI
.createVirtualRegister(&RegRC
);
1584 Register LoReg
= MRI
.createVirtualRegister(&RegRC
);
1585 Register MaskLo
= MRI
.createVirtualRegister(&RegRC
);
1587 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::COPY
), LoReg
)
1588 .addReg(SrcReg
, 0, AMDGPU::sub0
);
1589 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::COPY
), HiReg
)
1590 .addReg(SrcReg
, 0, AMDGPU::sub1
);
1592 BuildMI(*BB
, &I
, DL
, TII
.get(NewOpc
), MaskLo
)
1595 BuildMI(*BB
, &I
, DL
, TII
.get(AMDGPU::REG_SEQUENCE
), DstReg
)
1597 .addImm(AMDGPU::sub0
)
1599 .addImm(AMDGPU::sub1
);
1600 I
.eraseFromParent();
1604 bool AMDGPUInstructionSelector::select(MachineInstr
&I
) {
1606 return selectPHI(I
);
1608 if (!isPreISelGenericOpcode(I
.getOpcode())) {
1610 return selectCOPY(I
);
1614 switch (I
.getOpcode()) {
1615 case TargetOpcode::G_AND
:
1616 case TargetOpcode::G_OR
:
1617 case TargetOpcode::G_XOR
:
1618 if (selectG_AND_OR_XOR(I
))
1620 return selectImpl(I
, *CoverageInfo
);
1621 case TargetOpcode::G_ADD
:
1622 case TargetOpcode::G_SUB
:
1623 if (selectImpl(I
, *CoverageInfo
))
1625 return selectG_ADD_SUB(I
);
1626 case TargetOpcode::G_INTTOPTR
:
1627 case TargetOpcode::G_BITCAST
:
1628 return selectCOPY(I
);
1629 case TargetOpcode::G_CONSTANT
:
1630 case TargetOpcode::G_FCONSTANT
:
1631 return selectG_CONSTANT(I
);
1632 case TargetOpcode::G_EXTRACT
:
1633 return selectG_EXTRACT(I
);
1634 case TargetOpcode::G_MERGE_VALUES
:
1635 case TargetOpcode::G_BUILD_VECTOR
:
1636 case TargetOpcode::G_CONCAT_VECTORS
:
1637 return selectG_MERGE_VALUES(I
);
1638 case TargetOpcode::G_UNMERGE_VALUES
:
1639 return selectG_UNMERGE_VALUES(I
);
1640 case TargetOpcode::G_GEP
:
1641 return selectG_GEP(I
);
1642 case TargetOpcode::G_IMPLICIT_DEF
:
1643 return selectG_IMPLICIT_DEF(I
);
1644 case TargetOpcode::G_INSERT
:
1645 return selectG_INSERT(I
);
1646 case TargetOpcode::G_INTRINSIC
:
1647 return selectG_INTRINSIC(I
);
1648 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
:
1649 return selectG_INTRINSIC_W_SIDE_EFFECTS(I
);
1650 case TargetOpcode::G_ICMP
:
1651 if (selectG_ICMP(I
))
1653 return selectImpl(I
, *CoverageInfo
);
1654 case TargetOpcode::G_LOAD
:
1655 case TargetOpcode::G_ATOMIC_CMPXCHG
:
1656 case TargetOpcode::G_ATOMICRMW_XCHG
:
1657 case TargetOpcode::G_ATOMICRMW_ADD
:
1658 case TargetOpcode::G_ATOMICRMW_SUB
:
1659 case TargetOpcode::G_ATOMICRMW_AND
:
1660 case TargetOpcode::G_ATOMICRMW_OR
:
1661 case TargetOpcode::G_ATOMICRMW_XOR
:
1662 case TargetOpcode::G_ATOMICRMW_MIN
:
1663 case TargetOpcode::G_ATOMICRMW_MAX
:
1664 case TargetOpcode::G_ATOMICRMW_UMIN
:
1665 case TargetOpcode::G_ATOMICRMW_UMAX
:
1666 case TargetOpcode::G_ATOMICRMW_FADD
:
1667 return selectG_LOAD_ATOMICRMW(I
);
1668 case TargetOpcode::G_SELECT
:
1669 return selectG_SELECT(I
);
1670 case TargetOpcode::G_STORE
:
1671 return selectG_STORE(I
);
1672 case TargetOpcode::G_TRUNC
:
1673 return selectG_TRUNC(I
);
1674 case TargetOpcode::G_SEXT
:
1675 case TargetOpcode::G_ZEXT
:
1676 case TargetOpcode::G_ANYEXT
:
1677 return selectG_SZA_EXT(I
);
1678 case TargetOpcode::G_BRCOND
:
1679 return selectG_BRCOND(I
);
1680 case TargetOpcode::G_FRAME_INDEX
:
1681 return selectG_FRAME_INDEX(I
);
1682 case TargetOpcode::G_FENCE
:
1683 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
1684 // is checking for G_CONSTANT
1685 I
.setDesc(TII
.get(AMDGPU::ATOMIC_FENCE
));
1687 case TargetOpcode::G_PTR_MASK
:
1688 return selectG_PTR_MASK(I
);
1690 return selectImpl(I
, *CoverageInfo
);
1695 InstructionSelector::ComplexRendererFns
1696 AMDGPUInstructionSelector::selectVCSRC(MachineOperand
&Root
) const {
1698 [=](MachineInstrBuilder
&MIB
) { MIB
.add(Root
); }
1703 std::pair
<Register
, unsigned>
1704 AMDGPUInstructionSelector::selectVOP3ModsImpl(
1705 Register Src
, const MachineRegisterInfo
&MRI
) const {
1707 MachineInstr
*MI
= MRI
.getVRegDef(Src
);
1709 if (MI
&& MI
->getOpcode() == AMDGPU::G_FNEG
) {
1710 Src
= MI
->getOperand(1).getReg();
1711 Mods
|= SISrcMods::NEG
;
1712 MI
= MRI
.getVRegDef(Src
);
1715 if (MI
&& MI
->getOpcode() == AMDGPU::G_FABS
) {
1716 Src
= MI
->getOperand(1).getReg();
1717 Mods
|= SISrcMods::ABS
;
1720 return std::make_pair(Src
, Mods
);
1724 /// This will select either an SGPR or VGPR operand and will save us from
1725 /// having to write an extra tablegen pattern.
1726 InstructionSelector::ComplexRendererFns
1727 AMDGPUInstructionSelector::selectVSRC0(MachineOperand
&Root
) const {
1729 [=](MachineInstrBuilder
&MIB
) { MIB
.add(Root
); }
1733 InstructionSelector::ComplexRendererFns
1734 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand
&Root
) const {
1735 MachineRegisterInfo
&MRI
1736 = Root
.getParent()->getParent()->getParent()->getRegInfo();
1740 std::tie(Src
, Mods
) = selectVOP3ModsImpl(Root
.getReg(), MRI
);
1743 [=](MachineInstrBuilder
&MIB
) { MIB
.addReg(Src
); },
1744 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(Mods
); }, // src0_mods
1745 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); }, // clamp
1746 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); } // omod
1750 InstructionSelector::ComplexRendererFns
1751 AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand
&Root
) const {
1752 MachineRegisterInfo
&MRI
1753 = Root
.getParent()->getParent()->getParent()->getRegInfo();
1757 std::tie(Src
, Mods
) = selectVOP3ModsImpl(Root
.getReg(), MRI
);
1760 [=](MachineInstrBuilder
&MIB
) { MIB
.addReg(Src
); },
1761 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(Mods
); }, // src0_mods
1762 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); }, // clamp
1763 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); } // omod
1767 InstructionSelector::ComplexRendererFns
1768 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand
&Root
) const {
1770 [=](MachineInstrBuilder
&MIB
) { MIB
.add(Root
); },
1771 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); }, // clamp
1772 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(0); } // omod
1776 InstructionSelector::ComplexRendererFns
1777 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand
&Root
) const {
1778 MachineRegisterInfo
&MRI
1779 = Root
.getParent()->getParent()->getParent()->getRegInfo();
1783 std::tie(Src
, Mods
) = selectVOP3ModsImpl(Root
.getReg(), MRI
);
1786 [=](MachineInstrBuilder
&MIB
) { MIB
.addReg(Src
); },
1787 [=](MachineInstrBuilder
&MIB
) { MIB
.addImm(Mods
); } // src_mods
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // clamp
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
  }};
}

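/// Match an SMRD address that is a single SGPR base plus an immediate offset
/// legal for this subtarget's SMRD encoding; the offset is rendered in its
/// encoded form via AMDGPU::getSMRDEncodedOffset.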
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

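/// Select the _SGPR SMRD form: the 32-bit offset is materialized into a
/// fresh SGPR with S_MOV_B32 and rendered as the soffset operand. This only
/// applies after the _IMM forms above have been rejected.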
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
      .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

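/// Try to fold a constant offset from a G_GEP into the immediate offset
/// field of a FLAT instruction. Falls back to the plain address with a zero
/// offset when the subtarget has no FLAT instruction offsets or the offset
/// is not legal for the address space; \p Signed selects the signed-offset
/// legality check.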
template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

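/// Select the rsrc/vaddr/soffset/offset operands for a MUBUF scratch access
/// using offen. A constant address is split into high bits materialized into
/// a VGPR with V_MOV_B32 plus a low 12-bit immediate offset; otherwise a
/// frame index or base register with a legal immediate offset is folded
/// directly. The soffset operand is either the stack pointer or the scratch
/// wave offset register, depending on what is known about the access.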
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

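/// Return true if \p Offset fits in the unsigned DS offset field of
/// \p OffsetBits bits and, on subtargets where a negative base address is a
/// problem, the sign bit of \p Base is known to be zero.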
bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return KnownBits->signBitIsZero(Base.getReg());
}

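/// Select a MUBUF scratch access whose whole address is a constant that fits
/// the MUBUF immediate offset field, rendering only rsrc, soffset, and the
/// immediate offset (no vaddr).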
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

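/// Select a DS address as a base register plus an unsigned 16-bit immediate
/// offset. For example (illustrative GMIR):
///   %c:vgpr(s32) = G_CONSTANT i32 16
///   %addr:vgpr(p3) = G_GEP %base, %c
/// selects %base with offset 16 when the offset is legal; otherwise the
/// original address is used with a zero offset.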
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO

  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO

  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }