//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
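// Descriptive note (added for readability): the following helper decides
// whether a condition register refers to the scalar condition code (SCC) --
// either the physical SCC register itself, or a 1-bit virtual register whose
// register class or bank indicates a scalar boolean.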
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}
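// Descriptive note (added for readability): COPY needs manual handling
// because a copy into a VCC-bank boolean may have to be expanded. A copy from
// SCC only constrains the destination class, while a copy from a non-boolean
// source is lowered to V_CMP_NE_U32 against zero so every active lane holds
// the scalar bit.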
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }

  return true;
}
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}
static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}
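// Descriptive note (added for readability): map a generic G_AND/G_OR/G_XOR
// opcode to the scalar ALU instruction of the requested width (the 64-bit
// form for wide or wave-sized booleans, the 32-bit form otherwise).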
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing the register bank for the result
    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
    // be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}
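// Descriptive note (added for readability): add/sub selection. 32-bit values
// map directly onto S_ADD/S_SUB or V_ADD/V_SUB (with or without a carry-out
// depending on the subtarget), while 64-bit values are split into low/high
// halves joined by a carry chain and recombined with a REG_SEQUENCE.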
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}
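// Descriptive note (added for readability): G_EXTRACT of a 32-bit-aligned
// chunk is selected as a subregister COPY.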
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg())
                               .addReg(I.getOperand(2).getReg())
                               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
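// Descriptive note (added for readability): predicate-to-opcode tables for
// the VALU and SALU integer compares used by selectG_ICMP below.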
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;

  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}
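// Descriptive note (added for readability): integer compares producing an
// SCC-bank result use S_CMP followed by a copy from SCC; otherwise a VALU
// V_CMP writing a VCC-bank boolean is emitted.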
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
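// Descriptive note (added for readability): G_SELECT with an SCC condition
// becomes S_CSELECT (after copying the condition into SCC); a VCC condition
// becomes V_CNDMASK_B32 for 32-bit or smaller results.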
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc register
    // bank, because it does not cover the register class that we use to represent
    // it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}
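// Descriptive note (added for readability): truncation is selected as a
// (possibly subregister) COPY once both operands are constrained to classes
// of the appropriate size for their register banks.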
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}
/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
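// Descriptive note (added for readability): sign/zero/any-extend selection.
// The strategy depends on the source bank: SCC and VCC booleans are
// materialized with S_CSELECT / V_CNDMASK, VGPR sources use V_AND or V_BFE,
// and SGPR sources use S_SEXT, S_AND or S_BFE (with a REG_SEQUENCE to widen
// to 64 bits when needed).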
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    const unsigned Opcode =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
      DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}
static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}
bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}
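// Descriptive note (added for readability): DS (local/region) accesses may
// require M0 to be initialized before the memory instruction is selected.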
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}
bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know that a VCC producer, based on the register bank, ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  default:
    return selectImpl(I, *CoverageInfo);
  }
  return false;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}
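// Descriptive note (added for readability): complex renderer for FLAT
// addressing. If the subtarget supports instruction offsets and the address
// is a G_GEP with a legal constant offset, the offset is folded into the
// instruction; otherwise a zero offset is used.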
template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }   // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}
static bool signBitIsZero(const MachineOperand &Op,
                          const MachineRegisterInfo &MRI) {
  return false; // XXX - Check this
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}
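// Descriptive note (added for readability): MUBUF scratch (private)
// addressing. Either the high bits of a pure constant address are
// materialized into a VGPR, or a frame index / base+offset pair is folded
// into the vaddr and immediate offset fields.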
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             signBitIsZero(LHS, MRI))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return signBitIsZero(Base, MRI);
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }