//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
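
// Check whether \p Reg is the scalar condition code: either the physical SCC
// register itself, or a virtual register whose class or bank identifies it as
// an SCC value.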
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}
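
// Check whether \p Reg carries a vector condition (VCC) value, based on the
// physical register, the register class, or the register bank assigned to it.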
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}
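
// Select a COPY. Copies producing a VCC value may need to be expanded into a
// V_CMP against zero, and the remaining register operands are constrained to
// the classes implied by their banks.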
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR with
    // size 1. An SReg_32 with size 1 is ambiguous with wave32.
    const TargetRegisterClass *SrcRC =
      TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
      return false;

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }

  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}
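
// Extract the 32-bit low or high half of a 64-bit operand, either by copying
// the corresponding sub-register or by splitting an immediate.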
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}
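
// Map G_AND/G_OR/G_XOR onto the corresponding 32- or 64-bit scalar bit
// operation.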
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case TargetOpcode::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case TargetOpcode::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case TargetOpcode::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing the register bank for the result
    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
    // be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}
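
// Select a 32- or 64-bit G_ADD/G_SUB. 32-bit operations map directly onto the
// scalar or vector ALU; 64-bit additions are split into a low add and a high
// add-with-carry, then recombined with a REG_SEQUENCE.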
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
               .addDef(I.getOperand(0).getReg())
               .addReg(I.getOperand(1).getReg())
               .addReg(I.getOperand(2).getReg())
               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
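
// Map an integer predicate onto the corresponding VALU (V_CMP_*) compare
// opcode for the given operand size.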
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
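
// Select G_SELECT. Scalar selects on an SCC condition become S_CSELECT_*;
// otherwise the condition is a lane mask and the select lowers to
// V_CNDMASK_B32.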
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that is used
    // to represent it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
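
// Select G_SEXT/G_ZEXT/G_ANYEXT. The lowering depends on the source bank:
// SCC and VCC conditions become conditional selects, VGPR sources use
// V_BFE/V_AND, and SGPR sources use S_SEXT/S_BFE/S_AND patterns.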
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    const unsigned Opcode =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
      DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
    .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
    .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
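
// Walk the pointer operand of a load back through G_GEPs, recording the SGPR
// and VGPR address pieces and any constant offset for later addressing-mode
// selection.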
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
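
// Peel G_FNEG/G_FABS off a VOP3 source operand and fold them into the
// source-modifier bits, returning the underlying register and the modifier
// mask.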
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
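
// SMRD addressing: try to fold a GEP with a uniform base and a constant
// offset into the immediate-offset form of a scalar memory load.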
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
    Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}
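
// FLAT addressing: if the target supports instruction offsets, try to fold a
// GEP with a legal constant offset into the FLAT instruction; otherwise fall
// back to a zero offset.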
template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }   // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

static bool signBitIsZero(const MachineOperand &Op,
                          const MachineRegisterInfo &MRI) {
  return false;
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}
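
// Private (scratch) addressing with a VGPR address: split constant addresses
// into a high part materialized in a VGPR and a low immediate, or fold a
// frame index / base-plus-offset into the MUBUF vaddr and offset fields.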
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
          RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             signBitIsZero(LHS, MRI))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return signBitIsZero(Base, MRI);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                          // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {

  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {

  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }