//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

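// The selector caches the subtarget's instruction, register, and register
// bank info up front; the predicate and temporary initializers pulled in
// below are generated by TableGen into AMDGPUGenGlobalISel.inc.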
AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

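// isSCC/isVCC classify a boolean-valued register: isSCC holds for the
// physical SCC register or a virtual register assigned to the SCC bank,
// while isVCC holds for the VCC physical register or the VCC bank.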
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

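// COPY is selected manually: a copy of a non-VCC boolean into a VCC-bank
// register is expanded to a V_CMP against zero, while other copies only have
// their virtual register operands constrained to concrete register classes.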
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR with
    // size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

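// Produce a 32-bit sub-operand (low or high half, selected by SubIdx) of a
// 64-bit operand: register operands are split with a subregister COPY into
// SubRC, and immediate operands are split into their low/high 32 bits.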
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    unsigned Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32, if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC
  // for the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));

    const TargetRegisterClass *RC
      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (!RC)
      return false;
    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
  }

  return false;
}

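// G_ADD/G_SUB: 32-bit operations map directly onto S_ADD/S_SUB or
// V_ADD/V_SUB. 64-bit adds are decomposed into a low add that defines a carry
// plus a carry-consuming add of the high halves, then recombined with a
// REG_SEQUENCE.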
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  I.eraseFromParent();

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
  DebugLoc DL = I.getDebugLoc();
  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg())
                               .addReg(I.getOperand(2).getReg())
                               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

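// Map an integer compare predicate to the VALU (getV_CMPOpcode) or SALU
// (getS_CMPOpcode) compare opcode for the given operand size, returning -1
// when no suitable opcode exists.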
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;

  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

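// Shared helper for the export intrinsics: emit an EXP or EXP_DONE with the
// given target, four source registers, and the vm/compr/enable flags.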
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    unsigned Reg0 = I.getOperand(3).getReg();
    unsigned Reg1 = I.getOperand(4).getReg();
    unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that we use
    // to represent it. So we need to manually set the register class here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

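// Map a value size in bits to the sub0..subN subregister index covering that
// many low bits; odd sizes fall back to the next power of two.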
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

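// G_SEXT/G_ZEXT/G_ANYEXT selection: boolean sources on the SCC or VCC banks
// become S_CSELECT/V_CNDMASK of 0 and 1 (or -1 for sext); other sources use
// an AND mask when it is an inline immediate, and a BFE otherwise.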
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const unsigned DstReg = I.getOperand(0).getReg();
  const unsigned SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    const unsigned Opcode =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
      DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
        BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0)        // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      unsigned ExtReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned UndefReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  unsigned LoReg = MRI.createVirtualRegister(RC);
  unsigned HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

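// Addressing-mode analysis for scalar memory operations: walk the G_GEP
// chain feeding a load and collect the constant offset plus the SGPR/VGPR
// pointer parts of each step into GEPInfo records.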
static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1, e = 3; i < e; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    if (isConstant(*OpDef)) {
      // FIXME: Is it possible to have multiple Imm parts? Maybe if we
      // are lacking other optimizations.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for
  // now that RegBankSelect knows what it's doing if the branch condition is
  // scc, even though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

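// Top-level entry point: PHIs and non-generic opcodes are handled specially,
// otherwise dispatch on the generic opcode to the manual handlers above or
// fall back to the TableGen-generated selectImpl.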
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectG_ADD_SUB(I))
      return true;
    LLVM_FALLTHROUGH;
  default:
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT.
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  }
  return false;
}

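// The remaining methods are ComplexPattern renderers used by the imported
// TableGen patterns; each returns a list of callbacks that add the operands
// (registers, modifiers, offsets) for the matched addressing mode or source.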
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

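// SMRD addressing: the *Imm variants fold a legal immediate offset into the
// instruction, the Imm32 variant additionally requires it to fit in 32 bits,
// and the Sgpr variant materializes a 32-bit offset into an SGPR.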
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }   // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

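// Scratch (private) MUBUF addressing: a pure constant address is split into a
// high part materialized in a VGPR plus a 12-bit immediate, while a frame
// index with a legal immediate offset is folded into the vaddr/offset fields.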
static bool signBitIsZero(const MachineOperand &Op,
                          const MachineRegisterInfo &MRI) {
  return false;
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             signBitIsZero(LHS, MRI))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return signBitIsZero(Base, MRI);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                          // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); },  // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }       // offset
  }};
}

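// DS addressing: fold a (base + constant) address into the single-address,
// single-offset form when the 16-bit offset is legal; otherwise pass the
// address through with a zero offset.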
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}