//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits &KB,
                                        CodeGenCoverage &CoverageInfo) {
  MRI = &MF.getRegInfo();
  InstructionSelector::setupMF(MF, KB, CoverageInfo);
}

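// Return true if \p Reg represents the scalar condition code: either the
// physical SCC register, or a 1-bit virtual register assigned to the SCC
// register bank (or to the SGPR_32 class hack described below).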
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    // As a hack, getRegClassForSizeOnBank uses exactly SGPR_32RegClass, which
    // won't ever be constrained any further.
    if (RC != &AMDGPU::SGPR_32RegClass)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

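// Return true if \p Reg represents a vector condition: either the physical
// VCC register, or a 1-bit virtual register constrained to the wave mask
// class or assigned to the VCC register bank.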
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, *MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, *MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}

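// Return a 32-bit half (\p SubIdx) of the 64-bit operand \p MO, either by
// copying out the subregister of a register operand or by splitting an
// immediate operand in two.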
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  Register DstReg = MRI->createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

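// Map a generic G_AND/G_OR/G_XOR opcode to the corresponding 32- or 64-bit
// scalar ALU instruction.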
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing that the register bank for the
    // result is VCC. In wave32 if we constrain the registers to SReg_32 here,
    // it will be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
      MRI->setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
      MRI->setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}

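// 32-bit adds/subs select directly to the scalar or vector ALU; 64-bit
// operations are decomposed into a low half with carry-out and a high half
// with carry-in.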
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

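// Select unsigned add/sub with overflow. The carry-out is SCC on the scalar
// path and a wave mask register on the vector path.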
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;

  if (!isSCC(Dst1Reg, MRI)) {
    // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
    // unsigned carry out despite the _i32 name. These were renamed in VI to
    // _U32.
    // FIXME: We should probably rename the opcodes here.
    unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
    I.setDesc(TII.get(NewOpc));
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
    I.addOperand(*MF, MachineOperand::CreateImm(0));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
  unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
    .add(I.getOperand(2))
    .add(I.getOperand(3));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    .addReg(AMDGPU::SCC);

  if (!MRI.getRegClassOrNull(Dst1Reg))
    MRI.setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return selectImpl(MI, *CoverageInfo);

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both types of register banks.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  unsigned InsSize = Src1Ty.getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)
    return false;

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
  if (!DstRC)
    return false;

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
    TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
  const TargetRegisterClass *Src1RC =
    TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);

  // Deal with weird cases where the class only partially supports the subreg
  // index.
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC)
    return false;

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
    .addReg(Src0Reg)
    .addReg(Src1Reg)
    .addImm(SubReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), *MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
  int64_t C;
  if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
    return true;

  // FIXME: matcher should ignore copies
  return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
}

static unsigned extractGLC(unsigned AuxiliaryData) {
  return AuxiliaryData & 1;
}

static unsigned extractSLC(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 1) & 1;
}

static unsigned extractDLC(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 2) & 1;
}

static unsigned extractSWZ(unsigned AuxiliaryData) {
  return (AuxiliaryData >> 3) & 1;
}

// Returns Base register, constant offset, and offset def point.
static std::tuple<Register, unsigned, MachineInstr *>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (!Def)
    return std::make_tuple(Reg, 0, nullptr);

  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::make_tuple(Register(), Offset, Def);
  }

  int64_t Offset;
  if (Def->getOpcode() == AMDGPU::G_ADD) {
    // TODO: Handle G_OR used for add case
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);

    // FIXME: matcher should ignore copies
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::make_tuple(Def->getOperand(1).getReg(), Offset, Def);
  }

  return std::make_tuple(Reg, 0, Def);
}

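// Pick the MUBUF opcode for a plain (non-format) buffer store of the given
// memory size, with or without a VGPR offset (offen).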
static unsigned getBufferStoreOpcode(LLT Ty,
                                     const unsigned MemSize,
                                     const bool Offen) {
  const int Size = Ty.getSizeInBits();
  switch (8 * MemSize) {
  case 8:
    return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
  case 16:
    return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
  default:
    unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
                           AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
    if (Size > 32)
      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
    return Opc;
  }
}

static unsigned getBufferStoreFormatOpcode(LLT Ty,
                                           const unsigned MemSize,
                                           const bool Offen) {
  bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
  bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
  int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;

  if (IsD16Packed) {
    switch (NumElts) {
    case 1:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
    case 2:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
    case 3:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
    case 4:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
    default:
      return -1;
    }
  }

  if (IsD16Unpacked) {
    switch (NumElts) {
    case 1:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
    case 2:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
    case 3:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
    case 4:
      return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
                     AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
    default:
      return -1;
    }
  }

  switch (NumElts) {
  case 1:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
  case 2:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
  case 3:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
  case 4:
    return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
                   AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
  default:
    return -1;
  }

  llvm_unreachable("unhandled buffer store");
}

// TODO: Move this to combiner
// Returns base register, imm offset, total constant offset.
std::tuple<Register, unsigned, unsigned>
AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
                                              Register OrigOffset) const {
  const unsigned MaxImm = 4095;
  Register BaseReg;
  unsigned TotalConstOffset;
  MachineInstr *OffsetDef;

  std::tie(BaseReg, TotalConstOffset, OffsetDef)
    = getBaseWithConstantOffset(*MRI, OrigOffset);

  unsigned ImmOffset = TotalConstOffset;

  // If the immediate value is too big for the immoffset field, put the value
  // and -4096 into the immoffset field so that the value that is copied/added
  // for the voffset field is a multiple of 4096, and it stands more chance
  // of being CSEd with the copy/add for another similar load/store.
  // However, do not do that rounding down to a multiple of 4096 if that is a
  // negative number, as it appears to be illegal to have a negative offset
  // in the vgpr, even if adding the immediate offset makes it positive.
  unsigned Overflow = ImmOffset & ~MaxImm;
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) {
    Overflow += ImmOffset;
    ImmOffset = 0;
  }

  if (Overflow != 0) {
    // In case this is in a waterfall loop, insert offset code at the def point
    // of the offset, not inside the loop.
    MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
    MachineBasicBlock &OldMBB = B.getMBB();
    B.setInstr(*OffsetDef);

    if (!BaseReg) {
      BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      B.buildInstr(AMDGPU::V_MOV_B32_e32)
        .addDef(BaseReg)
        .addImm(Overflow);
    } else {
      Register OverflowVal = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      B.buildInstr(AMDGPU::V_MOV_B32_e32)
        .addDef(OverflowVal)
        .addImm(Overflow);

      Register NewBaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
        .addReg(BaseReg)
        .addReg(OverflowVal, RegState::Kill)
        .addImm(0);
      BaseReg = NewBaseReg;
    }

    B.setInsertPt(OldMBB, OldInsPt);
  }

  return std::make_tuple(BaseReg, ImmOffset, TotalConstOffset);
}

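// Common selection path for the raw buffer store intrinsics; IsFormat
// distinguishes llvm.amdgcn.raw.buffer.store.format from the plain variant.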
bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
                                                     bool IsFormat) const {
  MachineIRBuilder B(MI);
  MachineFunction &MF = B.getMF();
  Register VData = MI.getOperand(1).getReg();
  LLT Ty = MRI->getType(VData);

  int Size = Ty.getSizeInBits();
  if (Size % 32 != 0)
    return false;

  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
  MachineMemOperand *MMO = *MI.memoperands_begin();
  const int MemSize = MMO->getSize();

  Register RSrc = MI.getOperand(2).getReg();
  Register VOffset = MI.getOperand(3).getReg();
  Register SOffset = MI.getOperand(4).getReg();
  unsigned AuxiliaryData = MI.getOperand(5).getImm();
  unsigned ImmOffset;
  unsigned TotalOffset;

  std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
  if (TotalOffset != 0)
    MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);

  const bool Offen = !isZero(VOffset, *MRI);

  int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
            getBufferStoreOpcode(Ty, MemSize, Offen);
  if (Opc == -1)
    return false;

  MachineInstrBuilder MIB = B.buildInstr(Opc)
    .addUse(VData);

  if (Offen)
    MIB.addUse(VOffset);

  MIB.addUse(RSrc)
     .addUse(SOffset)
     .addImm(ImmOffset)
     .addImm(extractGLC(AuxiliaryData))
     .addImm(extractSLC(AuxiliaryData))
     .addImm(0) // tfe: FIXME: Remove from inst
     .addImm(extractDLC(AuxiliaryData))
     .addImm(extractSWZ(AuxiliaryData))
     .addMemOperand(MMO);

  MI.eraseFromParent();

  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = I.getOperand(1).getImm();
    int64_t Enabled = I.getOperand(2).getImm();
    int64_t Done = I.getOperand(7).getImm();
    int64_t VM = I.getOperand(8).getImm();

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = I.getOperand(1).getImm();
    int64_t Enabled = I.getOperand(2).getImm();
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = I.getOperand(5).getImm();
    int64_t VM = I.getOperand(6).getImm();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI->getRegClassOrNull(Reg))
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  case Intrinsic::amdgcn_raw_buffer_store:
    return selectStoreIntrinsic(I, false);
  case Intrinsic::amdgcn_raw_buffer_store_format:
    return selectStoreIntrinsic(I, true);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it. So we need to set the register class manually here.
    if (!MRI->getRegClassOrNull(CCReg))
        MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .addImm(0)
        .add(I.getOperand(3))
        .addImm(0)
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

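// A truncate is selected as a subregister copy of the low bits of the source
// register.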
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, *MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, *MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

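// Select G_SEXT/G_ZEXT/G_ANYEXT, choosing between condition-code selects,
// AND masks, and bitfield extracts depending on the source register bank.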
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    const unsigned Opcode =
      DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
      DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  }

  return false;
}

static int64_t getFPTrueImmVal(unsigned Size, bool Signed) {
  switch (Size) {
  case 16:
    return Signed ? 0xBC00 : 0x3C00;
  case 32:
    return Signed ? 0xbf800000 : 0x3f800000;
  case 64:
    return Signed ? 0xbff0000000000000 : 0x3ff0000000000000;
  default:
    llvm_unreachable("Invalid FP type size");
  }
}

bool AMDGPUInstructionSelector::selectG_SITOFP_UITOFP(MachineInstr &I) const {
  MachineBasicBlock *MBB = I.getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register Src = I.getOperand(1).getReg();
  if (!isSCC(Src, MRI))
    return selectImpl(I, *CoverageInfo);

  bool Signed = I.getOpcode() == AMDGPU::G_SITOFP;
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned DstSize = DstTy.getSizeInBits();
  const DebugLoc &DL = I.getDebugLoc();

  BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
    .addReg(Src);

  unsigned NewOpc =
    DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
  auto MIB = BuildMI(*MBB, I, DL, TII.get(NewOpc), DstReg)
    .addImm(0)
    .addImm(getFPTrueImmVal(DstSize, Signed));

  if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI->getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  const DebugLoc &DL = I.getDebugLoc();

  APInt Imm(Size, I.getOperand(1).getImm());

  MachineInstr *ResInst;
  if (IsSgpr && TII.isInlineConstant(Imm)) {
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
      .addImm(I.getOperand(1).getImm());
  } else {
    const TargetRegisterClass *RC = IsSgpr ?
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    Register LoReg = MRI->createVirtualRegister(RC);
    Register HiReg = MRI->createVirtualRegister(RC);

    BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
      .addImm(Imm.trunc(32).getZExtValue());

    BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
      .addImm(Imm.ashr(32).getZExtValue());

    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(HiReg)
      .addImm(AMDGPU::sub1);
  }

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target independent opcodes
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

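// Walk the chain of G_GEPs feeding \p Load and record the SGPR/VGPR parts
// and the constant offset of each level into \p AddrInfo.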
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

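// DS instructions on some subtargets implicitly address relative to M0; make
// sure it is initialized before selecting a load/store from the LDS or region
// address space.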
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();

  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for now
  // RegBankSelect knows what it's doing if the branch condition is scc, even
  // though it currently does not.
  if (isSCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    // FIXME: Hack for isSCC tests
    ConstrainRC = &AMDGPU::SGPR_32RegClass;
  } else if (isVCC(CondReg, *MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // We sort of know, based on the register bank, that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}

bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
  uint64_t Align = I.getOperand(2).getImm();
  const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);

  MachineBasicBlock *BB = I.getParent();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  LLT Ty = MRI->getType(DstReg);

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
                                                                  *MRI);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
                                                                  *MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  Register ImmReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
    .addImm(Mask);

  if (Ty.getSizeInBits() == 32) {
    BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
      .addReg(SrcReg)
      .addReg(ImmReg);
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  Register MaskLo = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(SrcReg, 0, AMDGPU::sub1);

  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
    .addReg(LoReg)
    .addReg(ImmReg);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(MaskLo)
    .addImm(AMDGPU::sub0)
    .addReg(HiReg)
    .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}

&I
) {
1675 return selectPHI(I
);
1677 if (!I
.isPreISelOpcode()) {
1679 return selectCOPY(I
);
1683 switch (I
.getOpcode()) {
1684 case TargetOpcode::G_AND
:
1685 case TargetOpcode::G_OR
:
1686 case TargetOpcode::G_XOR
:
1687 if (selectG_AND_OR_XOR(I
))
1689 return selectImpl(I
, *CoverageInfo
);
1690 case TargetOpcode::G_ADD
:
1691 case TargetOpcode::G_SUB
:
1692 if (selectImpl(I
, *CoverageInfo
))
1694 return selectG_ADD_SUB(I
);
1695 case TargetOpcode::G_UADDO
:
1696 case TargetOpcode::G_USUBO
:
1697 return selectG_UADDO_USUBO(I
);
1698 case TargetOpcode::G_INTTOPTR
:
1699 case TargetOpcode::G_BITCAST
:
1700 case TargetOpcode::G_PTRTOINT
:
1701 return selectCOPY(I
);
1702 case TargetOpcode::G_CONSTANT
:
1703 case TargetOpcode::G_FCONSTANT
:
1704 return selectG_CONSTANT(I
);
1705 case TargetOpcode::G_EXTRACT
:
1706 return selectG_EXTRACT(I
);
1707 case TargetOpcode::G_MERGE_VALUES
:
1708 case TargetOpcode::G_BUILD_VECTOR
:
1709 case TargetOpcode::G_CONCAT_VECTORS
:
1710 return selectG_MERGE_VALUES(I
);
1711 case TargetOpcode::G_UNMERGE_VALUES
:
1712 return selectG_UNMERGE_VALUES(I
);
1713 case TargetOpcode::G_GEP
:
1714 return selectG_GEP(I
);
1715 case TargetOpcode::G_IMPLICIT_DEF
:
1716 return selectG_IMPLICIT_DEF(I
);
1717 case TargetOpcode::G_INSERT
:
1718 return selectG_INSERT(I
);
1719 case TargetOpcode::G_INTRINSIC
:
1720 return selectG_INTRINSIC(I
);
1721 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
:
1722 return selectG_INTRINSIC_W_SIDE_EFFECTS(I
);
1723 case TargetOpcode::G_ICMP
:
1724 if (selectG_ICMP(I
))
1726 return selectImpl(I
, *CoverageInfo
);
1727 case TargetOpcode::G_LOAD
:
1728 case TargetOpcode::G_ATOMIC_CMPXCHG
:
1729 case TargetOpcode::G_ATOMICRMW_XCHG
:
1730 case TargetOpcode::G_ATOMICRMW_ADD
:
1731 case TargetOpcode::G_ATOMICRMW_SUB
:
1732 case TargetOpcode::G_ATOMICRMW_AND
:
1733 case TargetOpcode::G_ATOMICRMW_OR
:
1734 case TargetOpcode::G_ATOMICRMW_XOR
:
1735 case TargetOpcode::G_ATOMICRMW_MIN
:
1736 case TargetOpcode::G_ATOMICRMW_MAX
:
1737 case TargetOpcode::G_ATOMICRMW_UMIN
:
1738 case TargetOpcode::G_ATOMICRMW_UMAX
:
1739 case TargetOpcode::G_ATOMICRMW_FADD
:
1740 return selectG_LOAD_ATOMICRMW(I
);
1741 case TargetOpcode::G_SELECT
:
1742 return selectG_SELECT(I
);
1743 case TargetOpcode::G_STORE
:
1744 return selectG_STORE(I
);
1745 case TargetOpcode::G_TRUNC
:
1746 return selectG_TRUNC(I
);
1747 case TargetOpcode::G_SEXT
:
1748 case TargetOpcode::G_ZEXT
:
1749 case TargetOpcode::G_ANYEXT
:
1750 return selectG_SZA_EXT(I
);
1751 case TargetOpcode::G_SITOFP
:
1752 case TargetOpcode::G_UITOFP
:
1753 return selectG_SITOFP_UITOFP(I
);
1754 case TargetOpcode::G_BRCOND
:
1755 return selectG_BRCOND(I
);
1756 case TargetOpcode::G_FRAME_INDEX
:
1757 return selectG_FRAME_INDEX(I
);
1758 case TargetOpcode::G_FENCE
:
1759 // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
1760 // is checking for G_CONSTANT
1761 I
.setDesc(TII
.get(AMDGPU::ATOMIC_FENCE
));
1763 case TargetOpcode::G_PTR_MASK
:
1764 return selectG_PTR_MASK(I
);
1766 return selectImpl(I
, *CoverageInfo
);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI->getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI->getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

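// The renderers above populate the (src0, src0_modifiers, clamp, omod)
// sub-operands of a VOP3 complex pattern: the possibly-stripped source
// register, its folded modifier bits, and constant zeros for clamp and
// omod. selectVOP3Mods0Clamp0OMod below currently renders the same operand
// layout.
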
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // clamp
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

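// For example (illustrative): a uniform load of ptr + 16,
//   %1 = G_GEP %0, 16 ; %2 = G_LOAD %1
// can select to an s_load with PtrReg = %0; on subtargets that encode the
// SMRD offset in dwords, EncodedImm would be 4 (16 bytes / 4).
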
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
  // then we can select all ptr + 32-bit offsets not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using a sgpr offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM Patterns are considered before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

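// Sketch of the fallback above (illustrative): for an offset such as
// 0x12345678 that no _IMM variant accepted, the selector emits
//   %off:sreg_32 = S_MOV_B32 0x12345678
// and the _SGPR addressing form consumes (PtrReg, %off).
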
template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI->getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), *MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
    }};
}

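// Illustratively, with flat instruction offsets available:
//   %1 = G_GEP %0, 4095 ; %2 = G_LOAD %1
// folds the constant into the instruction's offset field (when
// TII.isLegalFLATOffset accepts it for the address space), selecting to
// something like a global load of %0 with offset 4095 rather than keeping
// the pointer add; otherwise the Default renderers above supply offset 0.
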
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, *MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit like above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

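// Worked example for the constant-address path above (illustrative): for
// Offset = 0x11234, the selector emits
//   %hi:vgpr_32 = V_MOV_B32_e32 0x11000   ; Offset & ~4095
// as vaddr and uses 0x234 (Offset & 4095) as the 12-bit MUBUF immediate,
// so vaddr + imm reassembles the original scratch offset.
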
bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return KnownBits->signBitIsZero(Base.getReg());
}

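// For example (illustrative): with OffsetBits == 16, a ds_read_b32-style
// 16-bit byte offset accepts 65535 but rejects 65536; the 8-bit case
// corresponds to the paired offset0/offset1 fields of ds_read2-style
// instructions. On targets without a usable DS offset, the base must also
// be provably non-negative via KnownBits.
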
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

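// Unlike selectMUBUFScratchOffen above, this variant matches only when the
// entire scratch address is a legal constant, so no vaddr is produced: the
// access is rendered as rsrc + soffset + immediate offset alone.
// (Summary of the code above, not an additional code path.)
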
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, *MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO

  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO

  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}

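// For example (illustrative): for an LDS access
//   %2 = G_GEP %0, 64 ; %3 = G_LOAD %2
// the (add n0, c0) case folds to base %0 with offset:64, provided 64 passes
// isDSOffsetLegal for the 16-bit field; otherwise the unfolded address is
// used with offset 0.
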
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}
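
// Usage note (assumption based on the GlobalISel custom-renderer mechanism,
// not spelled out here): renderTruncImm32 is meant to be referenced from
// TableGen patterns via GICustomOperandRenderer, so a matched G_CONSTANT
// feeding an immediate operand is emitted directly as an immediate on the
// selected instruction.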