//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"

} // end namespace llvm
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }

  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<LegacyDivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}
/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 Val);

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
}
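
// buildSMovImm64 materializes a 64-bit scalar immediate: it emits S_MOV_B32 of
// the low and high 32-bit halves and combines them with a REG_SEQUENCE into a
// 64-bit SGPR pair (sub0 = low half, sub1 = high half).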
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                  EVT VT) const {
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
  SDNode *Hi =
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
}
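
// selectSGPRVectorRegClassID maps the number of 32-bit vector elements to the
// narrowest SGPR tuple class wide enough to hold them; for example, a
// 4-element vector selects the 128-bit SReg_128 class.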
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
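
// getConstantValue extracts the raw 32-bit payload from either an integer or a
// floating-point constant node (the float is reinterpreted via its bit
// pattern) and returns false when N is not a constant at all.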
static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
468 void AMDGPUDAGToDAGISel::Select(SDNode
*N
) {
469 unsigned int Opc
= N
->getOpcode();
470 if (N
->isMachineOpcode()) {
472 return; // Already selected.
475 if (isa
<AtomicSDNode
>(N
) ||
476 (Opc
== AMDGPUISD::ATOMIC_INC
|| Opc
== AMDGPUISD::ATOMIC_DEC
||
477 Opc
== ISD::ATOMIC_LOAD_FADD
||
478 Opc
== AMDGPUISD::ATOMIC_LOAD_FMIN
||
479 Opc
== AMDGPUISD::ATOMIC_LOAD_FMAX
))
480 N
= glueCopyToM0LDSInit(N
);
485 // We are selecting i64 ADD here instead of custom lower it during
486 // DAG legalization, so we can fold some i64 ADDs used for address
487 // calculation into the LOAD and STORE instructions.
492 if (N
->getValueType(0) != MVT::i64
)
495 SelectADD_SUB_I64(N
);
500 SelectUADDO_USUBO(N
);
503 case AMDGPUISD::FMUL_W_CHAIN
: {
504 SelectFMUL_W_CHAIN(N
);
507 case AMDGPUISD::FMA_W_CHAIN
: {
508 SelectFMA_W_CHAIN(N
);
512 case ISD::SCALAR_TO_VECTOR
:
513 case ISD::BUILD_VECTOR
: {
514 EVT VT
= N
->getValueType(0);
515 unsigned NumVectorElts
= VT
.getVectorNumElements();
516 if (VT
.getScalarSizeInBits() == 16) {
517 if (Opc
== ISD::BUILD_VECTOR
&& NumVectorElts
== 2) {
518 uint32_t LHSVal
, RHSVal
;
519 if (getConstantValue(N
->getOperand(0), LHSVal
) &&
520 getConstantValue(N
->getOperand(1), RHSVal
)) {
521 uint32_t K
= LHSVal
| (RHSVal
<< 16);
522 CurDAG
->SelectNodeTo(N
, AMDGPU::S_MOV_B32
, VT
,
523 CurDAG
->getTargetConstant(K
, SDLoc(N
), MVT::i32
));
531 assert(VT
.getVectorElementType().bitsEq(MVT::i32
));
532 unsigned RegClassID
= selectSGPRVectorRegClassID(NumVectorElts
);
533 SelectBuildVector(N
, RegClassID
);
536 case ISD::BUILD_PAIR
: {
537 SDValue RC
, SubReg0
, SubReg1
;
539 if (N
->getValueType(0) == MVT::i128
) {
540 RC
= CurDAG
->getTargetConstant(AMDGPU::SReg_128RegClassID
, DL
, MVT::i32
);
541 SubReg0
= CurDAG
->getTargetConstant(AMDGPU::sub0_sub1
, DL
, MVT::i32
);
542 SubReg1
= CurDAG
->getTargetConstant(AMDGPU::sub2_sub3
, DL
, MVT::i32
);
543 } else if (N
->getValueType(0) == MVT::i64
) {
544 RC
= CurDAG
->getTargetConstant(AMDGPU::SReg_64RegClassID
, DL
, MVT::i32
);
545 SubReg0
= CurDAG
->getTargetConstant(AMDGPU::sub0
, DL
, MVT::i32
);
546 SubReg1
= CurDAG
->getTargetConstant(AMDGPU::sub1
, DL
, MVT::i32
);
548 llvm_unreachable("Unhandled value type for BUILD_PAIR");
550 const SDValue Ops
[] = { RC
, N
->getOperand(0), SubReg0
,
551 N
->getOperand(1), SubReg1
};
552 ReplaceNode(N
, CurDAG
->getMachineNode(TargetOpcode::REG_SEQUENCE
, DL
,
553 N
->getValueType(0), Ops
));
558 case ISD::ConstantFP
: {
559 if (N
->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N
))
563 if (ConstantFPSDNode
*FP
= dyn_cast
<ConstantFPSDNode
>(N
))
564 Imm
= FP
->getValueAPF().bitcastToAPInt().getZExtValue();
566 ConstantSDNode
*C
= cast
<ConstantSDNode
>(N
);
567 Imm
= C
->getZExtValue();
571 ReplaceNode(N
, buildSMovImm64(DL
, Imm
, N
->getValueType(0)));
576 case ISD::ATOMIC_LOAD
:
577 case ISD::ATOMIC_STORE
: {
578 N
= glueCopyToM0LDSInit(N
);
582 case AMDGPUISD::BFE_I32
:
583 case AMDGPUISD::BFE_U32
: {
584 // There is a scalar version available, but unlike the vector version which
585 // has a separate operand for the offset and width, the scalar version packs
586 // the width and offset into a single operand. Try to move to the scalar
587 // version if the offsets are constant, so that we can try to keep extended
588 // loads of kernel arguments in SGPRs.
590 // TODO: Technically we could try to pattern match scalar bitshifts of
591 // dynamic values, but it's probably not useful.
592 ConstantSDNode
*Offset
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
596 ConstantSDNode
*Width
= dyn_cast
<ConstantSDNode
>(N
->getOperand(2));
600 bool Signed
= Opc
== AMDGPUISD::BFE_I32
;
602 uint32_t OffsetVal
= Offset
->getZExtValue();
603 uint32_t WidthVal
= Width
->getZExtValue();
605 ReplaceNode(N
, getS_BFE(Signed
? AMDGPU::S_BFE_I32
: AMDGPU::S_BFE_U32
,
606 SDLoc(N
), N
->getOperand(0), OffsetVal
, WidthVal
));
609 case AMDGPUISD::DIV_SCALE
: {
613 case AMDGPUISD::MAD_I64_I32
:
614 case AMDGPUISD::MAD_U64_U32
: {
618 case ISD::CopyToReg
: {
619 const SITargetLowering
& Lowering
=
620 *static_cast<const SITargetLowering
*>(getTargetLowering());
621 N
= Lowering
.legalizeTargetIndependentNode(N
, *CurDAG
);
627 case ISD::SIGN_EXTEND_INREG
:
628 if (N
->getValueType(0) != MVT::i32
)
640 case AMDGPUISD::ATOMIC_CMP_SWAP
:
641 SelectATOMIC_CMP_SWAP(N
);
643 case AMDGPUISD::CVT_PKRTZ_F16_F32
:
644 case AMDGPUISD::CVT_PKNORM_I16_F32
:
645 case AMDGPUISD::CVT_PKNORM_U16_F32
:
646 case AMDGPUISD::CVT_PK_U16_U32
:
647 case AMDGPUISD::CVT_PK_I16_I32
: {
648 // Hack around using a legal type if f16 is illegal.
649 if (N
->getValueType(0) == MVT::i32
) {
650 MVT NewVT
= Opc
== AMDGPUISD::CVT_PKRTZ_F16_F32
? MVT::v2f16
: MVT::v2i16
;
651 N
= CurDAG
->MorphNodeTo(N
, N
->getOpcode(), CurDAG
->getVTList(NewVT
),
652 { N
->getOperand(0), N
->getOperand(1) });
659 case ISD::INTRINSIC_W_CHAIN
: {
660 SelectINTRINSIC_W_CHAIN(N
);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
683 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr
, SDValue
&Base
,
688 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr
, SDValue
&Base
,
693 if ((C
= dyn_cast
<ConstantSDNode
>(Addr
))) {
694 Base
= CurDAG
->getRegister(R600::INDIRECT_BASE_ADDR
, MVT::i32
);
695 Offset
= CurDAG
->getTargetConstant(C
->getZExtValue(), DL
, MVT::i32
);
696 } else if ((Addr
.getOpcode() == AMDGPUISD::DWORDADDR
) &&
697 (C
= dyn_cast
<ConstantSDNode
>(Addr
.getOperand(0)))) {
698 Base
= CurDAG
->getRegister(R600::INDIRECT_BASE_ADDR
, MVT::i32
);
699 Offset
= CurDAG
->getTargetConstant(C
->getZExtValue(), DL
, MVT::i32
);
700 } else if ((Addr
.getOpcode() == ISD::ADD
|| Addr
.getOpcode() == ISD::OR
) &&
701 (C
= dyn_cast
<ConstantSDNode
>(Addr
.getOperand(1)))) {
702 Base
= Addr
.getOperand(0);
703 Offset
= CurDAG
->getTargetConstant(C
->getZExtValue(), DL
, MVT::i32
);
706 Offset
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
712 // FIXME: Should only handle addcarry/subcarry
713 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode
*N
) {
715 SDValue LHS
= N
->getOperand(0);
716 SDValue RHS
= N
->getOperand(1);
718 unsigned Opcode
= N
->getOpcode();
719 bool ConsumeCarry
= (Opcode
== ISD::ADDE
|| Opcode
== ISD::SUBE
);
721 ConsumeCarry
|| Opcode
== ISD::ADDC
|| Opcode
== ISD::SUBC
;
722 bool IsAdd
= Opcode
== ISD::ADD
|| Opcode
== ISD::ADDC
|| Opcode
== ISD::ADDE
;
724 SDValue Sub0
= CurDAG
->getTargetConstant(AMDGPU::sub0
, DL
, MVT::i32
);
725 SDValue Sub1
= CurDAG
->getTargetConstant(AMDGPU::sub1
, DL
, MVT::i32
);
727 SDNode
*Lo0
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
728 DL
, MVT::i32
, LHS
, Sub0
);
729 SDNode
*Hi0
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
730 DL
, MVT::i32
, LHS
, Sub1
);
732 SDNode
*Lo1
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
733 DL
, MVT::i32
, RHS
, Sub0
);
734 SDNode
*Hi1
= CurDAG
->getMachineNode(TargetOpcode::EXTRACT_SUBREG
,
735 DL
, MVT::i32
, RHS
, Sub1
);
737 SDVTList VTList
= CurDAG
->getVTList(MVT::i32
, MVT::Glue
);
739 unsigned Opc
= IsAdd
? AMDGPU::S_ADD_U32
: AMDGPU::S_SUB_U32
;
740 unsigned CarryOpc
= IsAdd
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32
;
744 SDValue Args
[] = { SDValue(Lo0
, 0), SDValue(Lo1
, 0) };
745 AddLo
= CurDAG
->getMachineNode(Opc
, DL
, VTList
, Args
);
747 SDValue Args
[] = { SDValue(Lo0
, 0), SDValue(Lo1
, 0), N
->getOperand(2) };
748 AddLo
= CurDAG
->getMachineNode(CarryOpc
, DL
, VTList
, Args
);
750 SDValue AddHiArgs
[] = {
755 SDNode
*AddHi
= CurDAG
->getMachineNode(CarryOpc
, DL
, VTList
, AddHiArgs
);
757 SDValue RegSequenceArgs
[] = {
758 CurDAG
->getTargetConstant(AMDGPU::SReg_64RegClassID
, DL
, MVT::i32
),
764 SDNode
*RegSequence
= CurDAG
->getMachineNode(AMDGPU::REG_SEQUENCE
, DL
,
765 MVT::i64
, RegSequenceArgs
);
768 // Replace the carry-use
769 ReplaceUses(SDValue(N
, 1), SDValue(AddHi
, 1));
772 // Replace the remaining uses.
773 ReplaceNode(N
, RegSequence
);
776 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode
*N
) {
777 // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
778 // carry out despite the _i32 name. These were renamed in VI to _U32.
779 // FIXME: We should probably rename the opcodes here.
780 unsigned Opc
= N
->getOpcode() == ISD::UADDO
?
781 AMDGPU::V_ADD_I32_e64
: AMDGPU::V_SUB_I32_e64
;
783 CurDAG
->SelectNodeTo(N
, Opc
, N
->getVTList(),
784 { N
->getOperand(0), N
->getOperand(1) });
787 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode
*N
) {
789 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
792 SelectVOP3Mods0(N
->getOperand(1), Ops
[1], Ops
[0], Ops
[6], Ops
[7]);
793 SelectVOP3Mods(N
->getOperand(2), Ops
[3], Ops
[2]);
794 SelectVOP3Mods(N
->getOperand(3), Ops
[5], Ops
[4]);
795 Ops
[8] = N
->getOperand(0);
796 Ops
[9] = N
->getOperand(4);
798 CurDAG
->SelectNodeTo(N
, AMDGPU::V_FMA_F32
, N
->getVTList(), Ops
);
801 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode
*N
) {
803 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
806 SelectVOP3Mods0(N
->getOperand(1), Ops
[1], Ops
[0], Ops
[4], Ops
[5]);
807 SelectVOP3Mods(N
->getOperand(2), Ops
[3], Ops
[2]);
808 Ops
[6] = N
->getOperand(0);
809 Ops
[7] = N
->getOperand(3);
811 CurDAG
->SelectNodeTo(N
, AMDGPU::V_MUL_F32_e64
, N
->getVTList(), Ops
);
814 // We need to handle this here because tablegen doesn't support matching
815 // instructions with multiple outputs.
816 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode
*N
) {
818 EVT VT
= N
->getValueType(0);
820 assert(VT
== MVT::f32
|| VT
== MVT::f64
);
823 = (VT
== MVT::f64
) ? AMDGPU::V_DIV_SCALE_F64
: AMDGPU::V_DIV_SCALE_F32
;
825 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2) };
826 CurDAG
->SelectNodeTo(N
, Opc
, N
->getVTList(), Ops
);
829 // We need to handle this here because tablegen doesn't support matching
830 // instructions with multiple outputs.
831 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode
*N
) {
833 bool Signed
= N
->getOpcode() == AMDGPUISD::MAD_I64_I32
;
834 unsigned Opc
= Signed
? AMDGPU::V_MAD_I64_I32
: AMDGPU::V_MAD_U64_U32
;
836 SDValue Clamp
= CurDAG
->getTargetConstant(0, SL
, MVT::i1
);
837 SDValue Ops
[] = { N
->getOperand(0), N
->getOperand(1), N
->getOperand(2),
839 CurDAG
->SelectNodeTo(N
, Opc
, N
->getVTList(), Ops
);
842 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base
, unsigned Offset
,
843 unsigned OffsetBits
) const {
844 if ((OffsetBits
== 16 && !isUInt
<16>(Offset
)) ||
845 (OffsetBits
== 8 && !isUInt
<8>(Offset
)))
848 if (Subtarget
->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS
||
849 Subtarget
->unsafeDSOffsetFoldingEnabled())
852 // On Southern Islands instruction with a negative base value and an offset
853 // don't seem to work.
854 return CurDAG
->SignBitIsZero(Base
);
857 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr
, SDValue
&Base
,
858 SDValue
&Offset
) const {
860 if (CurDAG
->isBaseWithConstantOffset(Addr
)) {
861 SDValue N0
= Addr
.getOperand(0);
862 SDValue N1
= Addr
.getOperand(1);
863 ConstantSDNode
*C1
= cast
<ConstantSDNode
>(N1
);
864 if (isDSOffsetLegal(N0
, C1
->getSExtValue(), 16)) {
867 Offset
= CurDAG
->getTargetConstant(C1
->getZExtValue(), DL
, MVT::i16
);
870 } else if (Addr
.getOpcode() == ISD::SUB
) {
871 // sub C, x -> add (sub 0, x), C
872 if (const ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Addr
.getOperand(0))) {
873 int64_t ByteOffset
= C
->getSExtValue();
874 if (isUInt
<16>(ByteOffset
)) {
875 SDValue Zero
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
877 // XXX - This is kind of hacky. Create a dummy sub node so we can check
878 // the known bits in isDSOffsetLegal. We need to emit the selected node
879 // here, so this is thrown away.
880 SDValue Sub
= CurDAG
->getNode(ISD::SUB
, DL
, MVT::i32
,
881 Zero
, Addr
.getOperand(1));
883 if (isDSOffsetLegal(Sub
, ByteOffset
, 16)) {
884 // FIXME: Select to VOP3 version for with-carry.
885 unsigned SubOp
= Subtarget
->hasAddNoCarry() ?
886 AMDGPU::V_SUB_U32_e64
: AMDGPU::V_SUB_I32_e32
;
888 MachineSDNode
*MachineSub
889 = CurDAG
->getMachineNode(SubOp
, DL
, MVT::i32
,
890 Zero
, Addr
.getOperand(1));
892 Base
= SDValue(MachineSub
, 0);
893 Offset
= CurDAG
->getTargetConstant(ByteOffset
, DL
, MVT::i16
);
898 } else if (const ConstantSDNode
*CAddr
= dyn_cast
<ConstantSDNode
>(Addr
)) {
899 // If we have a constant address, prefer to put the constant into the
900 // offset. This can save moves to load the constant address since multiple
901 // operations can share the zero base address register, and enables merging
902 // into read2 / write2 instructions.
906 if (isUInt
<16>(CAddr
->getZExtValue())) {
907 SDValue Zero
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
908 MachineSDNode
*MovZero
= CurDAG
->getMachineNode(AMDGPU::V_MOV_B32_e32
,
910 Base
= SDValue(MovZero
, 0);
911 Offset
= CurDAG
->getTargetConstant(CAddr
->getZExtValue(), DL
, MVT::i16
);
918 Offset
= CurDAG
->getTargetConstant(0, SDLoc(Addr
), MVT::i16
);
922 // TODO: If offset is too big, put low 16-bit into offset.
923 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr
, SDValue
&Base
,
925 SDValue
&Offset1
) const {
928 if (CurDAG
->isBaseWithConstantOffset(Addr
)) {
929 SDValue N0
= Addr
.getOperand(0);
930 SDValue N1
= Addr
.getOperand(1);
931 ConstantSDNode
*C1
= cast
<ConstantSDNode
>(N1
);
932 unsigned DWordOffset0
= C1
->getZExtValue() / 4;
933 unsigned DWordOffset1
= DWordOffset0
+ 1;
935 if (isDSOffsetLegal(N0
, DWordOffset1
, 8)) {
937 Offset0
= CurDAG
->getTargetConstant(DWordOffset0
, DL
, MVT::i8
);
938 Offset1
= CurDAG
->getTargetConstant(DWordOffset1
, DL
, MVT::i8
);
941 } else if (Addr
.getOpcode() == ISD::SUB
) {
942 // sub C, x -> add (sub 0, x), C
943 if (const ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Addr
.getOperand(0))) {
944 unsigned DWordOffset0
= C
->getZExtValue() / 4;
945 unsigned DWordOffset1
= DWordOffset0
+ 1;
947 if (isUInt
<8>(DWordOffset0
)) {
949 SDValue Zero
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
951 // XXX - This is kind of hacky. Create a dummy sub node so we can check
952 // the known bits in isDSOffsetLegal. We need to emit the selected node
953 // here, so this is thrown away.
954 SDValue Sub
= CurDAG
->getNode(ISD::SUB
, DL
, MVT::i32
,
955 Zero
, Addr
.getOperand(1));
957 if (isDSOffsetLegal(Sub
, DWordOffset1
, 8)) {
958 unsigned SubOp
= Subtarget
->hasAddNoCarry() ?
959 AMDGPU::V_SUB_U32_e64
: AMDGPU::V_SUB_I32_e32
;
961 MachineSDNode
*MachineSub
962 = CurDAG
->getMachineNode(SubOp
, DL
, MVT::i32
,
963 Zero
, Addr
.getOperand(1));
965 Base
= SDValue(MachineSub
, 0);
966 Offset0
= CurDAG
->getTargetConstant(DWordOffset0
, DL
, MVT::i8
);
967 Offset1
= CurDAG
->getTargetConstant(DWordOffset1
, DL
, MVT::i8
);
972 } else if (const ConstantSDNode
*CAddr
= dyn_cast
<ConstantSDNode
>(Addr
)) {
973 unsigned DWordOffset0
= CAddr
->getZExtValue() / 4;
974 unsigned DWordOffset1
= DWordOffset0
+ 1;
975 assert(4 * DWordOffset0
== CAddr
->getZExtValue());
977 if (isUInt
<8>(DWordOffset0
) && isUInt
<8>(DWordOffset1
)) {
978 SDValue Zero
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
979 MachineSDNode
*MovZero
980 = CurDAG
->getMachineNode(AMDGPU::V_MOV_B32_e32
,
982 Base
= SDValue(MovZero
, 0);
983 Offset0
= CurDAG
->getTargetConstant(DWordOffset0
, DL
, MVT::i8
);
984 Offset1
= CurDAG
->getTargetConstant(DWordOffset1
, DL
, MVT::i8
);
992 Offset0
= CurDAG
->getTargetConstant(0, DL
, MVT::i8
);
993 Offset1
= CurDAG
->getTargetConstant(1, DL
, MVT::i8
);
997 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr
, SDValue
&Ptr
,
998 SDValue
&VAddr
, SDValue
&SOffset
,
999 SDValue
&Offset
, SDValue
&Offen
,
1000 SDValue
&Idxen
, SDValue
&Addr64
,
1001 SDValue
&GLC
, SDValue
&SLC
,
1002 SDValue
&TFE
) const {
1003 // Subtarget prefers to use flat instruction
1004 if (Subtarget
->useFlatForGlobal())
1010 GLC
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1012 SLC
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1013 TFE
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1015 Idxen
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1016 Offen
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1017 Addr64
= CurDAG
->getTargetConstant(0, DL
, MVT::i1
);
1018 SOffset
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
1020 ConstantSDNode
*C1
= nullptr;
1022 if (CurDAG
->isBaseWithConstantOffset(Addr
)) {
1023 C1
= cast
<ConstantSDNode
>(Addr
.getOperand(1));
1024 if (isUInt
<32>(C1
->getZExtValue()))
1025 N0
= Addr
.getOperand(0);
1030 if (N0
.getOpcode() == ISD::ADD
) {
1031 // (add N2, N3) -> addr64, or
1032 // (add (add N2, N3), C1) -> addr64
1033 SDValue N2
= N0
.getOperand(0);
1034 SDValue N3
= N0
.getOperand(1);
1035 Addr64
= CurDAG
->getTargetConstant(1, DL
, MVT::i1
);
1037 if (N2
->isDivergent()) {
1038 if (N3
->isDivergent()) {
1039 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1040 // addr64, and construct the resource from a 0 address.
1041 Ptr
= SDValue(buildSMovImm64(DL
, 0, MVT::v2i32
), 0);
1044 // N2 is divergent, N3 is not.
1049 // N2 is not divergent.
1053 Offset
= CurDAG
->getTargetConstant(0, DL
, MVT::i16
);
1054 } else if (N0
->isDivergent()) {
1055 // N0 is divergent. Use it as the addr64, and construct the resource from a
1057 Ptr
= SDValue(buildSMovImm64(DL
, 0, MVT::v2i32
), 0);
1059 Addr64
= CurDAG
->getTargetConstant(1, DL
, MVT::i1
);
1062 // (N0 + C1) -> offset
1063 VAddr
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
1069 Offset
= CurDAG
->getTargetConstant(0, DL
, MVT::i16
);
1073 if (SIInstrInfo::isLegalMUBUFImmOffset(C1
->getZExtValue())) {
1074 // Legal offset for instruction.
1075 Offset
= CurDAG
->getTargetConstant(C1
->getZExtValue(), DL
, MVT::i16
);
1079 // Illegal offset, store it in soffset.
1080 Offset
= CurDAG
->getTargetConstant(0, DL
, MVT::i16
);
1082 SDValue(CurDAG
->getMachineNode(
1083 AMDGPU::S_MOV_B32
, DL
, MVT::i32
,
1084 CurDAG
->getTargetConstant(C1
->getZExtValue(), DL
, MVT::i32
)),
1089 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr
, SDValue
&SRsrc
,
1090 SDValue
&VAddr
, SDValue
&SOffset
,
1091 SDValue
&Offset
, SDValue
&GLC
,
1092 SDValue
&SLC
, SDValue
&TFE
) const {
1093 SDValue Ptr
, Offen
, Idxen
, Addr64
;
1095 // addr64 bit was removed for volcanic islands.
1096 if (Subtarget
->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS
)
1099 if (!SelectMUBUF(Addr
, Ptr
, VAddr
, SOffset
, Offset
, Offen
, Idxen
, Addr64
,
1103 ConstantSDNode
*C
= cast
<ConstantSDNode
>(Addr64
);
1104 if (C
->getSExtValue()) {
1107 const SITargetLowering
& Lowering
=
1108 *static_cast<const SITargetLowering
*>(getTargetLowering());
1110 SRsrc
= SDValue(Lowering
.wrapAddr64Rsrc(*CurDAG
, DL
, Ptr
), 0);
1117 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr
, SDValue
&SRsrc
,
1118 SDValue
&VAddr
, SDValue
&SOffset
,
1120 SDValue
&SLC
) const {
1121 SLC
= CurDAG
->getTargetConstant(0, SDLoc(Addr
), MVT::i1
);
1124 return SelectMUBUFAddr64(Addr
, SRsrc
, VAddr
, SOffset
, Offset
, GLC
, SLC
, TFE
);
1127 static bool isStackPtrRelative(const MachinePointerInfo
&PtrInfo
) {
1128 auto PSV
= PtrInfo
.V
.dyn_cast
<const PseudoSourceValue
*>();
1129 return PSV
&& PSV
->isStack();
1132 std::pair
<SDValue
, SDValue
> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N
) const {
1133 const MachineFunction
&MF
= CurDAG
->getMachineFunction();
1134 const SIMachineFunctionInfo
*Info
= MF
.getInfo
<SIMachineFunctionInfo
>();
1136 if (auto FI
= dyn_cast
<FrameIndexSDNode
>(N
)) {
1137 SDValue TFI
= CurDAG
->getTargetFrameIndex(FI
->getIndex(),
1138 FI
->getValueType(0));
1140 // If we can resolve this to a frame index access, this is relative to the
1141 // frame pointer SGPR.
1142 return std::make_pair(TFI
, CurDAG
->getRegister(Info
->getFrameOffsetReg(),
1146 // If we don't know this private access is a local stack object, it needs to
1147 // be relative to the entry point's scratch wave offset register.
1148 return std::make_pair(N
, CurDAG
->getRegister(Info
->getScratchWaveOffsetReg(),
1152 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode
*Parent
,
1153 SDValue Addr
, SDValue
&Rsrc
,
1154 SDValue
&VAddr
, SDValue
&SOffset
,
1155 SDValue
&ImmOffset
) const {
1158 MachineFunction
&MF
= CurDAG
->getMachineFunction();
1159 const SIMachineFunctionInfo
*Info
= MF
.getInfo
<SIMachineFunctionInfo
>();
1161 Rsrc
= CurDAG
->getRegister(Info
->getScratchRSrcReg(), MVT::v4i32
);
1163 if (ConstantSDNode
*CAddr
= dyn_cast
<ConstantSDNode
>(Addr
)) {
1164 unsigned Imm
= CAddr
->getZExtValue();
1166 SDValue HighBits
= CurDAG
->getTargetConstant(Imm
& ~4095, DL
, MVT::i32
);
1167 MachineSDNode
*MovHighBits
= CurDAG
->getMachineNode(AMDGPU::V_MOV_B32_e32
,
1168 DL
, MVT::i32
, HighBits
);
1169 VAddr
= SDValue(MovHighBits
, 0);
1171 // In a call sequence, stores to the argument stack area are relative to the
1173 const MachinePointerInfo
&PtrInfo
= cast
<MemSDNode
>(Parent
)->getPointerInfo();
1174 unsigned SOffsetReg
= isStackPtrRelative(PtrInfo
) ?
1175 Info
->getStackPtrOffsetReg() : Info
->getScratchWaveOffsetReg();
1177 SOffset
= CurDAG
->getRegister(SOffsetReg
, MVT::i32
);
1178 ImmOffset
= CurDAG
->getTargetConstant(Imm
& 4095, DL
, MVT::i16
);
1182 if (CurDAG
->isBaseWithConstantOffset(Addr
)) {
1185 SDValue N0
= Addr
.getOperand(0);
1186 SDValue N1
= Addr
.getOperand(1);
1188 // Offsets in vaddr must be positive if range checking is enabled.
1190 // The total computation of vaddr + soffset + offset must not overflow. If
1191 // vaddr is negative, even if offset is 0 the sgpr offset add will end up
1194 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1195 // always perform a range check. If a negative vaddr base index was used,
1196 // this would fail the range check. The overall address computation would
1197 // compute a valid address, but this doesn't happen due to the range
1198 // check. For out-of-bounds MUBUF loads, a 0 is returned.
1200 // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1201 // MUBUF vaddr, but not on older subtargets which can only do this if the
1202 // sign bit is known 0.
1203 ConstantSDNode
*C1
= cast
<ConstantSDNode
>(N1
);
1204 if (SIInstrInfo::isLegalMUBUFImmOffset(C1
->getZExtValue()) &&
1205 (!Subtarget
->privateMemoryResourceIsRangeChecked() ||
1206 CurDAG
->SignBitIsZero(N0
))) {
1207 std::tie(VAddr
, SOffset
) = foldFrameIndex(N0
);
1208 ImmOffset
= CurDAG
->getTargetConstant(C1
->getZExtValue(), DL
, MVT::i16
);
1214 std::tie(VAddr
, SOffset
) = foldFrameIndex(Addr
);
1215 ImmOffset
= CurDAG
->getTargetConstant(0, DL
, MVT::i16
);
1219 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode
*Parent
,
1223 SDValue
&Offset
) const {
1224 ConstantSDNode
*CAddr
= dyn_cast
<ConstantSDNode
>(Addr
);
1225 if (!CAddr
|| !SIInstrInfo::isLegalMUBUFImmOffset(CAddr
->getZExtValue()))
1229 MachineFunction
&MF
= CurDAG
->getMachineFunction();
1230 const SIMachineFunctionInfo
*Info
= MF
.getInfo
<SIMachineFunctionInfo
>();
1232 SRsrc
= CurDAG
->getRegister(Info
->getScratchRSrcReg(), MVT::v4i32
);
1234 const MachinePointerInfo
&PtrInfo
= cast
<MemSDNode
>(Parent
)->getPointerInfo();
1235 unsigned SOffsetReg
= isStackPtrRelative(PtrInfo
) ?
1236 Info
->getStackPtrOffsetReg() : Info
->getScratchWaveOffsetReg();
1238 // FIXME: Get from MachinePointerInfo? We should only be using the frame
1239 // offset if we know this is in a call sequence.
1240 SOffset
= CurDAG
->getRegister(SOffsetReg
, MVT::i32
);
1242 Offset
= CurDAG
->getTargetConstant(CAddr
->getZExtValue(), DL
, MVT::i16
);
1246 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr
, SDValue
&SRsrc
,
1247 SDValue
&SOffset
, SDValue
&Offset
,
1248 SDValue
&GLC
, SDValue
&SLC
,
1249 SDValue
&TFE
) const {
1250 SDValue Ptr
, VAddr
, Offen
, Idxen
, Addr64
;
1251 const SIInstrInfo
*TII
=
1252 static_cast<const SIInstrInfo
*>(Subtarget
->getInstrInfo());
1254 if (!SelectMUBUF(Addr
, Ptr
, VAddr
, SOffset
, Offset
, Offen
, Idxen
, Addr64
,
1258 if (!cast
<ConstantSDNode
>(Offen
)->getSExtValue() &&
1259 !cast
<ConstantSDNode
>(Idxen
)->getSExtValue() &&
1260 !cast
<ConstantSDNode
>(Addr64
)->getSExtValue()) {
1261 uint64_t Rsrc
= TII
->getDefaultRsrcDataFormat() |
1262 APInt::getAllOnesValue(32).getZExtValue(); // Size
1265 const SITargetLowering
& Lowering
=
1266 *static_cast<const SITargetLowering
*>(getTargetLowering());
1268 SRsrc
= SDValue(Lowering
.buildRSRC(*CurDAG
, DL
, Ptr
, 0, Rsrc
), 0);
1274 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr
, SDValue
&SRsrc
,
1275 SDValue
&Soffset
, SDValue
&Offset
1277 SDValue GLC
, SLC
, TFE
;
1279 return SelectMUBUFOffset(Addr
, SRsrc
, Soffset
, Offset
, GLC
, SLC
, TFE
);
1281 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr
, SDValue
&SRsrc
,
1282 SDValue
&Soffset
, SDValue
&Offset
,
1283 SDValue
&SLC
) const {
1286 return SelectMUBUFOffset(Addr
, SRsrc
, Soffset
, Offset
, GLC
, SLC
, TFE
);
1289 template <bool IsSigned
>
1290 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr
,
1293 SDValue
&SLC
) const {
1294 int64_t OffsetVal
= 0;
1296 if (Subtarget
->hasFlatInstOffsets() &&
1297 CurDAG
->isBaseWithConstantOffset(Addr
)) {
1298 SDValue N0
= Addr
.getOperand(0);
1299 SDValue N1
= Addr
.getOperand(1);
1300 int64_t COffsetVal
= cast
<ConstantSDNode
>(N1
)->getSExtValue();
1302 if ((IsSigned
&& isInt
<13>(COffsetVal
)) ||
1303 (!IsSigned
&& isUInt
<12>(COffsetVal
))) {
1305 OffsetVal
= COffsetVal
;
1310 Offset
= CurDAG
->getTargetConstant(OffsetVal
, SDLoc(), MVT::i16
);
1311 SLC
= CurDAG
->getTargetConstant(0, SDLoc(), MVT::i1
);
1316 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr
,
1319 SDValue
&SLC
) const {
1320 return SelectFlatOffset
<false>(Addr
, VAddr
, Offset
, SLC
);
1323 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr
,
1326 SDValue
&SLC
) const {
1327 return SelectFlatOffset
<true>(Addr
, VAddr
, Offset
, SLC
);
1330 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode
,
1331 SDValue
&Offset
, bool &Imm
) const {
1333 // FIXME: Handle non-constant offsets.
1334 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(ByteOffsetNode
);
1338 SDLoc
SL(ByteOffsetNode
);
1339 GCNSubtarget::Generation Gen
= Subtarget
->getGeneration();
1340 int64_t ByteOffset
= C
->getSExtValue();
1341 int64_t EncodedOffset
= AMDGPU::getSMRDEncodedOffset(*Subtarget
, ByteOffset
);
1343 if (AMDGPU::isLegalSMRDImmOffset(*Subtarget
, ByteOffset
)) {
1344 Offset
= CurDAG
->getTargetConstant(EncodedOffset
, SL
, MVT::i32
);
1349 if (!isUInt
<32>(EncodedOffset
) || !isUInt
<32>(ByteOffset
))
1352 if (Gen
== AMDGPUSubtarget::SEA_ISLANDS
&& isUInt
<32>(EncodedOffset
)) {
1353 // 32-bit Immediates are supported on Sea Islands.
1354 Offset
= CurDAG
->getTargetConstant(EncodedOffset
, SL
, MVT::i32
);
1356 SDValue C32Bit
= CurDAG
->getTargetConstant(ByteOffset
, SL
, MVT::i32
);
1357 Offset
= SDValue(CurDAG
->getMachineNode(AMDGPU::S_MOV_B32
, SL
, MVT::i32
,
1364 SDValue
AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr
) const {
1365 if (Addr
.getValueType() != MVT::i32
)
1368 // Zero-extend a 32-bit address.
1371 const MachineFunction
&MF
= CurDAG
->getMachineFunction();
1372 const SIMachineFunctionInfo
*Info
= MF
.getInfo
<SIMachineFunctionInfo
>();
1373 unsigned AddrHiVal
= Info
->get32BitAddressHighBits();
1374 SDValue AddrHi
= CurDAG
->getTargetConstant(AddrHiVal
, SL
, MVT::i32
);
1376 const SDValue Ops
[] = {
1377 CurDAG
->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID
, SL
, MVT::i32
),
1379 CurDAG
->getTargetConstant(AMDGPU::sub0
, SL
, MVT::i32
),
1380 SDValue(CurDAG
->getMachineNode(AMDGPU::S_MOV_B32
, SL
, MVT::i32
, AddrHi
),
1382 CurDAG
->getTargetConstant(AMDGPU::sub1
, SL
, MVT::i32
),
1385 return SDValue(CurDAG
->getMachineNode(AMDGPU::REG_SEQUENCE
, SL
, MVT::i64
,
1389 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr
, SDValue
&SBase
,
1390 SDValue
&Offset
, bool &Imm
) const {
1393 // A 32-bit (address + offset) should not cause unsigned 32-bit integer
1394 // wraparound, because s_load instructions perform the addition in 64 bits.
1395 if ((Addr
.getValueType() != MVT::i32
||
1396 Addr
->getFlags().hasNoUnsignedWrap()) &&
1397 CurDAG
->isBaseWithConstantOffset(Addr
)) {
1398 SDValue N0
= Addr
.getOperand(0);
1399 SDValue N1
= Addr
.getOperand(1);
1401 if (SelectSMRDOffset(N1
, Offset
, Imm
)) {
1402 SBase
= Expand32BitAddress(N0
);
1406 SBase
= Expand32BitAddress(Addr
);
1407 Offset
= CurDAG
->getTargetConstant(0, SL
, MVT::i32
);
1412 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr
, SDValue
&SBase
,
1413 SDValue
&Offset
) const {
1415 return SelectSMRD(Addr
, SBase
, Offset
, Imm
) && Imm
;
1418 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr
, SDValue
&SBase
,
1419 SDValue
&Offset
) const {
1421 if (Subtarget
->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS
)
1425 if (!SelectSMRD(Addr
, SBase
, Offset
, Imm
))
1428 return !Imm
&& isa
<ConstantSDNode
>(Offset
);
1431 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr
, SDValue
&SBase
,
1432 SDValue
&Offset
) const {
1434 return SelectSMRD(Addr
, SBase
, Offset
, Imm
) && !Imm
&&
1435 !isa
<ConstantSDNode
>(Offset
);
1438 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr
,
1439 SDValue
&Offset
) const {
1441 return SelectSMRDOffset(Addr
, Offset
, Imm
) && Imm
;
1444 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr
,
1445 SDValue
&Offset
) const {
1446 if (Subtarget
->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS
)
1450 if (!SelectSMRDOffset(Addr
, Offset
, Imm
))
1453 return !Imm
&& isa
<ConstantSDNode
>(Offset
);
1456 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index
,
1458 SDValue
&Offset
) const {
1461 if (CurDAG
->isBaseWithConstantOffset(Index
)) {
1462 SDValue N0
= Index
.getOperand(0);
1463 SDValue N1
= Index
.getOperand(1);
1464 ConstantSDNode
*C1
= cast
<ConstantSDNode
>(N1
);
1467 // Don't peel off the offset (c0) if doing so could possibly lead
1468 // the base (n0) to be negative.
1469 if (C1
->getSExtValue() <= 0 || CurDAG
->SignBitIsZero(N0
)) {
1471 Offset
= CurDAG
->getTargetConstant(C1
->getZExtValue(), DL
, MVT::i32
);
1476 if (isa
<ConstantSDNode
>(Index
))
1480 Offset
= CurDAG
->getTargetConstant(0, DL
, MVT::i32
);
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
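  // For example, Offset = 16 and Width = 8 pack to 0x00080010, which selects
  // an 8-bit field starting at bit 16 of Val.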
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32
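  // Illustrative example: with b = 8 and c = 24, "(a << 8) srl 24" extracts
  // bits [23:16] of a and becomes "BFE_U32 a, 16, 8".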
  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
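      // Illustrative example: "(a srl 4) & 0xff" becomes "BFE_U32 a, 4, 8",
      // since popcount(0xff) = 8.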
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
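      // Illustrative example: "(a & 0xff0) srl 4" becomes "BFE_U32 a, 4, 8",
      // since (0xff0 >> 4) = 0xff and popcount(0xff) = 8.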
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
1597 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode
*N
) const {
1598 assert(N
->getOpcode() == ISD::BRCOND
);
1599 if (!N
->hasOneUse())
1602 SDValue Cond
= N
->getOperand(1);
1603 if (Cond
.getOpcode() == ISD::CopyToReg
)
1604 Cond
= Cond
.getOperand(2);
1606 if (Cond
.getOpcode() != ISD::SETCC
|| !Cond
.hasOneUse())
1609 MVT VT
= Cond
.getOperand(0).getSimpleValueType();
1613 if (VT
== MVT::i64
) {
1614 auto ST
= static_cast<const GCNSubtarget
*>(Subtarget
);
1616 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Cond
.getOperand(2))->get();
1617 return (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) && ST
->hasScalarCompareEq64();
1623 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode
*N
) {
1624 SDValue Cond
= N
->getOperand(1);
1626 if (Cond
.isUndef()) {
1627 CurDAG
->SelectNodeTo(N
, AMDGPU::SI_BR_UNDEF
, MVT::Other
,
1628 N
->getOperand(2), N
->getOperand(0));
1632 bool UseSCCBr
= isCBranchSCC(N
) && isUniformBr(N
);
1633 unsigned BrOp
= UseSCCBr
? AMDGPU::S_CBRANCH_SCC1
: AMDGPU::S_CBRANCH_VCCNZ
;
1634 unsigned CondReg
= UseSCCBr
? AMDGPU::SCC
: AMDGPU::VCC
;
1638 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
1639 // analyzed what generates the vcc value, so we do not know whether vcc
1640 // bits for disabled lanes are 0. Thus we need to mask out bits for
1643 // For the case that we select S_CBRANCH_SCC1 and it gets
1644 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
1645 // SIInstrInfo::moveToVALU which inserts the S_AND).
1647 // We could add an analysis of what generates the vcc value here and omit
1648 // the S_AND when is unnecessary. But it would be better to add a separate
1649 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
1650 // catches both cases.
1651 Cond
= SDValue(CurDAG
->getMachineNode(AMDGPU::S_AND_B64
, SL
, MVT::i1
,
1652 CurDAG
->getRegister(AMDGPU::EXEC
, MVT::i1
),
1657 SDValue VCC
= CurDAG
->getCopyToReg(N
->getOperand(0), SL
, CondReg
, Cond
);
1658 CurDAG
->SelectNodeTo(N
, BrOp
, MVT::Other
,
1659 N
->getOperand(2), // Basic Block
1663 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode
*N
) {
1664 MVT VT
= N
->getSimpleValueType(0);
1665 bool IsFMA
= N
->getOpcode() == ISD::FMA
;
1666 if (VT
!= MVT::f32
|| (!Subtarget
->hasMadMixInsts() &&
1667 !Subtarget
->hasFmaMixInsts()) ||
1668 ((IsFMA
&& Subtarget
->hasMadMixInsts()) ||
1669 (!IsFMA
&& Subtarget
->hasFmaMixInsts()))) {
1674 SDValue Src0
= N
->getOperand(0);
1675 SDValue Src1
= N
->getOperand(1);
1676 SDValue Src2
= N
->getOperand(2);
1677 unsigned Src0Mods
, Src1Mods
, Src2Mods
;
1679 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
1680 // using the conversion from f16.
1681 bool Sel0
= SelectVOP3PMadMixModsImpl(Src0
, Src0
, Src0Mods
);
1682 bool Sel1
= SelectVOP3PMadMixModsImpl(Src1
, Src1
, Src1Mods
);
1683 bool Sel2
= SelectVOP3PMadMixModsImpl(Src2
, Src2
, Src2Mods
);
1685 assert((IsFMA
|| !Subtarget
->hasFP32Denormals()) &&
1686 "fmad selected with denormals enabled");
1687 // TODO: We can select this with f32 denormals enabled if all the sources are
1688 // converted from f16 (in which case fmad isn't legal).
1690 if (Sel0
|| Sel1
|| Sel2
) {
1691 // For dummy operands.
1692 SDValue Zero
= CurDAG
->getTargetConstant(0, SDLoc(), MVT::i32
);
1694 CurDAG
->getTargetConstant(Src0Mods
, SDLoc(), MVT::i32
), Src0
,
1695 CurDAG
->getTargetConstant(Src1Mods
, SDLoc(), MVT::i32
), Src1
,
1696 CurDAG
->getTargetConstant(Src2Mods
, SDLoc(), MVT::i32
), Src2
,
1697 CurDAG
->getTargetConstant(0, SDLoc(), MVT::i1
),
1701 CurDAG
->SelectNodeTo(N
,
1702 IsFMA
? AMDGPU::V_FMA_MIX_F32
: AMDGPU::V_MAD_MIX_F32
,
1709 // This is here because there isn't a way to use the generated sub0_sub1 as the
1710 // subreg index to EXTRACT_SUBREG in tablegen.
1711 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode
*N
) {
1712 MemSDNode
*Mem
= cast
<MemSDNode
>(N
);
1713 unsigned AS
= Mem
->getAddressSpace();
1714 if (AS
== AMDGPUAS::FLAT_ADDRESS
) {
1719 MVT VT
= N
->getSimpleValueType(0);
1720 bool Is32
= (VT
== MVT::i32
);
1723 MachineSDNode
*CmpSwap
= nullptr;
1724 if (Subtarget
->hasAddr64()) {
1725 SDValue SRsrc
, VAddr
, SOffset
, Offset
, SLC
;
1727 if (SelectMUBUFAddr64(Mem
->getBasePtr(), SRsrc
, VAddr
, SOffset
, Offset
, SLC
)) {
1728 unsigned Opcode
= Is32
? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN
:
1729 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN
;
1730 SDValue CmpVal
= Mem
->getOperand(2);
1732 // XXX - Do we care about glue operands?
1735 CmpVal
, VAddr
, SRsrc
, SOffset
, Offset
, SLC
, Mem
->getChain()
1738 CmpSwap
= CurDAG
->getMachineNode(Opcode
, SL
, Mem
->getVTList(), Ops
);
1743 SDValue SRsrc
, SOffset
, Offset
, SLC
;
1744 if (SelectMUBUFOffset(Mem
->getBasePtr(), SRsrc
, SOffset
, Offset
, SLC
)) {
1745 unsigned Opcode
= Is32
? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
:
1746 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN
;
1748 SDValue CmpVal
= Mem
->getOperand(2);
1750 CmpVal
, SRsrc
, SOffset
, Offset
, SLC
, Mem
->getChain()
1753 CmpSwap
= CurDAG
->getMachineNode(Opcode
, SL
, Mem
->getVTList(), Ops
);
1762 MachineMemOperand
*MMO
= Mem
->getMemOperand();
1763 CurDAG
->setNodeMemRefs(CmpSwap
, {MMO
});
1765 unsigned SubReg
= Is32
? AMDGPU::sub0
: AMDGPU::sub0_sub1
;
1767 = CurDAG
->getTargetExtractSubreg(SubReg
, SL
, VT
, SDValue(CmpSwap
, 0));
1769 ReplaceUses(SDValue(N
, 0), Extract
);
1770 ReplaceUses(SDValue(N
, 1), SDValue(CmpSwap
, 1));
1771 CurDAG
->RemoveDeadNode(N
);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  if ((IntrID != Intrinsic::amdgcn_ds_append &&
       IntrID != Intrinsic::amdgcn_ds_consume) ||
      N->getValueType(0) != MVT::i32) {
    SelectCode(N);
    return;
  }

  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
  // be copied to an SGPR with readfirstlane.
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  SDValue Chain = N->getOperand(0);
  SDValue Ptr = N->getOperand(2);
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;

  SDValue Offset;
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    SDValue PtrBase = Ptr.getOperand(0);
    SDValue PtrOffset = Ptr.getOperand(1);

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    }
  }

  if (!Offset.getNode()) {
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  }

  SDValue Ops[] = {
    Offset,
    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
    Chain,
    N->getOperand(N->getNumOperands() - 1) // New glue
  };

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

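// The SelectVOP3* matchers below handle source modifiers: fneg/fabs wrappers
// on a source are stripped off and re-expressed as NEG/ABS bits in the
// instruction's src_modifiers immediate operand.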
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

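// Same as SelectVOP3ModsImpl, but materializes the collected modifier bits as
// an i32 target constant suitable for use as a src_modifiers operand.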
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

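// The *Mods0 variants additionally return default (zero) clamp and omod
// operands for instructions that encode output modifiers.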
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

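// A 16-bit value read from the high half of a 32-bit register typically
// appears in the DAG as (i16 (trunc (srl i32:%x, 16))), possibly wrapped in
// bitcasts.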
// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt =
        dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

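// Packed (VOP3P) source modifiers: for a v2f16 source built from two halves,
// fneg on the whole vector or on either half maps to the NEG/NEG_HI bits, and
// reads of high halves map to the OP_SEL_0/OP_SEL_1 bits.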
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register
      // to avoid packing.
      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // f16. If the source's op_sel is set, it picks the high half of the source
    // register.
    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

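// Match a value that will be placed in the high 16 bits of a 32-bit register:
// constants are rematerialized already shifted into the high half with
// v_mov_b32; anything else must be a recognizable high-half extract.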
// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL,
                                          MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

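// Returns true if this immediate should be materialized in a VGPR: only when
// at least one use (among the first few inspected) requires a VGPR operand
// and cannot be fixed up by commuting the user instruction.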
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
    return false;
  }

  const SIRegisterInfo *SIRI =
    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  const SIInstrInfo *SII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  unsigned Limit = 0;
  bool AllUsesAcceptSReg = true;
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
       Limit < 10 && U != E; ++U, ++Limit) {
    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());

    // If the register class is unknown, it could be one that needs to be an
    // SGPR, e.g. an inline asm constraint.
    if (!RC || SIRI->isSGPRClass(RC))
      return false;

    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;
      SDNode *User = *U;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        MCInstrDesc Desc = SII->get(Opc);
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC =
              getOperandRegClass(*U, CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;
          }
        }
      }

      // If AllUsesAcceptSReg is still false, we have not succeeded in
      // commuting the current user, so at least one use strictly requires a
      // VGPR and there is no point trying to commute the remaining users.
      if (!AllUsesAcceptSReg)
        break;
    }
  }

  return !AllUsesAcceptSReg && (Limit < 10);
}

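// A load is treated as uniform if it is suitably aligned and either reads a
// constant address space with a non-divergent pointer, or is a non-volatile,
// non-divergent global load that is known not to be clobbered while global
// scalarization is enabled for the subtarget.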
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);

  return Ld->getAlignment() >= 4 &&
    (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
       Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
      !N->isDivergent()) ||
     (Subtarget->getScalarizeGlobalBehavior() &&
      Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      !Ld->isVolatile() && !N->isDivergent() &&
      static_cast<const SITargetLowering *>(
        getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}

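// After selection, repeatedly run the target's PostISelFolding hook over all
// machine nodes until no more changes are made, removing dead nodes after
// each pass.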
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
    *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

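// The R600DAGToDAGISel routines below handle selection for the pre-GCN R600
// family of targets.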
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<R600Subtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}

bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                       SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                       SDValue &BaseReg,
                                                       SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG, which
    // adds a 128-bit register copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    switch (NumVectorElts) {
    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = R600::R600_Reg128VerticalRegClassID;
      else
        RegClassID = R600::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

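// Match an indirect address as a base register plus constant offset: plain
// constants and DWORDADDR-wrapped constants use the indirect base register
// with the constant as the offset, an ADD or OR with a constant RHS splits
// into base plus offset, and anything else is a base with a zero offset.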
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

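// Match the addressing mode of a VTX_READ: a base pointer plus a 16-bit
// immediate offset, with a constant-only address folded entirely into the
// offset against a zero base.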
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);