//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"

using namespace llvm;

#include "R600GenCallingConv.inc"
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(Register::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  }

  MI.eraseFromParent();
  return BB;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}
SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
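  // (0.15915494309 below is 1/(2*Pi), so the FMUL/FADD/FRACT sequence yields
  // FRACT(x / 2Pi + 0.5) in [0, 1), which is then recentered to [-0.5, 0.5)
  // before being handed to the hardware COS/SIN.)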
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
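  // For example, with Shift == 0 a single combined shift would be by
  // Width1 + 1 == 32, which is not a valid 32-bit shift amount; shifting by
  // Width1 (31) and then by One keeps every step in range.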

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
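// An i1 result of FP_TO_UINT / FP_TO_SINT only has to distinguish 0.0 from the
// single representable non-zero value (1.0 for unsigned, -1.0 for signed), so
// both are lowered below to one SETEQ against that constant.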
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}
bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}
bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }

  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx % 4;
    if (ElemIdx > 3) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  }
}
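
/// Private-address stores narrower than 32 bits are emulated below with a
/// dword read-modify-write: load the containing dword, clear the target
/// byte/halfword with a shifted mask, OR in the shifted value and store the
/// dword back.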
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
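
/// Sub-dword extending loads from private memory are emulated below by loading
/// the containing dword, shifting the wanted byte/halfword down by
/// 8 * (byte offset within the dword), and sign- or zero-extending the result
/// in place.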
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}
SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}
CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // with i16.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}
EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

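// Misaligned accesses are only permitted for simple types wider than 32 bits
// when the access is at least 4-byte aligned.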
bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

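// Rewrites a build_vector so that undef, 0.0 and 1.0 elements are expressed
// through the swizzle selects (SEL_MASK_WRITE, SEL_0, SEL_1) recorded in
// RemapSwizzle, and duplicate elements reuse an earlier lane instead of
// occupying a new one.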
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;

    // Fix spurious warning with gcc 7.3 -O3
    //   warning: array subscript is above array bounds [-Warray-bounds]
    //   if (NewBldVec[i] == NewBldVec[j]) {
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

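// Tries to place each extract_vector_elt operand of a build_vector into the
// lane it is extracted from when that lane is not already claimed, recording
// the resulting lane permutation in RemapSwizzle.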
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

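// Runs CompactSwizzlableVector and ReorganizeVector over BuildVector and
// updates the four swizzle operands in Swz to match the remapped lanes.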
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

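// Lowers a load from a constant buffer into AMDGPUISD::CONST_ADDRESS nodes,
// one per 32-bit channel, and returns the (possibly scalarized) value merged
// with the original chain.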
SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  // TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL,
                                      makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

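// Target-specific combines for R600 nodes and generic nodes that benefit from
// R600-specific patterns; anything not handled here falls through to
// AMDGPUTargetLowering::PerformDAGCombine.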
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.isUndef())
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.isUndef()) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
        (Arg.getOperand(0).getValueType().getVectorNumElements() ==
         Arg.getValueType().getVectorNumElements())) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    //      selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    //      selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(DL,
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::R600_EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7)  // SWZ_W
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18)
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }

  case ISD::LOAD: {
    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    SDValue Ptr = LoadNode->getBasePtr();
    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
        isa<ConstantSDNode>(Ptr))
      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
    break;
  }

  default: break;
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}

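// Folds a source operand's modifier (neg/abs), constant-buffer select or
// literal immediate directly into the parent instruction's operand slots,
// returning true if any operand was changed.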
bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
                                     SDValue &Src, SDValue &Neg, SDValue &Abs,
                                     SDValue &Sel, SDValue &Imm,
                                     SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Src.isMachineOpcode())
    return false;

  switch (Src.getMachineOpcode()) {
  case R600::FNEG_R600:
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
  case R600::FABS_R600:
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
  case R600::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2),
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == R600::ALU_CONST) {
          ConstantSDNode *Cst
            = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
          Consts.push_back(Cst->getZExtValue());
        }
      }
    }

    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
    return true;
  }
  case R600::MOV_IMM_GLOBAL_ADDR:
    // Check if the Imm slot is used. Taken from below.
    if (cast<ConstantSDNode>(Imm)->getZExtValue())
      return false;
    Imm = Src.getOperand(0);
    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
    return true;
  case R600::MOV_IMM_I32:
  case R600::MOV_IMM_F32: {
    unsigned ImmReg = R600::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = R600::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = R600::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = R600::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
      uint64_t Value = C->getZExtValue();
      if (Value == 0) {
        ImmReg = R600::ZERO;
      } else if (Value == 1) {
        ImmReg = R600::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == R600::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
      assert(C);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}

/// Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}