//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AMDGPUGenRegisterBankInfo.def"

using namespace llvm;
using namespace MIPatternMatch;
namespace {

// Observer to apply a register bank to new registers created by LegalizerHelper.
class ApplyRegBankMapping final : public GISelChangeObserver {
private:
  MachineRegisterInfo &MRI;
  const RegisterBank *NewBank;
  SmallVector<MachineInstr *, 4> NewInsts;

public:
  ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
    : MRI(MRI_), NewBank(RB) {}

  ~ApplyRegBankMapping() {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);
  }

  /// Set any registers that don't have a set register class or bank to SALU.
  void applyBank(MachineInstr &MI) {
    for (MachineOperand &Op : MI.operands()) {
      if (!Op.isReg())
        continue;

      Register Reg = Op.getReg();
      if (MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      // FIXME: This might not be enough to detect when SCC should be used.
      if (MRI.getType(Reg) == LLT::scalar(1))
        RB = (NewBank == &AMDGPU::SGPRRegBank ?
              &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);

      MRI.setRegBank(Reg, *RB);
    }
  }

  void erasingInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    // At this point, the instruction was just inserted and has no operands.
    NewInsts.push_back(&MI);
  }

  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}
};

} // anonymous namespace
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterBankInfo(),
      Subtarget(ST),
      TRI(Subtarget.getRegisterInfo()),
      TII(Subtarget.getInstrInfo()) {

  // HACK: Until this is fully tablegen'd.
  static bool AlreadyInit = false;
  if (AlreadyInit)
    return;

  AlreadyInit = true;

  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
  assert(&RBVGPR == &AMDGPU::VGPRRegBank);
}
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                          const RegisterBank &Src,
                                          unsigned Size) const {
  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
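  // A VGPR value can differ per lane, so there is no general way to copy a
  // VGPR into an SGPR; returning the maximum cost below steers the generic
  // mapping code away from ever requesting that direction.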
  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      Src.getID() == AMDGPU::VGPRRegBankID) {
    return std::numeric_limits<unsigned>::max();
  }

  // Bool values are tricky, because the meaning is based on context. The SCC
  // and VCC banks are for the natural scalar and vector conditions produced by
  // a compare.
  //
  // Legalization doesn't know about the necessary context, so an s1 use may
  // have been a truncate from an arbitrary value, in which case a copy (lowered
  // as a compare with 0) needs to be inserted.
  if (Size == 1 &&
      (Dst.getID() == AMDGPU::SCCRegBankID ||
       Dst.getID() == AMDGPU::SGPRRegBankID) &&
      (Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::SCCRegBankID &&
      Src.getID() == AMDGPU::VCCRegBankID)
    return std::numeric_limits<unsigned>::max();

  return RegisterBankInfo::copyCost(Dst, Src, Size);
}
unsigned AMDGPURegisterBankInfo::getBreakDownCost(
  const ValueMapping &ValMapping,
  const RegisterBank *CurBank) const {
  // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
  // VGPR.
  // FIXME: Is there a better way to do this?
  if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
    return 10; // This is expensive.

  assert(ValMapping.NumBreakDowns == 2 &&
         ValMapping.BreakDown[0].Length == 32 &&
         ValMapping.BreakDown[0].StartIdx == 0 &&
         ValMapping.BreakDown[1].Length == 32 &&
         ValMapping.BreakDown[1].StartIdx == 32 &&
         ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);

  // 32-bit extract of a 64-bit value is just access of a subregister, so free.
  // TODO: Cost of 0 hits assert, though it's not clear it's what we really
  // want.
  //
  // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
  // alignment restrictions, but this probably isn't important.
  return 1;
}
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
    const TargetRegisterClass &RC) const {
  if (&RC == &AMDGPU::SReg_1RegClass)
    return AMDGPU::VCCRegBank;

  return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank;
}
template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {

  InstructionMappings AltMappings;

  SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());

  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  // getInstrMapping's default mapping uses ID 1, so start at 2.
  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }

    AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
                                                 getOperandsMapping(Operands),
                                                 Operands.size()));
  }

  return AltMappings;
}
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of first op
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of second op
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both ops
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {

  switch (MI.getIntrinsicID()) {
  case Intrinsic::amdgcn_buffer_load: {
    static const OpRegBankEntry<3> Table[4] = {
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Waterfall loop needed for rsrc. In the worst case this will execute
      // approximately an extra 10 * wavesize + 2 instructions.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
    };

    // rsrc, voffset, offset
    const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    static const OpRegBankEntry<3> Table[2] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    // FIXME: Should have no register for immediate
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::SGPRRegBankID }, 1 },

      { { AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  default:
    return RegisterBankInfo::getInstrAlternativeMappings(MI);
  }
}
// FIXME: Returns uniform if there's no source value information. This is
// probably wrong.
static bool isInstrUniformNonExtLoadAlign4(const MachineInstr &MI) {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return MMO->getSize() >= 4 && MMO->getAlignment() >= 4 &&
         AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  InstructionMappings AltMappings;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    if (Size == 1) {
      static const OpRegBankEntry<1> Table[4] = {
        { { AMDGPU::VGPRRegBankID }, 1 },
        { { AMDGPU::SGPRRegBankID }, 1 },
        { { AMDGPU::VCCRegBankID }, 1 },
        { { AMDGPU::SCCRegBankID }, 1 }
      };

      return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
    }

    LLVM_FALLTHROUGH;
  }
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    if (Size == 1) {
      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
      const InstructionMapping &SCCMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SCCMapping);

      const InstructionMapping &SGPRMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&SGPRMapping);

      const InstructionMapping &VCCMapping0 = getInstructionMapping(
        2, 10, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
        3); // Num Operands
      AltMappings.push_back(&VCCMapping0);
      return AltMappings;
    }

    if (Size != 64)
      break;

    const InstructionMapping &SSMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 2, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VVMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(
      3, 3, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&SVMapping);

    // SGPR in LHS is slightly preferable, so make VS more expensive than
    // SV.
    const InstructionMapping &VSMapping = getInstructionMapping(
      3, 4, getOperandsMapping(
        {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
         AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
      3); // Num Operands
    AltMappings.push_back(&VSMapping);
    break;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned PtrSize = PtrTy.getSizeInBits();
    unsigned AS = PtrTy.getAddressSpace();
    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
    if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
         AS != AMDGPUAS::PRIVATE_ADDRESS) &&
        isInstrUniformNonExtLoadAlign4(MI)) {
      const InstructionMapping &SSMapping = getInstructionMapping(
        1, 1, getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
        2); // Num Operands
      AltMappings.push_back(&SSMapping);
    }

    const InstructionMapping &VVMapping = getInstructionMapping(
      2, 1, getOperandsMapping(
        {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
         AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
      2); // Num Operands
    AltMappings.push_back(&VVMapping);

    // It may be possible to have a vgpr = load sgpr mapping here, because
    // the mubuf instructions support this kind of load, but probably for only
    // gfx7 and older. However, the addressing mode matching in the instruction
    // selector should be able to do a better job of detecting and selecting
    // these kinds of loads from the vgpr = load vgpr mapping.

    return AltMappings;
  }
  case TargetOpcode::G_ICMP: {
    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &SVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SVMapping);

    const InstructionMapping &VSMapping = getInstructionMapping(3, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          nullptr, // Predicate operand.
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SELECT: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
      4); // Num Operands
    AltMappings.push_back(&VVMapping);

    return AltMappings;
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    static const OpRegBankEntry<3> Table[4] = {
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Scalar requires cmp+select, and extends if 16-bit.
      // FIXME: Should there be separate costs for 32 and 16-bit
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
      getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
         AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&SSMapping);

    const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
                          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
      5); // Num Operands
    AltMappings.push_back(&VVMapping);
    return AltMappings;
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);

    const InstructionMapping &SMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&SMapping);

    const InstructionMapping &VMapping = getInstructionMapping(
      1, 1, getOperandsMapping(
        {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr}),
      2); // Num Operands
    AltMappings.push_back(&VMapping);
    return AltMappings;
  }
  case AMDGPU::G_INTRINSIC:
    return getInstrAlternativeMappingsIntrinsic(MI, MRI);
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
    return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
void AMDGPURegisterBankInfo::split64BitValueForMapping(
  MachineIRBuilder &B,
  SmallVector<Register, 2> &Regs,
  LLT HalfTy,
  Register Reg) const {
  assert(HalfTy.getSizeInBits() == 32);
  MachineRegisterInfo *MRI = B.getMRI();
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);

  Regs.push_back(LoLHS);
  Regs.push_back(HiLHS);

  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
    .addDef(LoLHS)
    .addDef(HiLHS)
    .addUse(Reg);
}
/// Replace the current type each register in \p Regs has with \p NewTy
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
                          LLT NewTy) {
  for (Register Reg : Regs) {
    assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
    MRI.setType(Reg, NewTy);
  }
}
static LLT getHalfSizedType(LLT Ty) {
  if (Ty.isVector()) {
    assert(Ty.getNumElements() % 2 == 0);
    return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
  }

  assert(Ty.getSizeInBits() % 2 == 0);
  return LLT::scalar(Ty.getSizeInBits() / 2);
}
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
/// execute the instruction for each unique combination of values in all lanes
/// in the wave. The block will be split such that rest of the instructions are
/// moved to a new block.
///
/// Essentially performs this loop:
///
/// Save Execution Mask
/// For (Lane : Wavefront) {
///   Enable Lane, Disable all other lanes
///   SGPR = read SGPR value for current lane from VGPR
///   VGPRResult[Lane] = use_op SGPR
/// }
/// Restore Execution Mask
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
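///
/// When more than one operand needs this treatment, the per-operand lane
/// comparisons below are combined with an exec-width AND, so a single loop
/// iteration handles one unique combination of all required operands.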
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineIRBuilder &B,
  iterator_range<MachineBasicBlock::iterator> Range,
  SmallSet<Register, 4> &SGPROperandRegs,
  MachineRegisterInfo &MRI) const {
  SmallVector<Register, 4> ResultRegs;
  SmallVector<Register, 4> InitResultRegs;
  SmallVector<Register, 4> PhiRegs;

  MachineBasicBlock &MBB = B.getMBB();
  MachineFunction *MF = &B.getMF();

  const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
  const unsigned WaveAndOpc = Subtarget.isWave32() ?
    AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
  const unsigned MovTermOpc = Subtarget.isWave32() ?
    AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
  const unsigned XorTermOpc = Subtarget.isWave32() ?
    AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
  const unsigned AndSaveExecOpc = Subtarget.isWave32() ?
    AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
  const unsigned ExecReg = Subtarget.isWave32() ?
    AMDGPU::EXEC_LO : AMDGPU::EXEC;
  for (MachineInstr &MI : Range) {
    for (MachineOperand &Def : MI.defs()) {
      LLT ResTy = MRI.getType(Def.getReg());
      const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
      ResultRegs.push_back(Def.getReg());
      Register InitReg = B.buildUndef(ResTy).getReg(0);
      Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
      InitResultRegs.push_back(InitReg);
      PhiRegs.push_back(PhiReg);
      MRI.setRegBank(PhiReg, *DefBank);
      MRI.setRegBank(InitReg, *DefBank);
    }
  }

  Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
  Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(WaveRC);
  Register NewExec = MRI.createVirtualRegister(WaveRC);
  // To insert the loop we need to split the block. Move everything before this
  // point to a new block, and insert a new empty block before this instruction.
  MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
  MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
  MachineFunction::iterator MBBI(MBB);
  ++MBBI;
  MF->insert(MBBI, LoopBB);
  MF->insert(MBBI, RestoreExecBB);
  MF->insert(MBBI, RemainderBB);

  LoopBB->addSuccessor(RestoreExecBB);
  LoopBB->addSuccessor(LoopBB);

  // Move the rest of the block into a new block.
  RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
  RemainderBB->splice(RemainderBB->begin(), &MBB, Range.end(), MBB.end());

  MBB.addSuccessor(LoopBB);
  RestoreExecBB->addSuccessor(RemainderBB);

  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
    .addDef(PhiExec)
    .addReg(InitSaveExecReg)
    .addMBB(&MBB)
    .addReg(NewExec)
    .addMBB(LoopBB);

  for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
    B.buildInstr(TargetOpcode::G_PHI)
      .addDef(std::get<2>(Result))
      .addReg(std::get<0>(Result)) // Initial value / implicit_def
      .addMBB(&MBB)
      .addReg(std::get<1>(Result)) // Mid-loop value.
      .addMBB(LoopBB);
  }

  const DebugLoc &DL = B.getDL();

  // Figure out the iterator range after splicing the instructions.
  auto NewBegin = std::prev(LoopBB->end());

  // Move the instruction into the loop. Note we moved everything after
  // Range.end() already into a new block, so Range.end() is no longer valid.
  LoopBB->splice(LoopBB->end(), &MBB, Range.begin(), MBB.end());

  auto NewEnd = LoopBB->end();

  MachineBasicBlock::iterator I = Range.begin();
  B.setInsertPt(*LoopBB, I);

  Register CondReg;
  for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
    for (MachineOperand &Op : MI.uses()) {
      if (!Op.isReg() || Op.isDef())
        continue;

      if (SGPROperandRegs.count(Op.getReg())) {
        LLT OpTy = MRI.getType(Op.getReg());
        unsigned OpSize = OpTy.getSizeInBits();

        // Can only do a readlane of 32-bit pieces.
        if (OpSize == 32) {
          // Avoid extra copies in the simple case of one 32-bit register.
          Register CurrentLaneOpReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
          MRI.setType(CurrentLaneOpReg, OpTy);

          constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
          // Read the next variant <- also loop target.
          BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                  CurrentLaneOpReg)
            .addReg(Op.getReg());

          Register NewCondReg = MRI.createVirtualRegister(WaveRC);
          bool First = CondReg == AMDGPU::NoRegister;
          if (First)
            CondReg = NewCondReg;

          // Compare the just read M0 value to all possible Idx values.
          B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
            .addDef(NewCondReg)
            .addReg(CurrentLaneOpReg)
            .addReg(Op.getReg());
          Op.setReg(CurrentLaneOpReg);

          if (!First) {
            Register AndReg = MRI.createVirtualRegister(WaveRC);

            // If there are multiple operands to consider, and the conditions.
            B.buildInstr(WaveAndOpc)
              .addDef(AndReg)
              .addReg(NewCondReg)
              .addReg(CondReg);
            CondReg = AndReg;
          }
        } else {
          LLT S32 = LLT::scalar(32);
          SmallVector<Register, 8> ReadlanePieces;

          // The compares can be done as 64-bit, but the extract needs to be
          // done in 32-bit pieces.

          bool Is64 = OpSize % 64 == 0;

          LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
          unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
                                            : AMDGPU::V_CMP_EQ_U32_e64;

          // Insert the unmerge before the loop.
          B.setMBB(MBB);
          auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
          B.setInstr(*I);

          unsigned NumPieces = Unmerge->getNumOperands() - 1;
          for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
            Register UnmergePiece = Unmerge.getReg(PieceIdx);

            Register CurrentLaneOpReg;
            if (Is64) {
              Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
              Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);

              MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
              MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
              MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);

              // Read the next variant <- also loop target.
              BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                      CurrentLaneOpRegLo)
                .addReg(UnmergePiece, 0, AMDGPU::sub0);

              // Read the next variant <- also loop target.
              BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                      CurrentLaneOpRegHi)
                .addReg(UnmergePiece, 0, AMDGPU::sub1);

              CurrentLaneOpReg =
                B.buildMerge(LLT::scalar(64),
                             {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
                  .getReg(0);

              MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);

              if (OpTy.getScalarSizeInBits() == 64) {
                // If we need to produce a 64-bit element vector, use the
                // merged pieces.
                ReadlanePieces.push_back(CurrentLaneOpReg);
              } else {
                // 32-bit element type.
                ReadlanePieces.push_back(CurrentLaneOpRegLo);
                ReadlanePieces.push_back(CurrentLaneOpRegHi);
              }
            } else {
              CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
              MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
              MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);

              // Read the next variant <- also loop target.
              BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
                      CurrentLaneOpReg)
                .addReg(UnmergePiece);
              ReadlanePieces.push_back(CurrentLaneOpReg);
            }

            Register NewCondReg = MRI.createVirtualRegister(WaveRC);
            bool First = CondReg == AMDGPU::NoRegister;
            if (First)
              CondReg = NewCondReg;

            B.buildInstr(CmpOp)
              .addDef(NewCondReg)
              .addReg(CurrentLaneOpReg)
              .addReg(UnmergePiece);

            if (!First) {
              Register AndReg = MRI.createVirtualRegister(WaveRC);

              // If there are multiple operands to consider, and the conditions.
              B.buildInstr(WaveAndOpc)
                .addDef(AndReg)
                .addReg(NewCondReg)
                .addReg(CondReg);
              CondReg = AndReg;
            }
          }

          // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
          // BUILD_VECTOR.
          if (OpTy.isVector()) {
            auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
            Op.setReg(Merge.getReg(0));
          } else {
            auto Merge = B.buildMerge(OpTy, ReadlanePieces);
            Op.setReg(Merge.getReg(0));
          }

          MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
        }
      }
    }
  }
  B.setInsertPt(*LoopBB, LoopBB->end());

  // Update EXEC, save the original EXEC value to VCC.
  B.buildInstr(AndSaveExecOpc)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(XorTermOpc)
    .addDef(ExecReg)
    .addReg(ExecReg)
    .addReg(NewExec);

  // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
  // s_cbranch_scc0?

  // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
  B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
    .addMBB(LoopBB);

  // Save the EXEC mask before the loop.
  BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
    .addReg(ExecReg);

  // Restore the EXEC mask after the loop.
  B.setMBB(*RestoreExecBB);
  B.buildInstr(MovTermOpc)
    .addDef(ExecReg)
    .addReg(SaveExecReg);

  // Restore the insert point before the original instruction.
  B.setInsertPt(MBB, MBB.end());

  return true;
}
// Return any unique registers used by \p MI at \p OpIndices that need to be
// handled in a waterfall loop. Returns these registers in \p
// SGPROperandRegs. Returns true if there are any operands to handle and a
// waterfall loop is necessary.
bool AMDGPURegisterBankInfo::collectWaterfallOperands(
  SmallSet<Register, 4> &SGPROperandRegs, MachineInstr &MI,
  MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const {
  for (unsigned Op : OpIndices) {
    assert(MI.getOperand(Op).isUse());
    Register Reg = MI.getOperand(Op).getReg();
    const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
    if (OpBank->getID() == AMDGPU::VGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  return !SGPROperandRegs.empty();
}
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineIRBuilder &B, MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  // Use a set to avoid extra readfirstlanes in the case where multiple operands
  // are the same register.
  SmallSet<Register, 4> SGPROperandRegs;

  if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, OpIndices))
    return false;

  MachineBasicBlock::iterator I = MI.getIterator();
  return executeInWaterfallLoop(B, make_range(I, std::next(I)),
                                SGPROperandRegs, MRI);
}
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  ArrayRef<unsigned> OpIndices) const {
  MachineIRBuilder B(MI);
  return executeInWaterfallLoop(B, MI, MRI, OpIndices);
}
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
  MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
  Register Reg = MI.getOperand(OpIdx).getReg();
  const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
  if (Bank != &AMDGPU::VGPRRegBank)
    return;

  MachineIRBuilder B(MI);
  Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
    .addDef(SGPR)
    .addReg(Reg);

  const TargetRegisterClass *Constrained =
      constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
  assert(Constrained && "Failed to constrain readfirstlane src reg");

  MI.getOperand(OpIdx).setReg(SGPR);
}
// When regbankselect repairs registers, it will insert a repair instruction
// which defines the repaired register. Then it calls applyMapping and expects
// that the targets will either delete or rewrite the instruction that
// originally wrote to the repaired registers. Because of this, we end up in a
// situation where we have 2 instructions defining the same registers.
static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
                                     Register Reg,
                                     const MachineInstr &MI) {
  // Is there some way we can assert that there are exactly 2 def instructions?
  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
    if (&Other != &MI)
      return &Other;
  }

  return nullptr;
}
bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
                        const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
                                              MachineRegisterInfo &MRI) const {
  Register DstReg = MI.getOperand(0).getReg();
  const LLT LoadTy = MRI.getType(DstReg);
  unsigned LoadSize = LoadTy.getSizeInBits();
  const unsigned MaxNonSmrdLoadSize = 128;
  // 128-bit loads are supported for all instruction types.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;

  SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
  SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));
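  // getVRegs returns the registers the chosen mapping broke this operand
  // into; the vector is empty when the operand was not split.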
  // If the pointer is an SGPR, we have nothing to do.
  if (SrcRegs.empty()) {
    Register PtrReg = MI.getOperand(1).getReg();
    const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
    if (PtrBank == &AMDGPU::SGPRRegBank)
      return false;
    SrcRegs.push_back(PtrReg);
  }

  assert(LoadSize % MaxNonSmrdLoadSize == 0);

  // We want to get the repair instruction now, because it will help us
  // determine which instruction the legalizer inserts that will also
  // write to DstReg.
  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);

  // RegBankSelect only emits scalar types, so we need to reset the pointer
  // operand to a pointer type.
  Register BasePtrReg = SrcRegs[0];
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);

  MachineIRBuilder B(MI);

  unsigned SplitElts =
      MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
  const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
  ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
  GISelObserverWrapper Observer(&O);
  B.setChangeObserver(Observer);
  LegalizerHelper Helper(B.getMF(), Observer, B);
  if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
    return false;

  // At this point, the legalizer has split the original load into smaller
  // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
  // that combines the outputs of the lower loads and writes it to DstReg.
  // The register bank selector has also added the RepairInst which writes to
  // DstReg as well.
  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);

  // Replace the output of the LegalizedInst with a temporary register, since
  // RepairInst already defines DstReg.
  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
  LegalizedInst->getOperand(0).setReg(TmpReg);
  B.setInsertPt(*RepairInst->getParent(), RepairInst);

  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
    Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
    B.buildConstant(IdxReg, DefIdx);
    MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
  }

  MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
  return true;
}
bool AMDGPURegisterBankInfo::applyMappingImage(
    MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
    MachineRegisterInfo &MRI, int RsrcIdx) const {
  const int NumDefs = MI.getNumExplicitDefs();

  // The reported argument index is relative to the IR intrinsic call arguments,
  // so we need to shift by the number of defs and the intrinsic ID.
  RsrcIdx += NumDefs + 1;

  // Insert copies to VGPR arguments.
  applyDefaultMapping(OpdMapper);

  // Fixup any SGPR arguments.
  SmallVector<unsigned, 4> SGPRIndexes;
  for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;

    // If this intrinsic has a sampler, it immediately follows rsrc.
    if (I == RsrcIdx || I == RsrcIdx + 1)
      SGPRIndexes.push_back(I);
  }

  executeInWaterfallLoop(MI, MRI, SGPRIndexes);
  return true;
}
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand.
static void substituteSimpleCopyRegs(
  const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
  SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
  if (!SrcReg.empty()) {
    assert(SrcReg.size() == 1);
    OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
  }
}
/// Handle register layout difference for f16 images for some subtargets.
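/// On subtargets with unpacked D16 VMEM instructions each 16-bit element of
/// the data occupies the low half of its own 32-bit register, so a <N x s16>
/// store value is rewritten here as an <N x s32> value.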
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
                                                MachineRegisterInfo &MRI,
                                                Register Reg) const {
  if (!Subtarget.hasUnpackedD16VMem())
    return Reg;

  const LLT S16 = LLT::scalar(16);
  LLT StoreVT = MRI.getType(Reg);
  if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
    return Reg;

  auto Unmerge = B.buildUnmerge(S16, Reg);

  SmallVector<Register, 4> WideRegs;
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    WideRegs.push_back(Unmerge.getReg(I));

  const LLT S32 = LLT::scalar(32);
  int NumElts = StoreVT.getNumElements();

  return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  int64_t Const;
  if (mi_match(Reg, MRI, m_ICst(Const)))
    return std::make_pair(Register(), Const);

  Register Base;
  if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
    return std::make_pair(Base, Const);

  // TODO: Handle G_OR used for add case
  return std::make_pair(Reg, 0);
}
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
                                           Register OrigOffset) const {
  const unsigned MaxImm = 4095;
  Register BaseReg;
  unsigned ImmOffset;
  const LLT S32 = LLT::scalar(32);

  std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
                                                           OrigOffset);

  unsigned C1 = 0;
  if (ImmOffset != 0) {
    // If the immediate value is too big for the immoffset field, put the value
    // and -4096 into the immoffset field so that the value that is copied/added
    // for the voffset field is a multiple of 4096, and it stands more chance
    // of being CSEd with the copy/add for another similar load/store.
    // However, do not do that rounding down to a multiple of 4096 if that is a
    // negative number, as it appears to be illegal to have a negative offset
    // in the vgpr, even if adding the immediate offset makes it positive.
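    // For example, with MaxImm = 4095 an incoming ImmOffset of 5000 becomes
    // Overflow = 4096 (added to the voffset register) and ImmOffset = 904
    // (kept in the immediate field).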
    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
      Overflow += ImmOffset;
      ImmOffset = 0;
    }

    C1 = ImmOffset;
    if (Overflow != 0) {
      if (!BaseReg)
        BaseReg = B.buildConstant(S32, Overflow).getReg(0);
      else {
        auto OverflowVal = B.buildConstant(S32, Overflow);
        BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
      }
    }
  }

  if (!BaseReg)
    BaseReg = B.buildConstant(S32, 0).getReg(0);

  return {BaseReg, C1};
}
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
  int64_t C;
  return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
  return CachePolicy & 1;
}

static unsigned extractSLC(unsigned CachePolicy) {
  return (CachePolicy >> 1) & 1;
}

static unsigned extractDLC(unsigned CachePolicy) {
  return (CachePolicy >> 2) & 1;
}
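// As the extractors above show, the cachepolicy immediate packs glc in bit 0,
// slc in bit 1 and dlc in bit 2.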
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
                                             MachineInstr &MI) const {
  MachineRegisterInfo &MRI = *B.getMRI();
  executeInWaterfallLoop(B, MI, MRI, {2, 4});

  // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.

  Register VData = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(VData);

  int EltSize = Ty.getScalarSizeInBits();
  int Size = Ty.getSizeInBits();

  // FIXME: Broken integer truncstore.
  if (EltSize != 32)
    report_fatal_error("unhandled intrinsic store");

  // FIXME: Verifier should enforce 1 MMO for these intrinsics.
  const int MemSize = (*MI.memoperands_begin())->getSize();

  Register RSrc = MI.getOperand(2).getReg();
  Register VOffset = MI.getOperand(3).getReg();
  Register SOffset = MI.getOperand(4).getReg();
  unsigned CachePolicy = MI.getOperand(5).getImm();

  unsigned ImmOffset;
  std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);

  const bool Offen = !isZero(VOffset, MRI);

  unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
  switch (8 * MemSize) {
  case 8:
    Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
    break;
  case 16:
    Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
    break;
  default:
    Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
                  AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
    if (Size > 32)
      Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
    break;
  }

  // Set the insertion point back to the instruction in case it was moved into a
  // loop.
  B.setInstr(MI);

  MachineInstrBuilder MIB = B.buildInstr(Opc)
    .addUse(VData);

  if (Offen)
    MIB.addUse(VOffset);

  MIB.addUse(RSrc)
     .addUse(SOffset)
     .addImm(ImmOffset)
     .addImm(extractGLC(CachePolicy))
     .addImm(extractSLC(CachePolicy))
     .addImm(0) // tfe: FIXME: Remove from inst
     .addImm(extractDLC(CachePolicy))
     .cloneMemRefs(MI);

  // FIXME: We need a way to report failure from applyMappingImpl.
  // Insert constrain copies before inserting the loop.
  if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
    report_fatal_error("failed to constrain selected store intrinsic");

  return MIB;
}
void AMDGPURegisterBankInfo::applyMappingImpl(
    const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  unsigned Opc = MI.getOpcode();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();
  switch (Opc) {
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.getSizeInBits() != 64)
      break;

    LLT HalfTy = getHalfSizedType(DstTy);

    SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
    SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
    SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
    SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));

    // All inputs are SGPRs, nothing special to do.
    if (DefRegs.empty()) {
      assert(Src1Regs.empty() && Src2Regs.empty());
      break;
    }

    MachineIRBuilder B(MI);
    if (Src0Regs.empty())
      Src0Regs.push_back(MI.getOperand(1).getReg());
    else
      assert(Src0Regs.size() == 1);

    if (Src1Regs.empty())
      split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
    else
      setRegsToType(MRI, Src1Regs, HalfTy);

    if (Src2Regs.empty())
      split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
    else
      setRegsToType(MRI, Src2Regs, HalfTy);

    setRegsToType(MRI, DefRegs, HalfTy);

    B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
    MI.eraseFromParent();
    return;
  }
: {
1381 // 64-bit and is only available on the SALU, so split into 2 32-bit ops if
1382 // there is a VGPR input.
1383 Register DstReg
= MI
.getOperand(0).getReg();
1384 LLT DstTy
= MRI
.getType(DstReg
);
1385 if (DstTy
.getSizeInBits() != 64)
1388 LLT HalfTy
= getHalfSizedType(DstTy
);
1389 SmallVector
<Register
, 2> DefRegs(OpdMapper
.getVRegs(0));
1390 SmallVector
<Register
, 2> Src0Regs(OpdMapper
.getVRegs(1));
1391 SmallVector
<Register
, 2> Src1Regs(OpdMapper
.getVRegs(2));
1393 // All inputs are SGPRs, nothing special to do.
1394 if (DefRegs
.empty()) {
1395 assert(Src0Regs
.empty() && Src1Regs
.empty());
1399 assert(DefRegs
.size() == 2);
1400 assert(Src0Regs
.size() == Src1Regs
.size() &&
1401 (Src0Regs
.empty() || Src0Regs
.size() == 2));
1403 // Depending on where the source registers came from, the generic code may
1404 // have decided to split the inputs already or not. If not, we still need to
1405 // extract the values.
1406 MachineIRBuilder
B(MI
);
1408 if (Src0Regs
.empty())
1409 split64BitValueForMapping(B
, Src0Regs
, HalfTy
, MI
.getOperand(1).getReg());
1411 setRegsToType(MRI
, Src0Regs
, HalfTy
);
1413 if (Src1Regs
.empty())
1414 split64BitValueForMapping(B
, Src1Regs
, HalfTy
, MI
.getOperand(2).getReg());
1416 setRegsToType(MRI
, Src1Regs
, HalfTy
);
1418 setRegsToType(MRI
, DefRegs
, HalfTy
);
1422 .addUse(Src0Regs
[0])
1423 .addUse(Src1Regs
[0]);
1427 .addUse(Src0Regs
[1])
1428 .addUse(Src1Regs
[1]);
1430 MRI
.setRegBank(DstReg
, getRegBank(AMDGPU::VGPRRegBankID
));
1431 MI
.eraseFromParent();
1436 case AMDGPU::G_MUL
: {
1437 Register DstReg
= MI
.getOperand(0).getReg();
1438 LLT DstTy
= MRI
.getType(DstReg
);
1439 if (DstTy
!= LLT::scalar(16))
1442 const RegisterBank
*DstBank
= getRegBank(DstReg
, MRI
, *TRI
);
1443 if (DstBank
== &AMDGPU::VGPRRegBank
)
1446 // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
1447 MachineFunction
*MF
= MI
.getParent()->getParent();
1448 MachineIRBuilder
B(MI
);
1449 ApplyRegBankMapping
ApplySALU(MRI
, &AMDGPU::SGPRRegBank
);
1450 GISelObserverWrapper
Observer(&ApplySALU
);
1451 LegalizerHelper
Helper(*MF
, Observer
, B
);
1453 if (Helper
.widenScalar(MI
, 0, LLT::scalar(32)) !=
1454 LegalizerHelper::Legalized
)
1455 llvm_unreachable("widen scalar should have succeeded");
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;

    MachineFunction *MF = MI.getParent()->getParent();
    MachineIRBuilder B(MI);
    ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
    GISelObserverWrapper Observer(&ApplySALU);
    LegalizerHelper Helper(*MF, Observer, B);

    // Turn scalar min/max into a compare and select.
    LLT Ty = MRI.getType(DstReg);
    LLT S32 = LLT::scalar(32);
    LLT S16 = LLT::scalar(16);

    if (Ty == S16) {
      // Need to widen to s32, and expand as cmp + select.
      if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("widenScalar should have succeeded");

      // FIXME: This is relying on widenScalar leaving MI in place.
      if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    } else {
      if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
        llvm_unreachable("lower should have succeeded");
    }

    return;
  }
:
1494 case AMDGPU::G_ZEXT
: {
1495 Register SrcReg
= MI
.getOperand(1).getReg();
1496 LLT SrcTy
= MRI
.getType(SrcReg
);
1497 bool Signed
= Opc
== AMDGPU::G_SEXT
;
1499 MachineIRBuilder
B(MI
);
1500 const RegisterBank
*SrcBank
= getRegBank(SrcReg
, MRI
, *TRI
);
1502 Register DstReg
= MI
.getOperand(0).getReg();
1503 LLT DstTy
= MRI
.getType(DstReg
);
1504 if (DstTy
.isScalar() &&
1505 SrcBank
!= &AMDGPU::SGPRRegBank
&&
1506 SrcBank
!= &AMDGPU::SCCRegBank
&&
1507 SrcBank
!= &AMDGPU::VCCRegBank
&&
1508 // FIXME: Should handle any type that round to s64 when irregular
1509 // breakdowns supported.
1510 DstTy
.getSizeInBits() == 64 &&
1511 SrcTy
.getSizeInBits() <= 32) {
1512 const LLT S32
= LLT::scalar(32);
1513 SmallVector
<Register
, 2> DefRegs(OpdMapper
.getVRegs(0));
1515 // Extend to 32-bit, and then extend the low half.
1517 // TODO: Should really be buildSExtOrCopy
1518 B
.buildSExtOrTrunc(DefRegs
[0], SrcReg
);
1520 // Replicate sign bit from 32-bit extended part.
1521 auto ShiftAmt
= B
.buildConstant(S32
, 31);
1522 MRI
.setRegBank(ShiftAmt
.getReg(0), *SrcBank
);
1523 B
.buildAShr(DefRegs
[1], DefRegs
[0], ShiftAmt
);
1525 B
.buildZExtOrTrunc(DefRegs
[0], SrcReg
);
1526 B
.buildConstant(DefRegs
[1], 0);
1529 MRI
.setRegBank(DstReg
, *SrcBank
);
1530 MI
.eraseFromParent();
    if (SrcTy != LLT::scalar(1))
      return;

    if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));

      const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
        &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;

      unsigned DstSize = DstTy.getSizeInBits();
      // 64-bit select is SGPR only
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SCCRegBankID;

      // TODO: Should s16 select be legal?
      LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        B.buildCopy(DefRegs[1], DefRegs[0]);
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    // Fixup the case with an s1 src that isn't a condition register. Use shifts
    // instead of introducing a compare to avoid an unnecessary condition
    // register (and since there's no scalar 16-bit compares).
    auto Ext = B.buildAnyExt(DstTy, SrcReg);
    auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
    auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);

    if (MI.getOpcode() == AMDGPU::G_SEXT)
      B.buildAShr(DstReg, Shl, ShiftAmt);
    else
      B.buildLShr(DstReg, Shl, ShiftAmt);

    MRI.setRegBank(DstReg, *SrcBank);
    MRI.setRegBank(Ext.getReg(0), *SrcBank);
    MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
    MRI.setRegBank(Shl.getReg(0), *SrcBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy != LLT::vector(2, 16))
      break;

    assert(MI.getNumOperands() == 3 && OpdMapper.getVRegs(0).empty());
    substituteSimpleCopyRegs(OpdMapper, 1);
    substituteSimpleCopyRegs(OpdMapper, 2);

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    if (DstBank == &AMDGPU::SGPRRegBank)
      break; // Can use S_PACK_* instructions.

    MachineIRBuilder B(MI);

    Register Lo = MI.getOperand(1).getReg();
    Register Hi = MI.getOperand(2).getReg();
    const LLT S32 = LLT::scalar(32);

    const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI);
    const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI);

    Register ZextLo;
    Register ShiftHi;

    if (Opc == AMDGPU::G_BUILD_VECTOR) {
      ZextLo = B.buildZExt(S32, Lo).getReg(0);
      MRI.setRegBank(ZextLo, *BankLo);

      Register ZextHi = B.buildZExt(S32, Hi).getReg(0);
      MRI.setRegBank(ZextHi, *BankHi);

      auto ShiftAmt = B.buildConstant(S32, 16);
      MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);

      ShiftHi = B.buildShl(S32, ZextHi, ShiftAmt).getReg(0);
      MRI.setRegBank(ShiftHi, *BankHi);
    } else {
      Register MaskLo = B.buildConstant(S32, 0xffff).getReg(0);
      MRI.setRegBank(MaskLo, *BankLo);

      auto ShiftAmt = B.buildConstant(S32, 16);
      MRI.setRegBank(ShiftAmt.getReg(0), *BankHi);

      ShiftHi = B.buildShl(S32, Hi, ShiftAmt).getReg(0);
      MRI.setRegBank(ShiftHi, *BankHi);

      ZextLo = B.buildAnd(S32, Lo, MaskLo).getReg(0);
      MRI.setRegBank(ZextLo, *BankLo);
    }

    auto Or = B.buildOr(S32, ZextLo, ShiftHi);
    MRI.setRegBank(Or.getReg(0), *DstBank);

    B.buildBitcast(DstReg, Or);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));

    assert(OpdMapper.getVRegs(1).empty() && OpdMapper.getVRegs(2).empty());

    if (DstRegs.empty()) {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    Register IdxReg = MI.getOperand(2).getReg();
    LLT DstTy = MRI.getType(DstReg);

    assert(DstTy.getSizeInBits() == 64);

    LLT SrcTy = MRI.getType(SrcReg);
    const LLT S32 = LLT::scalar(32);
    LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);

    MachineIRBuilder B(MI);
    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);

    // Split the vector index into 32-bit pieces. Prepare to move all of the
    // new instructions into a waterfall loop if necessary.
    //
    // Don't put the bitcast or constant in the loop.
    MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());
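    // Span records every instruction built from this point on, so that, if
    // the index turns out to live in a VGPR, exactly those instructions can
    // be moved into the waterfall loop below.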
    // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
    auto IdxLo = B.buildShl(S32, IdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);
    B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
    B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);

    const ValueMapping &DstMapping
      = OpdMapper.getInstrMapping().getOperandMapping(0);

    // FIXME: Should be getting from mapping or not?
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
    MRI.setRegBank(DstReg, *DstMapping.BreakDown[0].RegBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

    SmallSet<Register, 4> OpsToWaterfall;
    if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 2 })) {
      MI.eraseFromParent();
      return;
    }

    // Remove the original instruction to avoid potentially confusing the
    // waterfall loop logic.
    B.setInstr(*Span.begin());
    MI.eraseFromParent();
    executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
                           OpsToWaterfall, MRI);
    return;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));

    assert(OpdMapper.getVRegs(0).empty());
    assert(OpdMapper.getVRegs(1).empty());
    assert(OpdMapper.getVRegs(3).empty());

    if (InsRegs.empty()) {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, { 3 });
      return;
    }

    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    Register InsReg = MI.getOperand(2).getReg();
    Register IdxReg = MI.getOperand(3).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT InsTy = MRI.getType(InsReg);

    assert(InsTy.getSizeInBits() == 64);

    const LLT S32 = LLT::scalar(32);
    LLT Vec32 = LLT::vector(2 * SrcTy.getNumElements(), 32);

    MachineIRBuilder B(MI);
    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);

    // Split the vector index into 32-bit pieces. Prepare to move all of the
    // new instructions into a waterfall loop if necessary.
    //
    // Don't put the bitcast or constant in the loop.
    MachineInstrSpan Span(MachineBasicBlock::iterator(&MI), &B.getMBB());

    // Compute 32-bit element indices, (2 * OrigIdx, 2 * OrigIdx + 1).
    auto IdxLo = B.buildShl(S32, IdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
    auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
    B.buildBitcast(DstReg, InsHi);
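    // Illustrative note: the 64-bit insert is emitted as two chained 32-bit
    // inserts (the second one consumes the result of the first), so InsHi is
    // the fully updated 32-bit-element vector that is bitcast back to the
    // original type above.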

    const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
    const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
    const RegisterBank *InsSrcBank = getRegBank(InsReg, MRI, *TRI);

    MRI.setRegBank(InsReg, *InsSrcBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(InsLo.getReg(0), *DstBank);
    MRI.setRegBank(InsHi.getReg(0), *DstBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);

    SmallSet<Register, 4> OpsToWaterfall;
    if (!collectWaterfallOperands(OpsToWaterfall, MI, MRI, { 3 })) {
      MI.eraseFromParent();
      return;
    }

    B.setInstr(*Span.begin());
    MI.eraseFromParent();

    executeInWaterfallLoop(B, make_range(Span.begin(), Span.end()),
                           OpsToWaterfall, MRI);
    return;
  }
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getIntrinsicID()) {
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
      executeInWaterfallLoop(MI, MRI, { 2, 3 });
      return;
    }
    case Intrinsic::amdgcn_readlane: {
      substituteSimpleCopyRegs(OpdMapper, 2);

      assert(OpdMapper.getVRegs(0).empty());
      assert(OpdMapper.getVRegs(3).empty());

      // Make sure the index is an SGPR. It doesn't make sense to run this in a
      // waterfall loop, so assume it's a uniform value.
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
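      // constrainOpWithReadfirstlane is expected to force this operand into
      // an SGPR (inserting a readfirstlane from the VGPR value if needed);
      // that is only valid because the index is assumed to be wave-uniform
      // here instead of being iterated with a waterfall loop.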
      return;
    }
    case Intrinsic::amdgcn_writelane: {
      assert(OpdMapper.getVRegs(0).empty());
      assert(OpdMapper.getVRegs(2).empty());
      assert(OpdMapper.getVRegs(3).empty());

      substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
      constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
      constrainOpWithReadfirstlane(MI, MRI, 3); // Index
      return;
    }
    default:
      break;
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    auto IntrID = MI.getIntrinsicID();
    switch (IntrID) {
    case Intrinsic::amdgcn_buffer_load: {
      executeInWaterfallLoop(MI, MRI, { 2 });
      return;
    }
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // This is only allowed to execute with 1 lane, so readfirstlane is safe.
      assert(OpdMapper.getVRegs(0).empty());
      substituteSimpleCopyRegs(OpdMapper, 3);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_ds_gws_init:
    case Intrinsic::amdgcn_ds_gws_barrier:
    case Intrinsic::amdgcn_ds_gws_sema_br: {
      // Only the first lane executes, so readfirstlane is safe.
      substituteSimpleCopyRegs(OpdMapper, 1);
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_ds_gws_sema_v:
    case Intrinsic::amdgcn_ds_gws_sema_p:
    case Intrinsic::amdgcn_ds_gws_sema_release_all: {
      // Only the first lane executes, so readfirstlane is safe.
      constrainOpWithReadfirstlane(MI, MRI, 1); // M0
      return;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // FIXME: Should this use a waterfall loop?
      constrainOpWithReadfirstlane(MI, MRI, 2); // M0
      return;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_buffer_load_format:
    case Intrinsic::amdgcn_raw_tbuffer_load:
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_tbuffer_store: {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, {2, 4});
      return;
    }
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_tbuffer_load:
    case Intrinsic::amdgcn_struct_tbuffer_store: {
      applyDefaultMapping(OpdMapper);
      executeInWaterfallLoop(MI, MRI, {2, 5});
      return;
    }
    default: {
      if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
              AMDGPU::lookupRsrcIntrinsic(IntrID)) {
        // Non-images can have complications from operands that allow both SGPR
        // and VGPR. For now it's too complicated to figure out the final opcode
        // to derive the register bank from the MCInstrDesc.
        if (RSrcIntrin->IsImage) {
          applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
          return;
        }
      }
      break;
    }
    }
    break;
  }
  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD: {
    if (applyMappingWideLoad(MI, OpdMapper, MRI))
      return;
    break;
  }
  default:
    break;
  }

  return applyDefaultMapping(OpdMapper);
}

bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg())
      continue;
    Register Reg = MI.getOperand(i).getReg();
    if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
      if (Bank->getID() == AMDGPU::VGPRRegBankID)
        return false;

      assert(Bank->getID() == AMDGPU::SGPRRegBankID ||
             Bank->getID() == AMDGPU::SCCRegBankID);
    }
  }
  return true;
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::SCCRegBankID : AMDGPU::SGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
  unsigned OpdIdx = 0;

  unsigned Size0 = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);

  if (MI.getOperand(OpdIdx).isIntrinsicID())
    OpdsMapping[OpdIdx++] = nullptr;

  Register Reg1 = MI.getOperand(OpdIdx).getReg();
  unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);

  unsigned DefaultBankID = Size1 == 1 ?
    AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
  unsigned Bank1 = getRegBankID(Reg1, MRI, *TRI, DefaultBankID);

  OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);

  for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
    const MachineOperand &MO = MI.getOperand(OpdIdx);
    if (!MO.isReg())
      continue;

    unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &Op = MI.getOperand(I);
    if (!Op.isReg())
      continue;

    unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
                                        const MachineInstr &MI,
                                        int RsrcIdx) const {
  // The reported argument index is relative to the IR intrinsic call arguments,
  // so we need to shift by the number of defs and the intrinsic ID.
  RsrcIdx += MI.getNumExplicitDefs() + 1;

  const int NumOps = MI.getNumOperands();
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);

  // TODO: Should packed/unpacked D16 difference be reported here as part of
  // the value mapping?
  for (int I = 0; I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;

    Register OpReg = MI.getOperand(I).getReg();
    unsigned Size = getSizeInBits(OpReg, MRI, *TRI);

    // FIXME: Probably need a new intrinsic register bank searchable table to
    // handle arbitrary intrinsics easily.

    // If this has a sampler, it immediately follows rsrc.
    const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;

    if (MustBeSGPR) {
      // This operand must ultimately be an SGPR, so report whatever bank it
      // currently has as legal.
      unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
      OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
    } else {
      // Some operands must be VGPR, and these are easy to copy to.
      OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    }
  }

  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}

const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<const ValueMapping*, 2> OpdsMapping(2);
  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
  Register PtrReg = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(PtrReg);
  unsigned AS = PtrTy.getAddressSpace();
  unsigned PtrSize = PtrTy.getSizeInBits();

  const ValueMapping *ValMapping;
  const ValueMapping *PtrMapping;

  const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);

  if (PtrBank == &AMDGPU::SGPRRegBank &&
      (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
       AS != AMDGPUAS::PRIVATE_ADDRESS) &&
      isInstrUniformNonExtLoadAlign4(MI)) {
    // We have a uniform instruction so we want to use an SMRD load
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
      1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
  return Mapping;

  // FIXME: Do we want to add a mapping for FLAT load, or should we just
  // handle that during instruction selection?
}

unsigned
AMDGPURegisterBankInfo::getRegBankID(Register Reg,
                                     const MachineRegisterInfo &MRI,
                                     const TargetRegisterInfo &TRI,
                                     unsigned Default) const {

  const RegisterBank *Bank = getRegBank(Reg, MRI, TRI);
  return Bank ? Bank->getID() : Default;
}

static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
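  // The union is SGPR only when both inputs are SGPR; any other combination
  // is treated as divergent, e.g.
  //   regBankUnion(SGPR, SGPR) == SGPR
  //   regBankUnion(SGPR, VGPR) == VGPR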
  return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
    AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
}

const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI) const {
  // Lie and claim anything is legal, even though this needs to be an SGPR;
  // applyMapping will have to deal with it as a waterfall loop.
  unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
  unsigned Size = getSizeInBits(Reg, MRI, TRI);
  return AMDGPU::getValueMapping(Bank, Size);
}

const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI) const {
  unsigned Size = getSizeInBits(Reg, MRI, TRI);
  return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}

/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
/// in RegBankSelect::Mode::Fast. Any mapping that would cause a
/// VGPR-to-SGPR copy to be generated is illegal.
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (MI.isRegSequence()) {
    // If any input is a VGPR, the result must be a VGPR. The default handling
    // assumes any copy between banks is legal.
    unsigned BankID = AMDGPU::SGPRRegBankID;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
      // It doesn't make sense to use vcc or scc banks here, so just ignore
      // them.
      if (OpBank != AMDGPU::SGPRRegBankID) {
        BankID = AMDGPU::VGPRRegBankID;
        break;
      }
    }
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

    const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
    return getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
  }

  // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
  // properly.
  //
  // TODO: There are additional exec masking dependencies to analyze.
  if (MI.getOpcode() == TargetOpcode::G_PHI) {
    // TODO: Generate proper invalid bank enum.
    int ResultBank = -1;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      Register Reg = MI.getOperand(I).getReg();
      const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);

      // FIXME: Assuming VGPR for any undetermined inputs.
      if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
        ResultBank = AMDGPU::VGPRRegBankID;
        break;
      }

      unsigned OpBank = Bank->getID();
      if (OpBank == AMDGPU::SCCRegBankID) {
        // There's only one SCC register, so a phi requires copying to SGPR.
        OpBank = AMDGPU::SGPRRegBankID;
      } else if (OpBank == AMDGPU::VCCRegBankID) {
        // vcc, sgpr -> vgpr
        if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
          ResultBank = AMDGPU::VGPRRegBankID;
          break;
        }
      }

      ResultBank = OpBank;
    }

    assert(ResultBank != -1);

    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

    const ValueMapping &ValMap =
      getValueMapping(0, Size, getRegBank(ResultBank));
    return getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
  }

  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
  if (Mapping.isValid())
    return Mapping;

  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

  switch (MI.getOpcode()) {
  default:
    return getInvalidInstructionMapping();

  case AMDGPU::G_XOR: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (Size == 1) {
      const RegisterBank *DstBank
        = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);

      unsigned TargetBankID = -1;
      unsigned BankLHS = -1;
      unsigned BankRHS = -1;
      if (DstBank) {
        TargetBankID = DstBank->getID();
        if (DstBank == &AMDGPU::VCCRegBank) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;
        } else if (DstBank == &AMDGPU::SCCRegBank) {
          TargetBankID = AMDGPU::SCCRegBankID;
          BankLHS = AMDGPU::SGPRRegBankID;
          BankRHS = AMDGPU::SGPRRegBankID;
        } else {
          BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
                                 AMDGPU::SGPRRegBankID);
          BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                 AMDGPU::SGPRRegBankID);
        }
      } else {
        BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
                               AMDGPU::VCCRegBankID);
        BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                               AMDGPU::VCCRegBankID);

        // Both inputs should be true booleans to produce a boolean result.
        if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
          TargetBankID = AMDGPU::VGPRRegBankID;
        } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;
        } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
          TargetBankID = AMDGPU::SGPRRegBankID;
        } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
          // The operation must be done on a 32-bit register, but it will set
          // scc. The result type could interchangeably be SCC or SGPR, since
          // both values will be produced.
          TargetBankID = AMDGPU::SCCRegBankID;
          BankLHS = AMDGPU::SGPRRegBankID;
          BankRHS = AMDGPU::SGPRRegBankID;
        }
      }

      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
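      // Summary of the boolean (s1) handling above: when the destination
      // already has a bank, that choice wins; otherwise an input in a VGPR
      // forces the whole operation to VGPR, a VCC input makes everything VCC,
      // two SGPR inputs stay SGPR, and an SCC input means the operation is
      // really a 32-bit SGPR op that also produces SCC.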
      break;
    }

    if (Size == 64) {

      if (isSALUMapping(MI)) {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
      } else {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
        unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI /*, DefaultBankID*/);
        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);

        unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI /*, DefaultBankID*/);
        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
      }

      break;
    }

    LLVM_FALLTHROUGH;
  }

  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_SADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_SSUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
    if (isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    LLVM_FALLTHROUGH;

  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FPTOSI:
  case AMDGPU::G_FPTOUI:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FSQRT:
  case AMDGPU::G_FFLOOR:
  case AMDGPU::G_FCEIL:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_SITOFP:
  case AMDGPU::G_UITOFP:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_FEXP2:
  case AMDGPU::G_FLOG2:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_AMDGPU_FFBH_U32:
    return getDefaultMappingVOP(MI);
  case AMDGPU::G_UMULH:
  case AMDGPU::G_SMULH: {
    if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_IMPLICIT_DEF: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_FCONSTANT:
  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_GLOBAL_VALUE:
  case AMDGPU::G_BLOCK_ADDR: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_FRAME_INDEX: {
    // TODO: This should be the same as other constants, but eliminateFrameIndex
    // currently assumes VALU uses.
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_INSERT: {
    unsigned BankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
                                          AMDGPU::VGPRRegBankID;
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
    OpdsMapping[3] = nullptr;
    break;
  }
  case AMDGPU::G_EXTRACT: {
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = nullptr;
    break;
  }
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    if (DstTy == LLT::vector(2, 16)) {
      unsigned DstSize = DstTy.getSizeInBits();
      unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
      unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
      unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
      unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);

      OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
      OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
      OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
      break;
    }

    LLVM_FALLTHROUGH;
  }
  case AMDGPU::G_MERGE_VALUES:
  case AMDGPU::G_CONCAT_VECTORS: {
    unsigned Bank = isSALUMapping(MI) ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    // Op1 and Dst should use the same register bank.
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_INTTOPTR:
  case AMDGPU::G_PTRTOINT:
  case AMDGPU::G_CTLZ:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ:
  case AMDGPU::G_CTTZ_ZERO_UNDEF:
  case AMDGPU::G_CTPOP:
  case AMDGPU::G_BSWAP:
  case AMDGPU::G_BITREVERSE:
  case AMDGPU::G_FABS:
  case AMDGPU::G_FNEG: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_TRUNC: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    unsigned Bank = getRegBankID(Src, MRI, *TRI);
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ANYEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);

    unsigned DstBank;
    const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
    switch (SrcBank->getID()) {
    case AMDGPU::SCCRegBankID:
    case AMDGPU::SGPRRegBankID:
      DstBank = AMDGPU::SGPRRegBankID;
      break;
    default:
      DstBank = AMDGPU::VGPRRegBankID;
      break;
    }

    // TODO: Should anyext be split into 32-bit part as well?
    if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
      OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
      OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
    } else {
      // Scalar extend can use 64-bit BFE, but VGPRs require extending to
      // 32-bits, and then to 64.
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
      OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
                                                         SrcSize);
    }
    break;
  }
  case AMDGPU::G_FCMP: {
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[1] = nullptr; // Predicate Operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    break;
  }
  case AMDGPU::G_STORE: {
    assert(MI.getOperand(0).isReg());
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    // FIXME: We need to specify a different reg bank once scalar stores
    // are supported.
    const ValueMapping *ValMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // FIXME: Depending on the type of store, the pointer could be in
    // the SGPR Reg bank.
    // FIXME: Pointer size should be based on the address space.
    const ValueMapping *PtrMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

    OpdsMapping[0] = ValMapping;
    OpdsMapping[1] = PtrMapping;
    break;
  }
  case AMDGPU::G_ICMP: {
    auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);

    bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
                     Op3Bank == AMDGPU::SGPRRegBankID &&
                     (Size == 32 || (Size == 64 &&
                                     (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
                                     Subtarget.hasScalarCompareEq64()));
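    // For example, a 32-bit compare of two SGPR operands can stay scalar and
    // produce its result in SCC; 64-bit scalar compares are limited to eq/ne
    // and to subtargets with hasScalarCompareEq64(). Anything else gets a
    // per-lane VCC result.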

    unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;

    OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
    OpdsMapping[1] = nullptr; // Predicate Operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
    break;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    // VGPR index can be used for waterfall when indexing a SGPR vector.
    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
    unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);

    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);

    // The index can be either if the source vector is VGPR.
    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    break;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    unsigned OutputBankID = isSALUMapping(MI) ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(),
                                            MRI, *TRI);
    unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);

    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize);
    OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID,
                                                       InsertSize);

    // The index can be either if the source vector is VGPR.
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
    break;
  }
  case AMDGPU::G_UNMERGE_VALUES: {
    unsigned Bank = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID :
                                        AMDGPU::VGPRRegBankID;

    // Op1 and Dst should use the same register bank.
    // FIXME: Shouldn't this be the default? Why do we need to handle this?
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
    }
    break;
  }
  case AMDGPU::G_INTRINSIC: {
    switch (MI.getIntrinsicID()) {
    default:
      return getInvalidInstructionMapping();
    case Intrinsic::amdgcn_div_fmas:
    case Intrinsic::amdgcn_trig_preop:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_cos:
    case Intrinsic::amdgcn_log_clamp:
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_rsq:
    case Intrinsic::amdgcn_rsq_legacy:
    case Intrinsic::amdgcn_rsq_clamp:
    case Intrinsic::amdgcn_ldexp:
    case Intrinsic::amdgcn_frexp_mant:
    case Intrinsic::amdgcn_frexp_exp:
    case Intrinsic::amdgcn_fract:
    case Intrinsic::amdgcn_cvt_pkrtz:
    case Intrinsic::amdgcn_cvt_pknorm_i16:
    case Intrinsic::amdgcn_cvt_pknorm_u16:
    case Intrinsic::amdgcn_cvt_pk_i16:
    case Intrinsic::amdgcn_cvt_pk_u16:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_cubeid:
    case Intrinsic::amdgcn_cubema:
    case Intrinsic::amdgcn_cubesc:
    case Intrinsic::amdgcn_cubetc:
    case Intrinsic::amdgcn_sffbh:
    case Intrinsic::amdgcn_fmad_ftz:
    case Intrinsic::amdgcn_mbcnt_lo:
    case Intrinsic::amdgcn_mbcnt_hi:
    case Intrinsic::amdgcn_ubfe:
    case Intrinsic::amdgcn_sbfe:
    case Intrinsic::amdgcn_mul_u24:
    case Intrinsic::amdgcn_mul_i24:
    case Intrinsic::amdgcn_lerp:
    case Intrinsic::amdgcn_sad_u8:
    case Intrinsic::amdgcn_msad_u8:
    case Intrinsic::amdgcn_sad_hi_u8:
    case Intrinsic::amdgcn_sad_u16:
    case Intrinsic::amdgcn_qsad_pk_u16_u8:
    case Intrinsic::amdgcn_mqsad_pk_u16_u8:
    case Intrinsic::amdgcn_mqsad_u32_u8:
    case Intrinsic::amdgcn_cvt_pk_u8_f32:
    case Intrinsic::amdgcn_alignbit:
    case Intrinsic::amdgcn_alignbyte:
    case Intrinsic::amdgcn_fdot2:
    case Intrinsic::amdgcn_sdot2:
    case Intrinsic::amdgcn_udot2:
    case Intrinsic::amdgcn_sdot4:
    case Intrinsic::amdgcn_udot4:
    case Intrinsic::amdgcn_sdot8:
    case Intrinsic::amdgcn_udot8:
    case Intrinsic::amdgcn_wwm:
    case Intrinsic::amdgcn_wqm:
      return getDefaultMappingVOP(MI);
    case Intrinsic::amdgcn_ds_swizzle:
    case Intrinsic::amdgcn_ds_permute:
    case Intrinsic::amdgcn_ds_bpermute:
    case Intrinsic::amdgcn_update_dpp:
      return getDefaultMappingAllVGPR(MI);
    case Intrinsic::amdgcn_kernarg_segment_ptr:
    case Intrinsic::amdgcn_s_getpc:
    case Intrinsic::amdgcn_groupstaticsize: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_wqm_vote: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = OpdsMapping[2]
        = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_s_buffer_load: {
      // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
      Register RSrc = MI.getOperand(2).getReg();   // SGPR
      Register Offset = MI.getOperand(3).getReg(); // SGPR/imm

      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
      unsigned Size3 = MRI.getType(Offset).getSizeInBits();

      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);

      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
      OpdsMapping[1] = nullptr; // intrinsic id

      // Lie and claim everything is legal, even though some need to be
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
      OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
      OpdsMapping[4] = nullptr;
      break;
    }
    case Intrinsic::amdgcn_div_scale: {
      unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);

      unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
      OpdsMapping[3] = AMDGPU::getValueMapping(
          getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
      OpdsMapping[4] = AMDGPU::getValueMapping(
          getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
      break;
    }
    case Intrinsic::amdgcn_class: {
      Register Src0Reg = MI.getOperand(2).getReg();
      Register Src1Reg = MI.getOperand(3).getReg();
      unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
      unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
      OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
                                               Src0Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
                                               Src1Size);
      break;
    }
    case Intrinsic::amdgcn_icmp:
    case Intrinsic::amdgcn_fcmp: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      // This is not VCCRegBank because this is not used in boolean contexts.
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
      unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
      unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
      unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
      OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
      break;
    }
    case Intrinsic::amdgcn_readlane: {
      // This must be an SGPR, but accept a VGPR.
      Register IdxReg = MI.getOperand(3).getReg();
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
      LLVM_FALLTHROUGH;
    }
    case Intrinsic::amdgcn_readfirstlane: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      break;
    }
    case Intrinsic::amdgcn_writelane: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      Register SrcReg = MI.getOperand(2).getReg();
      unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
      unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
      Register IdxReg = MI.getOperand(3).getReg();
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
      unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

      // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
      // to legalize.
      OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      break;
    }
    case Intrinsic::amdgcn_if_break: {
      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    }
    break;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
    auto IntrID = MI.getIntrinsicID();
    switch (IntrID) {
    case Intrinsic::amdgcn_s_getreg:
    case Intrinsic::amdgcn_s_memtime:
    case Intrinsic::amdgcn_s_memrealtime:
    case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_ds_append:
    case Intrinsic::amdgcn_ds_consume:
    case Intrinsic::amdgcn_ds_fadd:
    case Intrinsic::amdgcn_ds_fmin:
    case Intrinsic::amdgcn_ds_fmax:
    case Intrinsic::amdgcn_atomic_inc:
    case Intrinsic::amdgcn_atomic_dec:
      return getDefaultMappingAllVGPR(MI);
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
      unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                     AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      break;
    }
    case Intrinsic::amdgcn_exp_compr:
      OpdsMapping[0] = nullptr; // IntrinsicID
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      // FIXME: Could we support packed types here?
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      // FIXME: These are immediate values which can't be read from registers.
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_exp:
      // FIXME: Could we support packed types here?
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
      break;
    case Intrinsic::amdgcn_buffer_load: {
      Register RSrc = MI.getOperand(2).getReg();   // SGPR
      Register VIndex = MI.getOperand(3).getReg(); // VGPR
      Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm

      unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
      unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
      unsigned Size4 = MRI.getType(Offset).getSizeInBits();

      unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
      unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);

      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
      OpdsMapping[1] = nullptr; // intrinsic id

      // Lie and claim everything is legal, even though some need to be
      // SGPRs. applyMapping will have to deal with it as a waterfall loop.
      OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
      OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
      OpdsMapping[5] = nullptr;
      OpdsMapping[6] = nullptr;
      break;
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_end_cf:
    case Intrinsic::amdgcn_init_exec: {
      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_else: {
      unsigned WaveSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
      break;
    }
    case Intrinsic::amdgcn_kill: {
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      break;
    }
    case Intrinsic::amdgcn_raw_buffer_load:
    case Intrinsic::amdgcn_raw_tbuffer_load: {
      // FIXME: Should make intrinsic ID the last operand of the instruction,
      // then this would be the same as store
      OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
      OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_raw_buffer_store:
    case Intrinsic::amdgcn_raw_buffer_store_format:
    case Intrinsic::amdgcn_raw_tbuffer_store: {
      OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
      OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_struct_buffer_load:
    case Intrinsic::amdgcn_struct_tbuffer_load: {
      OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
      OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
      OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_struct_buffer_store:
    case Intrinsic::amdgcn_struct_tbuffer_store: {
      OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
      OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
      OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
      OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_init_exec_from_input: {
      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_ds_gws_init:
    case Intrinsic::amdgcn_ds_gws_barrier:
    case Intrinsic::amdgcn_ds_gws_sema_br: {
      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);

      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    case Intrinsic::amdgcn_ds_gws_sema_v:
    case Intrinsic::amdgcn_ds_gws_sema_p:
    case Intrinsic::amdgcn_ds_gws_sema_release_all: {
      // This must be an SGPR, but accept a VGPR.
      unsigned Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
                                   AMDGPU::SGPRRegBankID);
      OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
      break;
    }
    default:
      if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
              AMDGPU::lookupRsrcIntrinsic(IntrID)) {
        // Non-images can have complications from operands that allow both SGPR
        // and VGPR. For now it's too complicated to figure out the final opcode
        // to derive the register bank from the MCInstrDesc.
        if (RSrcIntrin->IsImage)
          return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
      }

      return getInvalidInstructionMapping();
    }
    break;
  }
  case AMDGPU::G_SELECT: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
                                    AMDGPU::SGPRRegBankID);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
                                    AMDGPU::SGPRRegBankID);
    bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
                    Op3Bank == AMDGPU::SGPRRegBankID;

    unsigned CondBankDefault = SGPRSrcs ?
      AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
    unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
                                     CondBankDefault);
    if (CondBank == AMDGPU::SGPRRegBankID)
      CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
    else if (CondBank == AMDGPU::VGPRRegBankID)
      CondBank = AMDGPU::VCCRegBankID;

    unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
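    // An SCC condition with scalar sources keeps the whole select scalar
    // (the 64-bit case below uses the SGPR64-only split mapping); a VCC
    // condition means a per-lane select, so the result must live in VGPRs.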

    if (Size == 64) {
      OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
      OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
    } else {
      OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
    }

    break;
  }

  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD:
    return getInstrMappingForLoad(MI);

  case AMDGPU::G_ATOMICRMW_XCHG:
  case AMDGPU::G_ATOMICRMW_ADD:
  case AMDGPU::G_ATOMICRMW_SUB:
  case AMDGPU::G_ATOMICRMW_AND:
  case AMDGPU::G_ATOMICRMW_OR:
  case AMDGPU::G_ATOMICRMW_XOR:
  case AMDGPU::G_ATOMICRMW_MAX:
  case AMDGPU::G_ATOMICRMW_MIN:
  case AMDGPU::G_ATOMICRMW_UMAX:
  case AMDGPU::G_ATOMICRMW_UMIN:
  case AMDGPU::G_ATOMICRMW_FADD:
  case AMDGPU::G_ATOMIC_CMPXCHG: {
    return getDefaultMappingAllVGPR(MI);
  }
  case AMDGPU::G_BRCOND: {
    unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
                                 AMDGPU::SGPRRegBankID);
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
    if (Bank != AMDGPU::SCCRegBankID)
      Bank = AMDGPU::VCCRegBankID;

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
    break;
  }
  }

  return getInstructionMapping(/*ID*/1, /*Cost*/1,
                               getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());