//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      break;
    }
  }
}

static cl::opt<bool> EnableSpillSGPRToSMEM(
  "amdgpu-spill-sgpr-to-smem",
  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  cl::init(false));

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  AMDGPURegisterInfo(),
  SGPRPressureSets(getNumRegPressureSets()),
  VGPRPressureSets(getNumRegPressureSets()),
  SpillSGPRToVGPR(false),
  SpillSGPRToSMEM(false) {
  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
    SpillSGPRToSMEM = true;
  else if (EnableSpillSGPRToVGPR)
    SpillSGPRToVGPR = true;

  unsigned NumRegPressureSets = getNumRegPressureSets();

  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

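// Note on the constructor above: after classification, SGPRSetID and VGPRSetID
// are narrowed to the pressure set with the largest number of register units
// among the sets flagged in SGPRPressureSets / VGPRPressureSets (i.e. the
// "main" SGPR and VGPR pressure sets); the asserts only check that such a set
// was found.
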
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

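// Worked example for reservedPrivateSegmentBufferReg (illustrative): with
// ST.getMaxNumSGPRs(MF) == 102, alignDown(102, 4) == 100, so BaseIdx == 96 and
// the reserved resource descriptor is the 128-bit tuple starting at SGPR96
// (SGPR96_SGPR97_SGPR98_SGPR99).
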
static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
  unsigned Reg;

  // Try to place it in a hole after PrivateSegmentBufferReg.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave
    // offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }

  return Reg;
}

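// Worked example (assuming the reconstruction above): with RegCount == 102,
// 102 & 3 != 0, so the wave byte offset goes in the alignment hole at index
// 101; with RegCount == 104 the buffer descriptor can sit at indices 100..103,
// so the wave offset is placed just below it at index 99.
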
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
  const MachineFunction &MF) const {
  return AMDGPU::SGPR32;
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  unsigned StackPtrReg = MFI->getStackPtrOffsetReg();

  if (StackPtrReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  unsigned FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  return Reserved;
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasStackObjects())
    return true;

  // May need to deal with callee saved registers.
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  return !Info->isEntryFunction();
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
  const MachineFunction &MF) const {
  // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
  // create a virtual register for it during frame index elimination, so the
  // scavenger is directly needed.
  return MF.getFrameInfo().hasStackObjects() &&
         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  return getMUBUFInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!MI->mayLoadOrStore())
    return false;

  int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);

  return !isUInt<12>(FullOffset);
}

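// The MUBUF offset field is a 12-bit unsigned immediate, so isUInt<12> above
// means "fits in 0..4095 bytes"; e.g. a base offset of 4000 plus an
// instruction offset of 200 (4200 total) no longer fits and requires a frame
// base register.
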
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
    .addFrameIndex(FrameIdx);

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(FIReg);
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI));
  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
         MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
         "should only be seeing frame offset relative FrameIndex");

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return false;

  int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);

  return isUInt<12>(NewOffset);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  MachineInstrBuilder NewMI =
      BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
          .addImm(Offset)
          .addImm(0) // glc
          .addImm(0) // slc
          .addImm(0) // tfe
          .cloneMemRefs(*MI);

  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
                                         unsigned LoadStoreOp,
                                         int Index,
                                         unsigned ValueReg,
                                         bool IsKill,
                                         unsigned ScratchRsrcReg,
                                         unsigned ScratchOffsetReg,
                                         int64_t InstOffset,
                                         MachineMemOperand *MMO,
                                         RegScavenger *RS) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  const MCInstrDesc &Desc = TII->get(LoadStoreOp);
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = Desc.mayStore();

  bool Scavenged = false;
  unsigned SOffset = ScratchOffsetReg;

  const unsigned EltSize = 4;
  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
  unsigned Size = NumSubRegs * EltSize;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  int64_t ScratchOffsetRegDelta = 0;

  unsigned Align = MFI.getObjectAlignment(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");

  if (!isUInt<12>(Offset + Size - EltSize)) {
    SOffset = AMDGPU::NoRegister;

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with the
    // wave size.
    Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is
    // called during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI, and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
      .addReg(ScratchOffsetReg)
      .addImm(Offset);

    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    unsigned SubReg = NumSubRegs == 1 ?
      ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));

    auto MIB = BuildMI(*MBB, MI, DL, Desc)
      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addMemOperand(NewMMO);

    if (NumSubRegs > 1)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
      .addReg(ScratchOffsetReg)
      .addImm(ScratchOffsetRegDelta);
  }
}

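// Illustrative numbers for buildSpillLoadStore: spilling a 128-bit VGPR tuple
// gives NumSubRegs = 128 / 32 = 4, so the pseudo is unrolled into four dword
// accesses at Offset, Offset+4, Offset+8 and Offset+12. When the largest of
// those offsets does not fit the 12-bit MUBUF immediate, the code above folds
// the (wave-size-scaled) offset into an SGPR and uses it as soffset instead,
// undoing the addition afterwards if it had to reuse ScratchOffsetReg.
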
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                                                     bool Store) {
  if (SuperRegSize % 16 == 0) {
    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
  }

  if (SuperRegSize % 8 == 0) {
    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
  }

  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
}

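// Example for getSpillEltSize: a 256-bit SGPR tuple is 32 bytes, and
// 32 % 16 == 0, so it is spilled as two 16-byte pieces with
// S_BUFFER_STORE_DWORDX4_SGPR (or reloaded with S_BUFFER_LOAD_DWORDX4_SGPR);
// a 64-bit pair (8 bytes) maps to the DWORDX2 variants, and a single 32-bit
// SGPR falls through to the DWORD variants.
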
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               bool OnlyToVGPR) const {
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  DenseSet<unsigned> SGPRSpillVGPRDefinedSet;

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();
  const DebugLoc &DL = MI->getDebugLoc();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();

  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                         SuperReg != MFI->getFrameOffsetReg() &&
                         SuperReg != MFI->getScratchWaveOffsetReg()));

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned ScalarStoreOp;
  unsigned EltSize = 4;
  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarStoreOp) =
          getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      int64_t FrOffset = FrameInfo.getObjectOffset(Index);

      // The allocated memory size is really the wavefront size * the frame
      // index size. The widest register class is 64 bytes, so a 4-byte scratch
      // allocation is enough to spill this in a single stack object.
      //
      // FIXME: Frame size/offsets are computed earlier than this, so the extra
      // space is still unnecessarily allocated.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));

      // SMEM instructions only support a single offset, so increment the wave
      // offset.

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
        .addReg(SubReg, getKillRegState(IsKill)) // sdata
        .addReg(MFI->getScratchRSrcReg())        // sbase
        .addReg(OffsetReg, RegState::Kill)       // soff
        .addImm(0)                               // glc
        .addMemOperand(MMO);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      // During SGPR spilling to VGPR, determine if the VGPR is defined. The
      // only circumstance in which we say it is undefined is when it is the
      // first spill to this VGPR in the first basic block.
      bool VGPRDefined = true;
      if (MBB == &MF->front())
        VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;

      // Mark the "old value of vgpr" input undef only if this is the first sgpr
      // spill to this specific vgpr in the first basic block.
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane)
        .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // XXX - Can the to-VGPR spill fail for some subregisters but not others?

      // Spill SGPR to a frame index.
      // TODO: Should VI try to spill to VGPR and then spill to SMEM?
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);

      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   EltSize, MinAlign(Align, EltSize * i));
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)    // src
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
  return true;
}

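// Shape of the spill emitted above (illustrative): spilling a 64-bit SGPR pair
// to VGPR lanes becomes two V_WRITELANE_B32 instructions, one per 32-bit
// subregister, each writing the lane recorded in
// SIMachineFunctionInfo::SpilledReg; the SMEM path instead materializes the
// wave-scaled offset in m0 and issues scalar buffer stores.
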
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS,
                                 bool OnlyToVGPR) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
    = MFI->getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned SuperReg = MI->getOperand(0).getReg();
  bool SpillToSMEM = spillSGPRToSMEM();
  if (SpillToSMEM && OnlyToVGPR)
    return false;

  assert(SuperReg != AMDGPU::M0 && "m0 should never spill");

  unsigned OffsetReg = AMDGPU::M0;
  unsigned M0CopyReg = AMDGPU::NoRegister;

  if (SpillToSMEM) {
    if (RS->isRegUsed(AMDGPU::M0)) {
      M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
        .addReg(AMDGPU::M0);
    }
  }

  unsigned EltSize = 4;
  unsigned ScalarLoadOp;

  const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
  if (SpillToSMEM && isSGPRClass(RC)) {
    // XXX - if private_element_size is larger than 4 it might be useful to be
    // able to spill wider vmem spills.
    std::tie(EltSize, ScalarLoadOp) =
          getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
  }

  ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
  unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  int64_t FrOffset = FrameInfo.getObjectOffset(Index);

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, SplitParts[i]);

    if (SpillToSMEM) {
      // FIXME: Size may be > 4 but extra bytes wasted.
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                   EltSize, MinAlign(Align, EltSize * i));

      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
      if (Offset != 0) {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg())
          .addImm(Offset);
      } else {
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
          .addReg(MFI->getFrameOffsetReg());
      }

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
        .addReg(MFI->getScratchRSrcReg())  // sbase
        .addReg(OffsetReg, RegState::Kill) // soff
        .addImm(0)                         // glc
        .addMemOperand(MMO);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);

      continue;
    }

    if (SpillToVGPR) {
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
      auto MIB =
        BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane);

      if (NumSubRegs > 1)
        MIB.addReg(SuperReg, RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      unsigned Align = FrameInfo.getObjectAlignment(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);

      MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
        MachineMemOperand::MOLoad, EltSize,
        MinAlign(Align, EltSize * i));

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)              // vaddr
        .addReg(MFI->getScratchRSrcReg())  // srsrc
        .addReg(MFI->getFrameOffsetReg())  // soffset
        .addImm(i * 4)                     // offset
        .addMemOperand(MMO);

      auto MIB =
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill);

      if (NumSubRegs > 1)
        MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  if (M0CopyReg != AMDGPU::NoRegister) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill);
  }

  MI->eraseFromParent();
  return true;
}

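// restoreSGPR mirrors spillSGPR: V_READLANE_B32 pulls each 32-bit piece back
// out of the recorded VGPR lane, the SMEM path uses scalar buffer loads, and
// the stack-slot fallback goes through a temporary VGPR followed by
// V_READFIRSTLANE_B32.
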
/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS) const {
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, true);
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, true);
  default:
    llvm_unreachable("not an SGPR spill instruction");
  }
}

void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                         int SPAdj, unsigned FIOperandNum,
                                         RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE: {
    spillSGPR(MI, Index, RS);
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE: {
    restoreSGPR(MI, Index, RS);
    break;
  }

  // VGPR register spill
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V32_SAVE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);
    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    MI->eraseFromParent();
    break;
  }
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_V512_RESTORE: {
    const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata);

    buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          Index,
          VData->getReg(), VData->isKill(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(),
          RS);
    MI->eraseFromParent();
    break;
  }

  default: {
    const DebugLoc &DL = MI->getDebugLoc();
    bool IsMUBUF = TII->isMUBUF(*MI);

    if (!IsMUBUF &&
        MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
      // Convert to an absolute stack address by finding the offset from the
      // scratch wave base and scaling by the wave size.
      //
      // In an entry function/kernel the stack address is already the
      // absolute address relative to the scratch wave offset.

      unsigned DiffReg
        = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
      unsigned ResultReg = IsCopy ?
        MI->getOperand(0).getReg() :
        MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
        .addReg(MFI->getFrameOffsetReg())
        .addReg(MFI->getScratchWaveOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (Offset == 0) {
        // XXX - This never happens because of emergency scavenging slot at 0?
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg);
      } else {
        unsigned ScaledReg
          = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
          .addImm(Log2_32(ST.getWavefrontSize()))
          .addReg(DiffReg, RegState::Kill);

        // TODO: Fold if use instruction is another add of a constant.
        if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addImm(Offset)
            .addReg(ScaledReg, RegState::Kill);
        } else {
          unsigned ConstOffsetReg
            = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
            .addImm(Offset);
          TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
            .addReg(ConstOffsetReg, RegState::Kill)
            .addReg(ScaledReg, RegState::Kill);
        }
      }

      // Don't introduce an extra copy if we're just materializing in a mov.
      if (IsCopy)
        MI->eraseFromParent();
      else
        FIOp.ChangeToRegister(ResultReg, false, false, true);
      return;
    }

    if (IsMUBUF) {
      // Disable offen so we don't need a 0 vgpr base.
      assert(static_cast<int>(FIOperandNum) ==
             AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                        AMDGPU::OpName::vaddr));

      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
             == MFI->getFrameOffsetReg());

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      int64_t OldImm
        = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
      int64_t NewOffset = OldImm + Offset;

      if (isUInt<12>(NewOffset) &&
          buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
        MI->eraseFromParent();
        return;
      }
    }

    // If the offset is simply too big, don't convert to a scratch wave offset
    // relative index.

    int64_t Offset = FrameInfo.getObjectOffset(Index);
    FIOp.ChangeToImmediate(Offset);
    if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addImm(Offset);
      FIOp.ChangeToRegister(TmpReg, false, false, true);
    }
  }
  }
}

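// Rough picture of the default case above (illustrative): for a non-MUBUF user
// when the frame offset register differs from the scratch wave offset register
// (i.e. not an entry function), the frame index is rewritten to
//   ResultReg = ((FrameOffsetReg - ScratchWaveOffsetReg) >> log2(wave size))
//               + ObjectOffset
// so with a wave size of 64 the SGPR difference is shifted right by 6 before
// the per-object byte offset is added via TII->getAddNoCarry.
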
StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
  #define AMDGPU_REG_ASM_NAMES
  #include "AMDGPURegAsmNames.inc.cpp"

  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    if (Reg >= BeginReg && Reg <= EndReg) {                \
      unsigned Index = Reg - BeginReg;                     \
      assert(Index < array_lengthof(RegTable));            \
      return RegTable[Index];                              \
    }

  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
            VGPR96RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR128RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR128RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR256RegNames);

  REG_RANGE(
    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    VGPR512RegNames);

  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR256RegNames);

  REG_RANGE(
    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    SGPR512RegNames);

#undef REG_RANGE

  // FIXME: Rename flat_scr so we don't need to special case this.
  switch (Reg) {
  case AMDGPU::FLAT_SCR:
    return "flat_scratch";
  case AMDGPU::FLAT_SCR_LO:
    return "flat_scratch_lo";
  case AMDGPU::FLAT_SCR_HI:
    return "flat_scratch_hi";
  default:
    // For the special named registers the default is fine.
    return TargetRegisterInfo::getRegAsmName(Reg);
  }
}

// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
    &AMDGPU::Pseudo_SReg_32RegClass,
    &AMDGPU::Pseudo_SReg_128RegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
  unsigned Size = getRegSizeInBits(*RC);
  if (Size < 32)
    return false;
  switch (Size) {
  case 32:
    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
  case 64:
    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
  case 96:
    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
  case 128:
    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
  case 256:
    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
  case 512:
    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                         const TargetRegisterClass *SRC) const {
  switch (getRegSizeInBits(*SRC)) {
  case 32:
    return &AMDGPU::VGPR_32RegClass;
  case 64:
    return &AMDGPU::VReg_64RegClass;
  case 96:
    return &AMDGPU::VReg_96RegClass;
  case 128:
    return &AMDGPU::VReg_128RegClass;
  case 256:
    return &AMDGPU::VReg_256RegClass;
  case 512:
    return &AMDGPU::VReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
                                         const TargetRegisterClass *VRC) const {
  switch (getRegSizeInBits(*VRC)) {
  case 32:
    return &AMDGPU::SGPR_32RegClass;
  case 64:
    return &AMDGPU::SReg_64RegClass;
  case 128:
    return &AMDGPU::SReg_128RegClass;
  case 256:
    return &AMDGPU::SReg_256RegClass;
  case 512:
    return &AMDGPU::SReg_512RegClass;
  default:
    llvm_unreachable("Invalid register class size");
  }
}

const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
                         const TargetRegisterClass *RC, unsigned SubIdx) const {
  if (SubIdx == AMDGPU::NoSubRegister)
    return RC;

  // We can assume that each lane corresponds to one 32-bit register.
  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
  if (isSGPRClass(RC)) {
    switch (Count) {
    case 1:
      return &AMDGPU::SGPR_32RegClass;
    case 2:
      return &AMDGPU::SReg_64RegClass;
    case 4:
      return &AMDGPU::SReg_128RegClass;
    case 8:
      return &AMDGPU::SReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  } else {
    switch (Count) {
    case 1:
      return &AMDGPU::VGPR_32RegClass;
    case 2:
      return &AMDGPU::VReg_64RegClass;
    case 3:
      return &AMDGPU::VReg_96RegClass;
    case 4:
      return &AMDGPU::VReg_128RegClass;
    case 8:
      return &AMDGPU::VReg_256RegClass;
    case 16: /* fall-through */
    default:
      llvm_unreachable("Invalid sub-register class size");
    }
  }
}

bool SIRegisterInfo::shouldRewriteCopySrc(
  const TargetRegisterClass *DefRC,
  unsigned DefSubReg,
  const TargetRegisterClass *SrcRC,
  unsigned SrcSubReg) const {
  // We want to prefer the smallest register class possible, so we don't want to
  // stop and rewrite on anything that looks like a subregister
  // extract. Operations mostly don't care about the super register class, so we
  // only want to stop on the most basic of copies between the same register
  // class.
  //
  // e.g. if we have something like
  // %0 = ...
  // %1 = ...
  // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
  // %3 = COPY %2, sub0
  //
  // We want to look through the COPY to find:
  //  => %3 = COPY %0

  // Plain copy.
  return getCommonSubClass(DefRC, SrcRC) != nullptr;
}

/// Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                   const TargetRegisterClass *RC,
                                   const MachineFunction &MF) const {

  for (unsigned Reg : *RC)
    if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
      return Reg;
  return AMDGPU::NoRegister;
}

ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
                                                   unsigned EltSize) const {
  if (EltSize == 4) {
    static const int16_t Sub0_15[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
      AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
      AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    };

    static const int16_t Sub0_7[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
      AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    };

    static const int16_t Sub0_3[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    };

    static const int16_t Sub0_2[] = {
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    };

    static const int16_t Sub0_1[] = {
      AMDGPU::sub0, AMDGPU::sub1,
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 32:
      return {};
    case 64:
      return makeArrayRef(Sub0_1);
    case 96:
      return makeArrayRef(Sub0_2);
    case 128:
      return makeArrayRef(Sub0_3);
    case 256:
      return makeArrayRef(Sub0_7);
    case 512:
      return makeArrayRef(Sub0_15);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  if (EltSize == 8) {
    static const int16_t Sub0_15_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
      AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
      AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    };

    static const int16_t Sub0_7_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
      AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    };

    static const int16_t Sub0_3_64[] = {
      AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    };

    switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    case 64:
      return {};
    case 128:
      return makeArrayRef(Sub0_3_64);
    case 256:
      return makeArrayRef(Sub0_7_64);
    case 512:
      return makeArrayRef(Sub0_15_64);
    default:
      llvm_unreachable("unhandled register size");
    }
  }

  assert(EltSize == 16 && "unhandled register spill split size");

  static const int16_t Sub0_15_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7,
    AMDGPU::sub8_sub9_sub10_sub11,
    AMDGPU::sub12_sub13_sub14_sub15
  };

  static const int16_t Sub0_7_128[] = {
    AMDGPU::sub0_sub1_sub2_sub3,
    AMDGPU::sub4_sub5_sub6_sub7
  };

  switch (AMDGPU::getRegBitWidth(*RC->MC)) {
  case 128:
    return {};
  case 256:
    return makeArrayRef(Sub0_7_128);
  case 512:
    return makeArrayRef(Sub0_15_128);
  default:
    llvm_unreachable("unhandled register size");
  }
}

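// Example for getRegSplitParts: a 256-bit register class split with
// EltSize == 4 yields the eight dword indices sub0..sub7, while EltSize == 16
// yields the two 128-bit indices sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7;
// a class whose width equals EltSize * 8 bits returns the empty list, meaning
// "no split, use the register itself".
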
const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
                                  unsigned Reg) const {
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    return MRI.getRegClass(Reg);

  return getPhysRegClass(Reg);
}

bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
                            unsigned Reg) const {
  const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
  assert(RC && "Register class for the reg not found");
  return hasVGPRs(RC);
}

bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
                                    const TargetRegisterClass *SrcRC,
                                    unsigned SubReg,
                                    const TargetRegisterClass *DstRC,
                                    unsigned DstSubReg,
                                    const TargetRegisterClass *NewRC,
                                    LiveIntervals &LIS) const {
  unsigned SrcSize = getRegSizeInBits(*SrcRC);
  unsigned DstSize = getRegSizeInBits(*DstRC);
  unsigned NewSize = getRegSizeInBits(*NewRC);

  // Do not increase size of registers beyond dword, we would need to allocate
  // adjacent registers and constraint regalloc more than needed.

  // Always allow dword coalescing.
  if (SrcSize <= 32 || DstSize <= 32)
    return true;

  return NewSize <= DstSize || NewSize <= SrcSize;
}

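// Consequence of shouldCoalesce above: dword copies always coalesce, but a
// coalesce that would grow the live range to a wider tuple class (e.g. folding
// two 64-bit registers into a fresh 128-bit register) is rejected, since
// NewSize would exceed both SrcSize and DstSize and force the allocator to
// find adjacent registers.
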
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                             MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
                                                       MF.getFunction());
  switch (RC->getID()) {
  default:
    return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
    return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
  case AMDGPU::SGPR_32RegClassID:
    return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
  }
}

unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
                                                unsigned Idx) const {
  if (Idx == getVGPRPressureSet())
    return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  if (Idx == getSGPRPressureSet())
    return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
                               const_cast<MachineFunction &>(MF));

  return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
}

const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
  static const int Empty[] = { -1 };

  if (hasRegUnit(AMDGPU::M0, RegUnit))
    return Empty;
  return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
}

unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
  // Not a callee saved register.
  return AMDGPU::SGPR30_SGPR31;
}

const TargetRegisterClass *
SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
                                         const MachineRegisterInfo &MRI) const {
  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
  if (!RB)
    return nullptr;

  switch (Size) {
  case 32:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
                                                  &AMDGPU::SReg_32_XM0RegClass;
  case 64:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
                                                  &AMDGPU::SReg_64_XEXECRegClass;
  case 96:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
                                                  nullptr;
  case 128:
    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                                  &AMDGPU::SReg_128RegClass;
  default:
    llvm_unreachable("not implemented");
  }
}