//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
#include "SIFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                         const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                      ST.getMaxNumSGPRs(MF) / 4);
}

static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                                       const MachineFunction &MF) {
  return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
                      ST.getMaxNumSGPRs(MF));
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                 LivePhysRegs &LiveRegs,
                                                 const TargetRegisterClass &RC,
                                                 bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);
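
  // When Unused is set, the caller needs a register with no uses anywhere in
  // the function, not just one that is dead at this point; otherwise any
  // register that is currently available will do.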
  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (unsigned Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (unsigned Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  // If we require an unused register, this is used in contexts where failure
  // is an option and has an alternative plan. In other contexts, this must
  // succeed.
  if (!Unused)
    report_fatal_error("failed to find free scratch register");

  return AMDGPU::NoRegister;
}

static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(*MRI.getTargetRegisterInfo());
  return findScratchNonCalleeSaveRegister(
    MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
}

// We need to specially emit stack operations here because a different frame
// register is used than in the rest of the function, as getFrameRegister would
// use.
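//
// If the object's offset fits in the 12-bit immediate field of the MUBUF
// instruction it is encoded directly; otherwise the offset is first
// materialized into a scratch VGPR and the _OFFEN form of the instruction is
// used instead.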
static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             const SIInstrInfo *TII, unsigned SpillReg,
                             unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
      MFI.getObjectAlignment(FI));

  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
      .addReg(SpillReg, RegState::Kill)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
    .addReg(SpillReg, RegState::Kill)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addMemOperand(MMO);
}

static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const SIInstrInfo *TII, unsigned SpillReg,
                              unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  int64_t Offset = MFI.getObjectOffset(FI);

  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
      MFI.getObjectAlignment(FI));

  if (isUInt<12>(Offset)) {
    BuildMI(MBB, I, DebugLoc(),
            TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
      .addReg(ScratchRsrcReg)
      .addReg(SPReg)
      .addImm(Offset)
      .addMemOperand(MMO);
    return;
  }

  MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
    MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);

  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
    .addImm(Offset);

  BuildMI(MBB, I, DebugLoc(),
          TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
    .addReg(OffsetReg, RegState::Kill)
    .addReg(ScratchRsrcReg)
    .addReg(SPReg)
    .addMemOperand(MMO);
}

void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.

  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  Register FlatScratchInitReg =
      MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);

  MachineRegisterInfo &MRI = MF.getRegInfo();
  MRI.addLiveIn(FlatScratchInitReg);
  MBB.addLiveIn(FlatScratchInitReg);

  Register FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
  Register FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }
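
    // Before GFX10 the flat scratch base lives in the dedicated
    // FLAT_SCR_LO/FLAT_SCR_HI register pair, so add the wave offset into it
    // directly instead of going through S_SETREG.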
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
    .addReg(FlatScrInitLo)
    .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}
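
// If the scratch resource descriptor was assigned the register reserved at
// the top of the SGPR file, shift it down next to the SGPRs that are actually
// preloaded so the highest registers remain free.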
unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
  const GCNSubtarget &ST,
  const SIInstrInfo *TII,
  const SIRegisterInfo *TRI,
  SIMachineFunctionInfo *MFI,
  MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // We need to insert initialization of the scratch resource descriptor.
  unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
  if (ScratchRsrcReg == AMDGPU::NoRegister ||
      !MRI.isPhysRegUsed(ScratchRsrcReg))
    return AMDGPU::NoRegister;

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.
  //
  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // We find the resource first because it has an alignment requirement.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

// Shift down registers reserved for the scratch wave offset.
std::pair<unsigned, bool>
SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();

  assert(MFI->isEntryFunction());

  // No replacement necessary.
  if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
    return std::make_pair(AMDGPU::NoRegister, false);
  }

  if (ST.hasSGPRInitBug())
    return std::make_pair(ScratchWaveOffsetReg, false);

  unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();

  ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
  if (NumPreloaded > AllSGPRs.size())
    return std::make_pair(ScratchWaveOffsetReg, false);

  AllSGPRs = AllSGPRs.slice(NumPreloaded);

  // We need to drop registers from the end of the list that we cannot use
  // for the scratch wave offset.
  // + 2 because s102 and s103 do not exist on VI.
  // + 2 for vcc
  // + 2 for xnack_mask
  // + 2 for flat_scratch
  // + 4 for registers reserved for scratch resource register
  // + 1 for register reserved for scratch wave offset. (By excluding this
  //     register from the list to consider, it means that when this
  //     register is being used for the scratch wave offset and there
  //     are no other free SGPRs, then the value will stay in this register.)
  // + 1 if stack pointer is used.
  // ----
  //  13 (+1)
  unsigned ReservedRegCount = 13;

  if (AllSGPRs.size() < ReservedRegCount)
    return std::make_pair(ScratchWaveOffsetReg, false);

  bool HandledScratchWaveOffsetReg =
    ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
  bool FPAdjusted = false;

  for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
    // Pick the first unallocated SGPR. Be careful not to pick an alias of the
    // scratch descriptor, since we haven't added its uses yet.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
      if (!HandledScratchWaveOffsetReg) {
        HandledScratchWaveOffsetReg = true;

        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
          MFI->setStackPtrOffsetReg(Reg);
        }

        MFI->setScratchWaveOffsetReg(Reg);
        MFI->setFrameOffsetReg(Reg);
        ScratchWaveOffsetReg = Reg;
        FPAdjusted = true;
        break;
      }
    }
  }

  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
}
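
// The entry-function (kernel) prologue: initialize flat scratch if needed,
// pick/shift the scratch resource descriptor and scratch wave offset
// registers, copy the preloaded inputs into them, and set up the initial
// stack pointer.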
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If we only have SGPR spills, we won't actually be using scratch memory
  // since these spill to VGPRs.
  //
  // FIXME: We should be cleaning up these unused SGPR spill frame indices
  // somewhere.

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();

  // We need to do the replacement of the private segment buffer and wave
  // offset register even if there are no stack objects. There could be stores
  // to undef or a constant without an associated object.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  if (MFI->hasFlatScratchInit())
    emitFlatScratchInit(ST, MF, MBB);

  unsigned ScratchRsrcReg
    = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);

  unsigned ScratchWaveOffsetReg;
  bool FPAdjusted;
  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
    getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);

  // We need to insert initialization of the scratch resource descriptor.
  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
    AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedPrivateBufferReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
  }

  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);
  bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                         MRI.isPhysRegUsed(ScratchRsrcReg);

  // FIXME: Hack to not crash in situations which emitted an error.
  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
    return;

  // We added live-ins during argument lowering, but since they were not used
  // they were deleted. We're adding the uses now, so add them back.
  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);

  if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
    assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
    MRI.addLiveIn(PreloadedPrivateBufferReg);
    MBB.addLiveIn(PreloadedPrivateBufferReg);
  }

  // Make the register selected live throughout the function.
  for (MachineBasicBlock &OtherBB : MF) {
    if (&OtherBB == &MBB)
      continue;

    if (OffsetRegUsed || FPAdjusted)
      OtherBB.addLiveIn(ScratchWaveOffsetReg);

    if (ResourceRegUsed)
      OtherBB.addLiveIn(ScratchRsrcReg);
  }

  MachineBasicBlock::iterator I = MBB.begin();
  DebugLoc DL;

  // If we reserved the original input registers, we don't need to copy to the
  // reserved registers.

  bool CopyBuffer = ResourceRegUsed &&
    PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
    ST.isAmdHsaOrMesa(F) &&
    ScratchRsrcReg != PreloadedPrivateBufferReg;

  // This needs to be careful of the copying order to avoid overwriting one of
  // the input registers before it's been copied to its final
  // destination. Usually the offset should be copied first.
  bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
                                              ScratchWaveOffsetReg);
  if (CopyBuffer && CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  unsigned SPReg = MFI->getStackPtrOffsetReg();
  assert(SPReg != AMDGPU::SP_REG);

  // FIXME: Remove the isPhysRegUsed checks
  const bool HasFP = hasFP(MF);

  if (HasFP || OffsetRegUsed) {
    assert(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
  }

  if (CopyBuffer && !CopyBufferFirst) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
      .addReg(PreloadedPrivateBufferReg, RegState::Kill);
  }

  if (ResourceRegUsed) {
    emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
                                  PreloadedPrivateBufferReg, ScratchRsrcReg);
  }

  if (HasFP) {
    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
    int64_t StackSize = FrameInfo.getStackSize();

    // On kernel entry, the private scratch wave offset is the SP value.
    if (StackSize == 0) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg());
    } else {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
        .addReg(MFI->getScratchWaveOffsetReg())
        .addImm(StackSize * ST.getWavefrontSize());
    }
  }
}

// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
      MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
      MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
      unsigned ScratchRsrcReg) const {

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const Function &Fn = MF.getFunction();
  DebugLoc DL;

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
    Register RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    if (MFI->getGITPtrHigh() != 0xffffffff) {
      BuildMI(MBB, I, DL, SMovB32, RsrcHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    } else {
      const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
      BuildMI(MBB, I, DL, GetPC64, Rsrc01);
    }
    auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
    if (ST.hasMergedShaders()) {
      switch (MF.getFunction().getCallingConv()) {
      case CallingConv::AMDGPU_HS:
      case CallingConv::AMDGPU_GS:
        // Low GIT address is passed in s8 rather than s0 for an LS+HS or
        // ES+GS merged shader on gfx9+.
        GitPtrLo = AMDGPU::SGPR8;
        break;
      default:
        break;
      }
    }
    MF.getRegInfo().addLiveIn(GitPtrLo);
    MBB.addLiveIn(GitPtrLo);
    BuildMI(MBB, I, DL, SMovB32, RsrcLo)
      .addReg(GitPtrLo)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    PointerType *PtrTy =
      PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                       AMDGPUAS::CONSTANT_ADDRESS);
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, 4);
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);
    return;
  }

  if (ST.isMesaGfxShader(Fn)
      || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        PointerType *PtrTy =
          PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
                           AMDGPUAS::CONSTANT_ADDRESS);
        MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
        auto MMO = MF.getMachineMemOperand(PtrInfo,
                                           MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                           8, 4);
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  }
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::SVEVector:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}
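
// Prologue for callable (non-entry) functions: copy or spill the incoming
// frame pointer, save the VGPRs that hold SGPR spill lanes with all lanes
// enabled, realign the stack if needed, and bump the stack pointer by the
// frame size.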
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  unsigned ScratchExecCopy = AMDGPU::NoRegister;

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
      .addReg(FramePtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    if (ScratchExecCopy == AMDGPU::NoRegister) {
      if (LiveRegs.empty()) {
        LiveRegs.init(TRI);
        LiveRegs.addLiveIns(MBB);
        if (FuncInfo->SGPRForFPSaveRestoreCopy)
          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      }

      ScratchExecCopy
        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
                                           *TRI.getWaveMaskRegClass());
      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);

      const unsigned OrSaveExec = ST.isWave32() ?
        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                     FuncInfo->getScratchRSrcReg(),
                     FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
    assert(!MFI.isDeadObjectIndex(FI) &&
           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);

    // Save FP before setting it up.
    // FIXME: This should respect spillSGPRToVGPR;
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
            Spill[0].VGPR)
      .addReg(FramePtrReg)
      .addImm(Spill[0].Lane)
      .addReg(Spill[0].VGPR, RegState::Undef);
  }

  if (TRI.needsStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlignment();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
    }

    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
    assert(ScratchSPReg != AMDGPU::NoRegister &&
           ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
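
    // SP and FP hold swizzled scratch offsets: one byte of per-lane stack is
    // ST.getWavefrontSize() bytes of scratch memory, so alignment amounts and
    // frame sizes are scaled by the wavefront size below.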
    // s_add_u32 tmp_reg, s32, NumBytes
    // s_and_b32 s32, tmp_reg, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
      .addReg(StackPtrReg)
      .addImm((Alignment - 1) * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
      .addReg(ScratchSPReg, RegState::Kill)
      .addImm(-Alignment * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // If we need a base pointer, set it up here. It's whatever the value of
    // the stack pointer is at this point. Any variable size objects will be
    // allocated after this, so we can still use the base pointer to reference
    // locals.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
      .addReg(StackPtrReg)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
    NumBytes + MFI.getMaxAlignment() : NumBytes;

  if (RoundedSize != 0 && hasFP(MF)) {
    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
      .addReg(StackPtrReg)
      .addImm(RoundedSize * ST.getWavefrontSize())
      .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->getFrameOffsetReg())
      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (FuncInfo->FramePointerSaveIndex) {
    const int FI = FuncInfo->FramePointerSaveIndex.getValue();

    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);

    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
      = FuncInfo->getSGPRToVGPRSpills(FI);
    assert(Spill.size() == 1);
    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
            FuncInfo->getFrameOffsetReg())
      .addReg(Spill[0].VGPR)
      .addImm(Spill[0].Lane);
  }

  unsigned ScratchExecCopy = AMDGPU::NoRegister;
  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
         : FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI.hasValue())
      continue;

    const SIRegisterInfo &TRI = TII->getRegisterInfo();
    if (ScratchExecCopy == AMDGPU::NoRegister) {
      if (LiveRegs.empty()) {
        LiveRegs.init(*ST.getRegisterInfo());
        LiveRegs.addLiveOuts(MBB);
        LiveRegs.stepBackward(*MBBI);
      }

      ScratchExecCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
      LiveRegs.removeReg(ScratchExecCopy);

      const unsigned OrSaveExec =
          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
        .addImm(-1);
    }

    buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
  }

  if (ScratchExecCopy != AMDGPU::NoRegister) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
      .addReg(ScratchExecCopy, RegState::Kill);
  }
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                 Optional<int> FramePointerSaveIndex) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        FramePointerSaveIndex && I != FramePointerSaveIndex) {
      return false;
    }
  }

  return true;
}

int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            unsigned &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return MF.getFrameInfo().getObjectOffset(FI);
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MFI, None) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    if (FuncInfo->isEntryFunction()) {
      int ScavengeFI = MFI.CreateFixedObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
      RS->addScavengingFrameIndex(ScavengeFI);
    } else {
      int ScavengeFI = MFI.CreateStackObject(
        TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
        TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
        false);
      RS->addScavengingFrameIndex(ScavengeFI);
    }
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  const bool HasFP = WillHaveFP || hasFP(MF);
  if (!HasFP)
    return;
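
  // We need to save the old FP somewhere. In order of preference: write it
  // into a free lane of a VGPR that is already being spilled, keep it in an
  // unused non-callee-saved SGPR, or as a last resort create a new SGPR spill
  // slot (which may force an additional VGPR spill).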
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);

    // If there is already a VGPR with free lanes, use it. We may already have
    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n');
    return;
  }

  MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());

  if (!MFI->SGPRForFPSaveRestoreCopy) {
    // There's no free lane to spill, and no free register to save FP, so we're
    // forced to spill another VGPR to use for the spill.
    int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                    TargetStackID::SGPRSpill);
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");
    MFI->FramePointerSaveIndex = NewFI;

    LLVM_DEBUG(
      auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
      dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
             << ':' << Spill.Lane << '\n';);
  } else {
    LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
               printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());
  SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
}
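
// When the FP is being saved with a copy into a free SGPR rather than a spill
// slot, redirect its callee-saved entry to that SGPR so no stack slot gets
// allocated for it.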
bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy)
    return false;

  for (auto &CS : CSI) {
    if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
      if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
        CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    Amount = alignTo(Amount, Align);
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    unsigned SPReg = MFI->getStackPtrOffsetReg();

    unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
    BuildMI(MBB, I, DL, TII->get(Op), SPReg)
      .addReg(SPReg)
      .addImm(Amount * ST.getWavefrontSize());
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasCalls()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    if (MFI.getStackSize() != 0)
      return true;

    // For the entry point, the input wave scratch offset must be copied to the
    // API SP if there are calls.
    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
      return true;
  }

  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()
             ->needsStackRealignment(MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}