//===-- SIFormMemoryClauses.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass extends the live ranges of registers used as pointers in
/// sequences of adjacent SMEM and VMEM instructions if XNACK is enabled. A
/// load that would overwrite a pointer would require breaking the soft clause.
/// Artificially extend the live ranges of the pointer operands by adding
/// implicit-def early-clobber operands throughout the soft clause.
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNRegPressure.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "si-form-memory-clauses"
26 // Clauses longer then 15 instructions would overflow one of the counters
27 // and stall. They can stall even earlier if there are outstanding counters.
28 static cl::opt
<unsigned>
29 MaxClause("amdgpu-max-memory-clause", cl::Hidden
, cl::init(15),
30 cl::desc("Maximum length of a memory clause, instructions"));
34 class SIFormMemoryClauses
: public MachineFunctionPass
{
35 typedef DenseMap
<unsigned, std::pair
<unsigned, LaneBitmask
>> RegUse
;
41 SIFormMemoryClauses() : MachineFunctionPass(ID
) {
42 initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
45 bool runOnMachineFunction(MachineFunction
&MF
) override
;
47 StringRef
getPassName() const override
{
48 return "SI Form memory clauses";
51 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
52 AU
.addRequired
<LiveIntervals
>();
54 MachineFunctionPass::getAnalysisUsage(AU
);
57 MachineFunctionProperties
getClearedProperties() const override
{
58 return MachineFunctionProperties().set(
59 MachineFunctionProperties::Property::IsSSA
);
63 bool canBundle(const MachineInstr
&MI
, const RegUse
&Defs
,
64 const RegUse
&Uses
) const;
65 bool checkPressure(const MachineInstr
&MI
, GCNDownwardRPTracker
&RPT
);
66 void collectRegUses(const MachineInstr
&MI
, RegUse
&Defs
, RegUse
&Uses
) const;
67 bool processRegUses(const MachineInstr
&MI
, RegUse
&Defs
, RegUse
&Uses
,
68 GCNDownwardRPTracker
&RPT
);
70 const GCNSubtarget
*ST
;
71 const SIRegisterInfo
*TRI
;
72 const MachineRegisterInfo
*MRI
;
73 SIMachineFunctionInfo
*MFI
;
75 unsigned LastRecordedOccupancy
;
80 } // End anonymous namespace.
82 INITIALIZE_PASS_BEGIN(SIFormMemoryClauses
, DEBUG_TYPE
,
83 "SI Form memory clauses", false, false)
84 INITIALIZE_PASS_DEPENDENCY(LiveIntervals
)
85 INITIALIZE_PASS_END(SIFormMemoryClauses
, DEBUG_TYPE
,
86 "SI Form memory clauses", false, false)
89 char SIFormMemoryClauses::ID
= 0;
91 char &llvm::SIFormMemoryClausesID
= SIFormMemoryClauses::ID
;
93 FunctionPass
*llvm::createSIFormMemoryClausesPass() {
94 return new SIFormMemoryClauses();
97 static bool isVMEMClauseInst(const MachineInstr
&MI
) {
98 return SIInstrInfo::isFLAT(MI
) || SIInstrInfo::isVMEM(MI
);
101 static bool isSMEMClauseInst(const MachineInstr
&MI
) {
102 return SIInstrInfo::isSMRD(MI
);
105 // There no sense to create store clauses, they do not define anything,
106 // thus there is nothing to set early-clobber.
107 static bool isValidClauseInst(const MachineInstr
&MI
, bool IsVMEMClause
) {
108 assert(!MI
.isDebugInstr() && "debug instructions should not reach here");
111 if (!MI
.mayLoad() || MI
.mayStore())
113 if (SIInstrInfo::isAtomic(MI
))
115 if (IsVMEMClause
&& !isVMEMClauseInst(MI
))
117 if (!IsVMEMClause
&& !isSMEMClauseInst(MI
))
119 // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
120 for (const MachineOperand
&ResMO
: MI
.defs()) {
121 Register ResReg
= ResMO
.getReg();
122 for (const MachineOperand
&MO
: MI
.uses()) {
123 if (!MO
.isReg() || MO
.isDef())
125 if (MO
.getReg() == ResReg
)
128 break; // Only check the first def.
133 static unsigned getMopState(const MachineOperand
&MO
) {
136 S
|= RegState::Implicit
;
140 S
|= RegState::Undef
;
143 if (MO
.isEarlyClobber())
144 S
|= RegState::EarlyClobber
;
145 if (MO
.getReg().isPhysical() && MO
.isRenamable())
146 S
|= RegState::Renamable
;
150 // Returns false if there is a use of a def already in the map.
151 // In this case we must break the clause.
152 bool SIFormMemoryClauses::canBundle(const MachineInstr
&MI
, const RegUse
&Defs
,
153 const RegUse
&Uses
) const {
154 // Check interference with defs.
155 for (const MachineOperand
&MO
: MI
.operands()) {
156 // TODO: Prologue/Epilogue Insertion pass does not process bundled
164 Register Reg
= MO
.getReg();
166 // If it is tied we will need to write same register as we read.
170 const RegUse
&Map
= MO
.isDef() ? Uses
: Defs
;
171 auto Conflict
= Map
.find(Reg
);
172 if (Conflict
== Map
.end())
175 if (Reg
.isPhysical())
178 LaneBitmask Mask
= TRI
->getSubRegIndexLaneMask(MO
.getSubReg());
179 if ((Conflict
->second
.second
& Mask
).any())
186 // Since all defs in the clause are early clobber we can run out of registers.
187 // Function returns false if pressure would hit the limit if instruction is
188 // bundled into a memory clause.
189 bool SIFormMemoryClauses::checkPressure(const MachineInstr
&MI
,
190 GCNDownwardRPTracker
&RPT
) {
191 // NB: skip advanceBeforeNext() call. Since all defs will be marked
192 // early-clobber they will all stay alive at least to the end of the
193 // clause. Therefor we should not decrease pressure even if load
194 // pointer becomes dead and could otherwise be reused for destination.
196 GCNRegPressure MaxPressure
= RPT
.moveMaxPressure();
197 unsigned Occupancy
= MaxPressure
.getOccupancy(*ST
);
199 // Don't push over half the register budget. We don't want to introduce
200 // spilling just to form a soft clause.
202 // FIXME: This pressure check is fundamentally broken. First, this is checking
203 // the global pressure, not the pressure at this specific point in the
204 // program. Second, it's not accounting for the increased liveness of the use
205 // operands due to the early clobber we will introduce. Third, the pressure
206 // tracking does not account for the alignment requirements for SGPRs, or the
207 // fragmentation of registers the allocator will need to satisfy.
208 if (Occupancy
>= MFI
->getMinAllowedOccupancy() &&
209 MaxPressure
.getVGPRNum(ST
->hasGFX90AInsts()) <= MaxVGPRs
/ 2 &&
210 MaxPressure
.getSGPRNum() <= MaxSGPRs
/ 2) {
211 LastRecordedOccupancy
= Occupancy
;
217 // Collect register defs and uses along with their lane masks and states.
218 void SIFormMemoryClauses::collectRegUses(const MachineInstr
&MI
,
219 RegUse
&Defs
, RegUse
&Uses
) const {
220 for (const MachineOperand
&MO
: MI
.operands()) {
223 Register Reg
= MO
.getReg();
227 LaneBitmask Mask
= Reg
.isVirtual()
228 ? TRI
->getSubRegIndexLaneMask(MO
.getSubReg())
229 : LaneBitmask::getAll();
230 RegUse
&Map
= MO
.isDef() ? Defs
: Uses
;
232 auto Loc
= Map
.find(Reg
);
233 unsigned State
= getMopState(MO
);
234 if (Loc
== Map
.end()) {
235 Map
[Reg
] = std::make_pair(State
, Mask
);
237 Loc
->second
.first
|= State
;
238 Loc
->second
.second
|= Mask
;
243 // Check register def/use conflicts, occupancy limits and collect def/use maps.
244 // Return true if instruction can be bundled with previous. It it cannot
245 // def/use maps are not updated.
246 bool SIFormMemoryClauses::processRegUses(const MachineInstr
&MI
,
247 RegUse
&Defs
, RegUse
&Uses
,
248 GCNDownwardRPTracker
&RPT
) {
249 if (!canBundle(MI
, Defs
, Uses
))
252 if (!checkPressure(MI
, RPT
))
255 collectRegUses(MI
, Defs
, Uses
);
259 bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction
&MF
) {
260 if (skipFunction(MF
.getFunction()))
263 ST
= &MF
.getSubtarget
<GCNSubtarget
>();
264 if (!ST
->isXNACKEnabled())
267 const SIInstrInfo
*TII
= ST
->getInstrInfo();
268 TRI
= ST
->getRegisterInfo();
269 MRI
= &MF
.getRegInfo();
270 MFI
= MF
.getInfo
<SIMachineFunctionInfo
>();
271 LiveIntervals
*LIS
= &getAnalysis
<LiveIntervals
>();
272 SlotIndexes
*Ind
= LIS
->getSlotIndexes();
273 bool Changed
= false;
275 MaxVGPRs
= TRI
->getAllocatableSet(MF
, &AMDGPU::VGPR_32RegClass
).count();
276 MaxSGPRs
= TRI
->getAllocatableSet(MF
, &AMDGPU::SGPR_32RegClass
).count();
277 unsigned FuncMaxClause
= AMDGPU::getIntegerAttribute(
278 MF
.getFunction(), "amdgpu-max-memory-clause", MaxClause
);
280 for (MachineBasicBlock
&MBB
: MF
) {
281 GCNDownwardRPTracker
RPT(*LIS
);
282 MachineBasicBlock::instr_iterator Next
;
283 for (auto I
= MBB
.instr_begin(), E
= MBB
.instr_end(); I
!= E
; I
= Next
) {
284 MachineInstr
&MI
= *I
;
287 if (MI
.isMetaInstruction())
290 bool IsVMEM
= isVMEMClauseInst(MI
);
292 if (!isValidClauseInst(MI
, IsVMEM
))
295 if (!RPT
.getNext().isValid())
297 else { // Advance the state to the current MI.
298 RPT
.advance(MachineBasicBlock::const_iterator(MI
));
299 RPT
.advanceBeforeNext();
302 const GCNRPTracker::LiveRegSet
LiveRegsCopy(RPT
.getLiveRegs());
304 if (!processRegUses(MI
, Defs
, Uses
, RPT
)) {
305 RPT
.reset(MI
, &LiveRegsCopy
);
309 MachineBasicBlock::iterator LastClauseInst
= Next
;
311 for ( ; Next
!= E
&& Length
< FuncMaxClause
; ++Next
) {
312 // Debug instructions should not change the kill insertion.
313 if (Next
->isMetaInstruction())
316 if (!isValidClauseInst(*Next
, IsVMEM
))
319 // A load from pointer which was loaded inside the same bundle is an
320 // impossible clause because we will need to write and read the same
321 // register inside. In this case processRegUses will return false.
322 if (!processRegUses(*Next
, Defs
, Uses
, RPT
))
325 LastClauseInst
= Next
;
329 RPT
.reset(MI
, &LiveRegsCopy
);
334 MFI
->limitOccupancy(LastRecordedOccupancy
);
336 assert(!LastClauseInst
->isMetaInstruction());
338 SlotIndex ClauseLiveInIdx
= LIS
->getInstructionIndex(MI
);
339 SlotIndex ClauseLiveOutIdx
=
340 LIS
->getInstructionIndex(*LastClauseInst
).getNextIndex();
342 // Track the last inserted kill.
343 MachineInstrBuilder Kill
;
345 // Insert one kill per register, with operands covering all necessary
347 for (auto &&R
: Uses
) {
348 Register Reg
= R
.first
;
349 if (Reg
.isPhysical())
352 // Collect the register operands we should extend the live ranges of.
353 SmallVector
<std::tuple
<unsigned, unsigned>> KillOps
;
354 const LiveInterval
&LI
= LIS
->getInterval(R
.first
);
356 if (!LI
.hasSubRanges()) {
357 if (!LI
.liveAt(ClauseLiveOutIdx
)) {
358 KillOps
.emplace_back(R
.second
.first
| RegState::Kill
,
359 AMDGPU::NoSubRegister
);
362 LaneBitmask KilledMask
;
363 for (const LiveInterval::SubRange
&SR
: LI
.subranges()) {
364 if (SR
.liveAt(ClauseLiveInIdx
) && !SR
.liveAt(ClauseLiveOutIdx
))
365 KilledMask
|= SR
.LaneMask
;
368 if (KilledMask
.none())
371 SmallVector
<unsigned> KilledIndexes
;
372 bool Success
= TRI
->getCoveringSubRegIndexes(
373 *MRI
, MRI
->getRegClass(Reg
), KilledMask
, KilledIndexes
);
375 assert(Success
&& "Failed to find subregister mask to cover lanes");
376 for (unsigned SubReg
: KilledIndexes
) {
377 KillOps
.emplace_back(R
.second
.first
| RegState::Kill
, SubReg
);
384 // We only want to extend the live ranges of used registers. If they
385 // already have existing uses beyond the bundle, we don't need the kill.
387 // It's possible all of the use registers were already live past the
389 Kill
= BuildMI(*MI
.getParent(), std::next(LastClauseInst
),
390 DebugLoc(), TII
->get(AMDGPU::KILL
));
391 for (auto &Op
: KillOps
)
392 Kill
.addUse(Reg
, std::get
<0>(Op
), std::get
<1>(Op
));
393 Ind
->insertMachineInstrInMaps(*Kill
);
397 RPT
.reset(MI
, &LiveRegsCopy
);
401 // Restore the state after processing the end of the bundle.
402 RPT
.reset(*Kill
, &LiveRegsCopy
);
404 for (auto &&R
: Defs
) {
405 Register Reg
= R
.first
;
407 if (Reg
.isPhysical())
409 LIS
->removeInterval(Reg
);
410 LIS
->createAndComputeVirtRegInterval(Reg
);
413 for (auto &&R
: Uses
) {
414 Register Reg
= R
.first
;
415 if (Reg
.isPhysical())
417 LIS
->removeInterval(Reg
);
418 LIS
->createAndComputeVirtRegInterval(Reg
);