//===----------------- AMDGPUCustomBehaviour.cpp ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements methods from the AMDGPUCustomBehaviour class.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCustomBehaviour.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"

namespace llvm::mca {

void AMDGPUInstrPostProcess::postProcessInstruction(
    std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
  switch (MCI.getOpcode()) {
  case AMDGPU::S_WAITCNT:
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    return processWaitCnt(Inst, MCI);
  }
}

// s_waitcnt instructions encode important information as immediate operands
// which are lost during the MCInst -> mca::Instruction lowering.
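// For example, "s_waitcnt vmcnt(0) lgkmcnt(0)" carries its counter thresholds
// in a single immediate operand; copying the MCOperands onto the
// mca::Instruction here is what lets computeWaitCnt() read them back later.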
void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
                                            const MCInst &MCI) {
  for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
    MCAOperand Op;
    const MCOperand &MCOp = MCI.getOperand(Idx);
    if (MCOp.isReg()) {
      Op = MCAOperand::createReg(MCOp.getReg());
    } else if (MCOp.isImm()) {
      Op = MCAOperand::createImm(MCOp.getImm());
    }
    Op.setIndex(Idx);
    Inst->addOperand(Op);
  }
}

AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                                             const mca::SourceMgr &SrcMgr,
                                             const MCInstrInfo &MCII)
    : CustomBehaviour(STI, SrcMgr, MCII) {
  generateWaitCntInfo();
}
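
// checkCustomHazard() reports how many cycles the instruction in IR must
// stall before it can be dispatched (0 means no hazard). The only custom
// hazard modelled here is an s_waitcnt whose counters have not yet drained.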
unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
                                                  const InstRef &IR) {
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  // llvm-mca is generally run on fully compiled assembly, so we wouldn't see
  // any pseudo instructions here. However, there are plans for the future to
  // make it possible to use mca within backend passes. As such, I have left
  // the pseudo versions of s_waitcnt within this switch statement.
  switch (Opcode) {
  default:
    return 0;
  case AMDGPU::S_WAITCNT: // This instruction
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    // s_endpgm also behaves as if there is an implicit
    // s_waitcnt 0, but I'm not sure if it would be appropriate
    // to model this in llvm-mca based on how the iterations work
    // while simulating the pipeline over and over.
    return handleWaitCnt(IssuedInst, IR);
  }
}

unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
                                              const InstRef &IR) {
  // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
  // I do not know how that instruction works, so I did not attempt to model it.

  // Set the counter thresholds to their maximum values to begin with.
  unsigned Vmcnt = 63;
  unsigned Expcnt = 7;
  unsigned Lgkmcnt = 31;
  unsigned Vscnt = 63;
  unsigned CurrVmcnt = 0;
  unsigned CurrExpcnt = 0;
  unsigned CurrLgkmcnt = 0;
  unsigned CurrVscnt = 0;
  unsigned CyclesToWaitVm = ~0U;
  unsigned CyclesToWaitExp = ~0U;
  unsigned CyclesToWaitLgkm = ~0U;
  unsigned CyclesToWaitVs = ~0U;
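
  // Starting each threshold at its maximum encodable value means that a
  // counter this particular s_waitcnt does not constrain will, in practice,
  // never be the reason we stall: the corresponding CurrX count cannot exceed
  // that threshold, so the comparisons below fail for it.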

  computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);

  // We will now look at each of the currently executing instructions
  // to find out if this wait instruction still needs to wait.
  for (const InstRef &PrevIR : IssuedInst) {
    const Instruction &PrevInst = *PrevIR.getInstruction();
    const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
    const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
    const int CyclesLeft = PrevInst.getCyclesLeft();
    assert(CyclesLeft != UNKNOWN_CYCLES &&
           "We should know how many cycles are left for this instruction");
    if (PrevInstWaitInfo.VmCnt) {
      CurrVmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVm)
        CyclesToWaitVm = CyclesLeft;
    }
    if (PrevInstWaitInfo.ExpCnt) {
      CurrExpcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitExp)
        CyclesToWaitExp = CyclesLeft;
    }
    if (PrevInstWaitInfo.LgkmCnt) {
      CurrLgkmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
        CyclesToWaitLgkm = CyclesLeft;
    }
    if (PrevInstWaitInfo.VsCnt) {
      CurrVscnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVs)
        CyclesToWaitVs = CyclesLeft;
    }
  }
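
  // For example (illustrative): if three vmcnt-tagged loads are still in
  // flight (CurrVmcnt == 3) and this wait requires vmcnt(0) (Vmcnt == 0), we
  // must stall, and the stall we report is bounded by the soonest-retiring of
  // those loads. That is why an underestimate is acceptable (see below).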

  unsigned CyclesToWait = ~0U;
  if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
    CyclesToWait = CyclesToWaitVm;
  if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
    CyclesToWait = CyclesToWaitExp;
  if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
    CyclesToWait = CyclesToWaitLgkm;
  if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
    CyclesToWait = CyclesToWaitVs;

  // We may underestimate how many cycles we need to wait, but this
  // isn't a big deal. Our return value is just how many cycles until
  // this function gets run again. So as long as we don't overestimate
  // the wait time, we'll still end up stalling at this instruction
  // for the correct number of cycles.

  if (CyclesToWait == ~0U)
    return 0;
  return CyclesToWait;
}

void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
                                           unsigned &Expcnt, unsigned &Lgkmcnt,
                                           unsigned &Vscnt) {
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  switch (Opcode) {
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
    // Should probably be checking for nullptr here, but I'm not sure how
    // to handle the case where we see a nullptr.
    const MCAOperand *OpReg = Inst.getOperand(0);
    const MCAOperand *OpImm = Inst.getOperand(1);
    assert(OpReg && OpReg->isReg() && "First operand should be a register.");
    assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
    if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
      // The instruction is using a real register.
      // Since we can't know what value this register will have,
      // we can't compute what the value of this wait should be.
      WithColor::warning() << "The register component of "
                           << MCII.getName(Opcode) << " will be completely "
                           << "ignored. So the wait may not be accurate.\n";
    }
    switch (Opcode) {
    // Redundant switch so I don't have to repeat the code above
    // for each case. There are more clever ways to avoid this
    // extra switch and anyone can feel free to implement one of them.
    case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
      Expcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
      Lgkmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VMCNT_gfx10:
      Vmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VSCNT_gfx10:
      Vscnt = OpImm->getImm();
      break;
    }
    return;
  }
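  // The remaining forms pack every counter into one immediate whose field
  // layout varies by ISA version (e.g., on gfx9 vmcnt is split across bits
  // [3:0] and [15:14]); AMDGPU::decodeWaitcnt() does the version-specific
  // unpacking for us.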
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    unsigned WaitCnt = Inst.getOperand(0)->getImm();
    AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
    return;
  }
}

void AMDGPUCustomBehaviour::generateWaitCntInfo() {
  // The core logic of this function is taken from
  // SIInsertWaitcnts::updateEventWaitcntAfter(). In that pass, the
  // instructions being examined are in MachineInstr form, whereas here we only
  // have access to the MCInst form. The side effect of this is that we can't
  // use the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
  // functions. Therefore, we conservatively assume that these functions will
  // return true. This may cause a few instructions to be incorrectly tagged
  // with an extra CNT. However, these are instructions that do interact with
  // at least one CNT, so giving them an extra CNT shouldn't cause issues in
  // most cases.
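  //
  // Each InstrWaitCntInfo entry records, per source instruction, which of the
  // four counters (vmcnt, expcnt, lgkmcnt, vscnt) that instruction increments;
  // handleWaitCnt() later reads these flags to decide whether an s_waitcnt
  // still has outstanding events to wait on.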
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  InstrWaitCntInfo.resize(SrcMgr.size());

  for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
    const std::unique_ptr<Instruction> &Inst = EN.value();
    unsigned Index = EN.index();
    unsigned Opcode = Inst->getOpcode();
    const MCInstrDesc &MCID = MCII.get(Opcode);
    if ((MCID.TSFlags & SIInstrFlags::DS) &&
        (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
      // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
      // and mayAccessLDSThroughFlat(Inst) would both return true for this
      // instruction. We have to do this because those functions use
      // information about the memory operands that we don't have access to.
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
        InstrWaitCntInfo[Index].VmCnt = true;
      else
        InstrWaitCntInfo[Index].VsCnt = true;
    } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if ((MCID.mayLoad() &&
                !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
               ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
                !MCID.mayStore()))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayStore())
        InstrWaitCntInfo[Index].VsCnt = true;

      // (IV.Major < 7) is meant to represent
      // GCNTarget.vmemWriteNeedsExpWaitcnt(),
      // which is defined as
      // { return getGeneration() < SEA_ISLANDS; }
      if (IV.Major < 7 &&
          (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::EXP) {
      InstrWaitCntInfo[Index].ExpCnt = true;
    } else {
      switch (Opcode) {
      case AMDGPU::S_SENDMSG:
      case AMDGPU::S_SENDMSGHALT:
      case AMDGPU::S_MEMTIME:
      case AMDGPU::S_MEMREALTIME:
        InstrWaitCntInfo[Index].LgkmCnt = true;
        break;
      }
    }
  }
}

// taken from SIInstrInfo::isVMEM()
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
  return MCID.TSFlags & SIInstrFlags::MUBUF ||
         MCID.TSFlags & SIInstrFlags::MTBUF ||
         MCID.TSFlags & SIInstrFlags::MIMG;
}

// taken from SIInstrInfo::hasModifiersSet()
bool AMDGPUCustomBehaviour::hasModifiersSet(
    const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
  int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
  if (Idx == -1)
    return false;

  const MCAOperand *Op = Inst->getOperand(Idx);
  if (Op == nullptr || !Op->isImm() || !Op->getImm())
    return false;

  return true;
}

// taken from SIInstrInfo::isGWS()
bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
  const MCInstrDesc &MCID = MCII.get(Opcode);
  return MCID.TSFlags & SIInstrFlags::GWS;
}

// taken from SIInstrInfo::isAlwaysGDS()
bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
}

} // namespace llvm::mca

using namespace llvm;
using namespace mca;

static CustomBehaviour *
createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                            const mca::SourceMgr &SrcMgr,
                            const MCInstrInfo &MCII) {
  return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
}

static InstrPostProcess *
createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
                             const MCInstrInfo &MCII) {
  return new AMDGPUInstrPostProcess(STI, MCII);
}
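
// Note: these factories are looked up through the TargetRegistry, so tools
// such as llvm-mca pick up the AMDGPU-specific behaviour automatically once
// LLVMInitializeAMDGPUTargetMCA() has been called (typically via
// InitializeAllTargetMCAs()).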

/// Extern function to initialize the targets for the AMDGPU backend.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
  TargetRegistry::RegisterCustomBehaviour(getTheR600Target(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheR600Target(),
                                           createAMDGPUInstrPostProcess);

  TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
                                           createAMDGPUInstrPostProcess);
}