1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Insert s_clause instructions to form hard clauses.
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard clauses".
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
32 //===----------------------------------------------------------------------===//
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
41 #define DEBUG_TYPE "si-insert-hard-clauses"
// Classification of instructions with respect to hard clauses.
// NOTE(review): the enum header and most enumerator names are not visible in
// this listing; only their describing comments remain. Confirm against the
// full file.
46 // Texture, buffer, global or scratch memory instructions.
48 // Flat (not global or scratch) memory instructions.
50 // Instructions that access LDS.
52 // Scalar memory instructions.
// Alias for the last "real" clause type. The pass tests
// `Type <= LAST_REAL_HARDCLAUSE_TYPE` to tell clause-forming instruction types
// apart from the internal and illegal categories described below.
56 LAST_REAL_HARDCLAUSE_TYPE
= HARDCLAUSE_VALU
,
58 // Internal instructions, which are allowed in the middle of a hard clause,
59 // except for s_waitcnt.
61 // Instructions that are not allowed in a hard clause: SALU, export, branch,
62 // message, GDS, s_waitcnt and anything else not mentioned above.
66 class SIInsertHardClauses
: public MachineFunctionPass
{
// Subtarget of the function currently being processed. Null until
// runOnMachineFunction assigns it from MF.getSubtarget<GCNSubtarget>();
// getHardClauseType dereferences it, so it must be set before classification.
69 const GCNSubtarget
*ST
= nullptr;
// Default constructor: forwards the pass's ID to the MachineFunctionPass base.
71 SIInsertHardClauses() : MachineFunctionPass(ID
) {}
// Reports this pass's analysis usage into AU. The visible part of the body
// delegates to MachineFunctionPass::getAnalysisUsage.
// NOTE(review): part of the body and the closing brace are not visible in
// this listing; confirm whether anything else is recorded in AU.
73 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
75 MachineFunctionPass::getAnalysisUsage(AU
);
// Classifies MI for clause formation and returns the matching HardClauseType.
//
// Visible outcomes:
//   - HARDCLAUSE_VMEM     for isVMEM / isSegmentSpecificFLAT instructions,
//   - HARDCLAUSE_FLAT     for remaining isFLAT instructions,
//   - HARDCLAUSE_SMEM     for isSMRD instructions,
//   - HARDCLAUSE_INTERNAL for S_NOP,
//   - HARDCLAUSE_ILLEGAL  for everything else (and for the NSA case below).
//
// Reads the ST member, so ST must already point at the current subtarget.
// NOTE(review): several lines (closing braces, and possibly an enclosing
// condition implied by the "clausing loads" comment) are not visible in this
// listing; confirm the exact nesting against the full file.
78 HardClauseType
getHardClauseType(const MachineInstr
&MI
) {
80 // On current architectures we only get a benefit from clausing loads.
82 if (SIInstrInfo::isVMEM(MI
) || SIInstrInfo::isSegmentSpecificFLAT(MI
)) {
// On subtargets that report hasNSAClauseBug(), an instruction whose MIMG info
// says it uses the Gfx10 NSA encoding is classified as illegal, which keeps
// it out of every clause. getMIMGInfo may return null, hence the Info check.
83 if (ST
->hasNSAClauseBug()) {
84 const AMDGPU::MIMGInfo
*Info
= AMDGPU::getMIMGInfo(MI
.getOpcode());
85 if (Info
&& Info
->MIMGEncoding
== AMDGPU::MIMGEncGfx10NSA
)
86 return HARDCLAUSE_ILLEGAL
;
88 return HARDCLAUSE_VMEM
;
// isFLAT is tested after the VMEM / segment-specific test above, so only
// instructions that did not match there are classified as FLAT.
90 if (SIInstrInfo::isFLAT(MI
))
91 return HARDCLAUSE_FLAT
;
93 if (SIInstrInfo::isSMRD(MI
))
94 return HARDCLAUSE_SMEM
;
97 // Don't form VALU clauses. It's not clear what benefit they give, if any.
99 // In practice s_nop is the only internal instruction we're likely to see.
100 // It's safe to treat the rest as illegal.
101 if (MI
.getOpcode() == AMDGPU::S_NOP
)
102 return HARDCLAUSE_INTERNAL
;
// Fallback: anything not recognised above cannot take part in a clause.
103 return HARDCLAUSE_ILLEGAL
;
106 // Track information about a clause as we discover it.
// NOTE(review): the struct header is not visible in this listing. A ClauseInfo
// is brace-initialised in runOnMachineFunction with five values in the order
// {type, first, last, length, base operands}, so the declaration order of the
// fields below is significant.
108 // The type of all (non-internal) instructions in the clause.
109 HardClauseType Type
= HARDCLAUSE_ILLEGAL
;
110 // The first (necessarily non-internal) instruction in the clause.
111 MachineInstr
*First
= nullptr;
112 // The last non-internal instruction in the clause.
113 MachineInstr
*Last
= nullptr;
114 // The length of the clause including any internal instructions in the
115 // middle or after the end of the clause.
// NOTE(review): the declaration this comment describes is not visible in this
// listing; the value is read as CI.Length when deciding whether to close a
// clause.
117 // The base operands of *Last.
118 SmallVector
<const MachineOperand
*, 4> BaseOps
;
// Materialises the clause described by CI: builds an S_CLAUSE instruction at
// the clause's first instruction and finalizes a bundle covering the clause.
//
// \param CI   The clause to emit; CI.First and CI.Last are dereferenced here.
// \param SII  Instruction info used to obtain the S_CLAUSE descriptor.
// \returns NOTE(review): the return statements are not visible in this
//          listing; callers OR the result into their "Changed" flag.
121 bool emitClause(const ClauseInfo
&CI
, const SIInstrInfo
*SII
) {
122 // Get the size of the clause excluding any internal instructions at the end.
// Inclusive instruction count from First through Last, hence the "+ 1".
// Last is the last non-internal instruction, so trailing internal
// instructions are not counted.
125 std::distance(CI
.First
->getIterator(), CI
.Last
->getIterator()) + 1;
// 64 is also the length at which the caller closes a clause.
128 assert(Size
<= 64 && "Hard clause is too long!");
130 auto &MBB
= *CI
.First
->getParent();
// Build the S_CLAUSE at the position of the clause's first instruction, with
// an empty DebugLoc.
// NOTE(review): the operands added to this instruction are not visible here.
132 BuildMI(MBB
, *CI
.First
, DebugLoc(), SII
->get(AMDGPU::S_CLAUSE
))
// The bundle range starts at the S_CLAUSE and its end iterator is the
// instruction following CI.Last.
134 finalizeBundle(MBB
, ClauseMI
->getIterator(),
135 std::next(CI
.Last
->getIterator()));
// Pass entry point. Walks every instruction of every basic block in MF,
// grouping consecutive compatible memory instructions into clauses and calling
// emitClause for each finished clause.
//
// \param MF  The machine function to process.
// \returns NOTE(review): the final return is not visible in this listing;
//          presumably the accumulated "Changed" flag — confirm.
//
// NOTE(review): a number of lines (early returns, the declarations of CI,
// Dummy1 and Dummy2, some closing braces, and parts of conditions) are not
// visible in this listing. The comments below describe only what is shown.
139 bool runOnMachineFunction(MachineFunction
&MF
) override
{
140 if (skipFunction(MF
.getFunction()))
// Cache the subtarget in the ST member; getHardClauseType reads it.
143 ST
= &MF
.getSubtarget
<GCNSubtarget
>();
// Guard on subtargets that do not report hard-clause support.
144 if (!ST
->hasHardClauses())
147 const SIInstrInfo
*SII
= ST
->getInstrInfo();
148 const TargetRegisterInfo
*TRI
= ST
->getRegisterInfo();
150 bool Changed
= false;
151 for (auto &MBB
: MF
) {
153 for (auto &MI
: MBB
) {
154 HardClauseType Type
= getHardClauseType(MI
);
// Base operands are only queried for "real" clause types; internal and
// illegal instructions leave BaseOps empty.
159 SmallVector
<const MachineOperand
*, 4> BaseOps
;
160 if (Type
<= LAST_REAL_HARDCLAUSE_TYPE
) {
161 if (!SII
->getMemOperandsWithOffsetWidth(MI
, BaseOps
, Dummy1
, Dummy2
,
163 // We failed to get the base operands, so we'll never clause this
164 // instruction with any other, so pretend it's illegal.
165 Type
= HARDCLAUSE_ILLEGAL
;
// Close the current clause when it has reached 64 instructions (the limit
// asserted in emitClause), or when a clause is open and the current
// non-internal instruction fails the shouldClusterMemOps test against the
// clause's last base operands.
169 if (CI
.Length
== 64 ||
170 (CI
.Length
&& Type
!= HARDCLAUSE_INTERNAL
&&
172 // Note that we lie to shouldClusterMemOps about the size of the
173 // cluster. When shouldClusterMemOps is called from the machine
174 // scheduler it limits the size of the cluster to avoid increasing
175 // register pressure too much, but this pass runs after register
176 // allocation so there is no need for that kind of limit.
177 !SII
->shouldClusterMemOps(CI
.BaseOps
, BaseOps
, 2, 2)))) {
178 // Finish the current clause.
179 Changed
|= emitClause(CI
, SII
);
184 // Extend the current clause.
// Only non-internal instructions update the clause's tracked base operands.
186 if (Type
!= HARDCLAUSE_INTERNAL
) {
188 CI
.BaseOps
= std::move(BaseOps
);
190 } else if (Type
<= LAST_REAL_HARDCLAUSE_TYPE
) {
191 // Start a new clause.
// MI is both the first and the last instruction; the initial length is 1.
192 CI
= ClauseInfo
{Type
, &MI
, &MI
, 1, std::move(BaseOps
)};
196 // Finish the last clause in the basic block if any.
198 Changed
|= emitClause(CI
, SII
);
// Out-of-class definition of the pass's static ID member, initialised to 0.
207 char SIInsertHardClauses::ID
= 0;
// Exposes the pass ID in the llvm namespace as a reference bound to the
// class's static ID member, so both names denote the same object.
209 char &llvm::SIInsertHardClausesID
= SIInsertHardClauses::ID
;
211 INITIALIZE_PASS(SIInsertHardClauses
, DEBUG_TYPE
, "SI Insert Hard Clauses",