[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / R600EmitClauseMarkers.cpp
blobb97e3c8b8dd7bfbe3174d410a0b346a06a70cd7d
1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
/// 128 ALU instructions; these instructions can access up to 4 prefetched
/// lines of 16 registers from constant buffers. Such ALU clauses are
/// initiated by CF_ALU instructions.
14 //===----------------------------------------------------------------------===//
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600RegisterInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/Pass.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include <cassert>
33 #include <cstdint>
34 #include <utility>
35 #include <vector>
37 using namespace llvm;
39 namespace llvm {
41 void initializeR600EmitClauseMarkersPass(PassRegistry&);
43 } // end namespace llvm
45 namespace {
47 class R600EmitClauseMarkers : public MachineFunctionPass {
48 private:
49 const R600InstrInfo *TII = nullptr;
50 int Address = 0;
52 unsigned OccupiedDwords(MachineInstr &MI) const {
53 switch (MI.getOpcode()) {
54 case R600::INTERP_PAIR_XY:
55 case R600::INTERP_PAIR_ZW:
56 case R600::INTERP_VEC_LOAD:
57 case R600::DOT_4:
58 return 4;
59 case R600::KILL:
60 return 0;
61 default:
62 break;
65 // These will be expanded to two ALU instructions in the
66 // ExpandSpecialInstructions pass.
67 if (TII->isLDSRetInstr(MI.getOpcode()))
68 return 2;
70 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
71 TII->isReductionOp(MI.getOpcode()))
72 return 4;
74 unsigned NumLiteral = 0;
75 for (MachineInstr::mop_iterator It = MI.operands_begin(),
76 E = MI.operands_end();
77 It != E; ++It) {
78 MachineOperand &MO = *It;
79 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
80 ++NumLiteral;
82 return 1 + NumLiteral;
85 bool isALU(const MachineInstr &MI) const {
86 if (TII->isALUInstr(MI.getOpcode()))
87 return true;
88 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
89 return true;
90 switch (MI.getOpcode()) {
91 case R600::PRED_X:
92 case R600::INTERP_PAIR_XY:
93 case R600::INTERP_PAIR_ZW:
94 case R600::INTERP_VEC_LOAD:
95 case R600::COPY:
96 case R600::DOT_4:
97 return true;
98 default:
99 return false;
103 bool IsTrivialInst(MachineInstr &MI) const {
104 switch (MI.getOpcode()) {
105 case R600::KILL:
106 case R600::RETURN:
107 case R600::IMPLICIT_DEF:
108 return true;
109 default:
110 return false;
114 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
115 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
116 // (See also R600ISelLowering.cpp)
117 // ConstIndex value is in [0, 4095];
118 return std::pair<unsigned, unsigned>(
119 ((Sel >> 2) - 512) >> 12, // KC_BANK
120 // Line Number of ConstIndex
121 // A line contains 16 constant registers however KCX bank can lock
122 // two line at the same time ; thus we want to get an even line number.
123 // Line number can be retrieved with (>>4), using (>>5) <<1 generates
124 // an even number.
125 ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
128 bool
129 SubstituteKCacheBank(MachineInstr &MI,
130 std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
131 bool UpdateInstr = true) const {
132 std::vector<std::pair<unsigned, unsigned>> UsedKCache;
134 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
135 return true;
137 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
138 TII->getSrcs(MI);
139 assert(
140 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
141 "Can't assign Const");
142 for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
143 if (Consts[i].first->getReg() != R600::ALU_CONST)
144 continue;
145 unsigned Sel = Consts[i].second;
146 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
147 unsigned KCacheIndex = Index * 4 + Chan;
148 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
149 if (CachedConsts.empty()) {
150 CachedConsts.push_back(BankLine);
151 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
152 continue;
154 if (CachedConsts[0] == BankLine) {
155 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
156 continue;
158 if (CachedConsts.size() == 1) {
159 CachedConsts.push_back(BankLine);
160 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
161 continue;
163 if (CachedConsts[1] == BankLine) {
164 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
165 continue;
167 return false;
170 if (!UpdateInstr)
171 return true;
173 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
174 if (Consts[i].first->getReg() != R600::ALU_CONST)
175 continue;
176 switch(UsedKCache[j].first) {
177 case 0:
178 Consts[i].first->setReg(
179 R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
180 break;
181 case 1:
182 Consts[i].first->setReg(
183 R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
184 break;
185 default:
186 llvm_unreachable("Wrong Cache Line");
188 j++;
190 return true;
193 bool canClauseLocalKillFitInClause(
194 unsigned AluInstCount,
195 std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
196 MachineBasicBlock::iterator Def,
197 MachineBasicBlock::iterator BBEnd) {
198 const R600RegisterInfo &TRI = TII->getRegisterInfo();
199 //TODO: change this to defs?
200 for (MachineInstr::const_mop_iterator
201 MOI = Def->operands_begin(),
202 MOE = Def->operands_end(); MOI != MOE; ++MOI) {
203 if (!MOI->isReg() || !MOI->isDef() ||
204 TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
205 continue;
207 // Def defines a clause local register, so check that its use will fit
208 // in the clause.
209 unsigned LastUseCount = 0;
210 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
211 AluInstCount += OccupiedDwords(*UseI);
212 // Make sure we won't need to end the clause due to KCache limitations.
213 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
214 return false;
216 // We have reached the maximum instruction limit before finding the
217 // use that kills this register, so we cannot use this def in the
218 // current clause.
219 if (AluInstCount >= TII->getMaxAlusPerClause())
220 return false;
222 // TODO: Is this true? kill flag appears to work OK below
223 // Register kill flags have been cleared by the time we get to this
224 // pass, but it is safe to assume that all uses of this register
225 // occur in the same basic block as its definition, because
226 // it is illegal for the scheduler to schedule them in
227 // different blocks.
228 if (UseI->readsRegister(MOI->getReg(), &TRI))
229 LastUseCount = AluInstCount;
231 // Exit early if the current use kills the register
232 if (UseI != Def && UseI->killsRegister(MOI->getReg(), &TRI))
233 break;
235 if (LastUseCount)
236 return LastUseCount <= TII->getMaxAlusPerClause();
237 llvm_unreachable("Clause local register live at end of clause.");
239 return true;
242 MachineBasicBlock::iterator
243 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
244 MachineBasicBlock::iterator ClauseHead = I;
245 std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
246 bool PushBeforeModifier = false;
247 unsigned AluInstCount = 0;
248 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
249 if (IsTrivialInst(*I))
250 continue;
251 if (!isALU(*I))
252 break;
253 if (AluInstCount > TII->getMaxAlusPerClause())
254 break;
255 if (I->getOpcode() == R600::PRED_X) {
256 // We put PRED_X in its own clause to ensure that ifcvt won't create
257 // clauses with more than 128 insts.
258 // IfCvt is indeed checking that "then" and "else" branches of an if
259 // statement have less than ~60 insts thus converted clauses can't be
260 // bigger than ~121 insts (predicate setter needs to be in the same
261 // clause as predicated alus).
262 if (AluInstCount > 0)
263 break;
264 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
265 PushBeforeModifier = true;
266 AluInstCount ++;
267 continue;
269 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
271 // * KILL or INTERP instructions
272 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
273 // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
275 // XXX: These checks have not been implemented yet.
276 if (TII->mustBeLastInClause(I->getOpcode())) {
277 I++;
278 break;
281 // If this instruction defines a clause local register, make sure
282 // its use can fit in this clause.
283 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
284 break;
286 if (!SubstituteKCacheBank(*I, KCacheBanks))
287 break;
288 AluInstCount += OccupiedDwords(*I);
290 unsigned Opcode = PushBeforeModifier ?
291 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
292 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
293 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
294 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
295 // pass may assume that identical ALU clause starter at the beginning of a
296 // true and false branch can be factorized which is not the case.
297 .addImm(Address++) // ADDR
298 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
299 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
300 .addImm(KCacheBanks.empty()?0:2) // KM0
301 .addImm((KCacheBanks.size() < 2)?0:2) // KM1
302 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
303 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
304 .addImm(AluInstCount) // COUNT
305 .addImm(1); // Enabled
306 return I;
309 public:
310 static char ID;
312 R600EmitClauseMarkers() : MachineFunctionPass(ID) {
313 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
316 bool runOnMachineFunction(MachineFunction &MF) override {
317 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
318 TII = ST.getInstrInfo();
320 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
321 BB != BB_E; ++BB) {
322 MachineBasicBlock &MBB = *BB;
323 MachineBasicBlock::iterator I = MBB.begin();
324 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
325 continue; // BB was already parsed
326 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
327 if (isALU(*I)) {
328 auto next = MakeALUClause(MBB, I);
329 assert(next != I);
330 I = next;
331 } else
332 ++I;
335 return false;
338 StringRef getPassName() const override {
339 return "R600 Emit Clause Markers Pass";
// Out-of-class definition of the static pass ID; it is handed to the
// MachineFunctionPass base constructor and referenced by the pass
// registration macros below.
char R600EmitClauseMarkers::ID = 0;
345 } // end anonymous namespace
347 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
348 "R600 Emit Clause Markters", false, false)
349 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
350 "R600 Emit Clause Markters", false, false)
352 FunctionPass *llvm::createR600EmitClauseMarkers() {
353 return new R600EmitClauseMarkers();