[PowerPC] Materialize more constants with CR-field set in late peephole
[llvm-core.git] / lib / Target / AMDGPU / R600EmitClauseMarkers.cpp
blob1683fe6c9a571a5dcd286eeaa8edeb6b00f5ad0e
1 //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Add CF_ALU. R600 ALU instructions are grouped in clauses, each of which can
12 /// hold up to 128 ALU instructions; these instructions can access up to 4
13 /// prefetched lines of 16 registers from constant buffers. Such ALU clauses
14 /// are initiated by CF_ALU instructions.
15 //===----------------------------------------------------------------------===//
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "R600Defines.h"
20 #include "R600InstrInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineOperand.h"
31 #include "llvm/Pass.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include <cassert>
34 #include <cstdint>
35 #include <utility>
36 #include <vector>
38 using namespace llvm;
40 namespace llvm {
42 void initializeR600EmitClauseMarkersPass(PassRegistry&);
44 } // end namespace llvm
46 namespace {
48 class R600EmitClauseMarkers : public MachineFunctionPass {
49 private:
50 const R600InstrInfo *TII = nullptr;
51 int Address = 0;
53 unsigned OccupiedDwords(MachineInstr &MI) const {
54 switch (MI.getOpcode()) {
55 case R600::INTERP_PAIR_XY:
56 case R600::INTERP_PAIR_ZW:
57 case R600::INTERP_VEC_LOAD:
58 case R600::DOT_4:
59 return 4;
60 case R600::KILL:
61 return 0;
62 default:
63 break;
66 // These will be expanded to two ALU instructions in the
67 // ExpandSpecialInstructions pass.
68 if (TII->isLDSRetInstr(MI.getOpcode()))
69 return 2;
71 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
72 TII->isReductionOp(MI.getOpcode()))
73 return 4;
75 unsigned NumLiteral = 0;
76 for (MachineInstr::mop_iterator It = MI.operands_begin(),
77 E = MI.operands_end();
78 It != E; ++It) {
79 MachineOperand &MO = *It;
80 if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
81 ++NumLiteral;
83 return 1 + NumLiteral;
86 bool isALU(const MachineInstr &MI) const {
87 if (TII->isALUInstr(MI.getOpcode()))
88 return true;
89 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
90 return true;
91 switch (MI.getOpcode()) {
92 case R600::PRED_X:
93 case R600::INTERP_PAIR_XY:
94 case R600::INTERP_PAIR_ZW:
95 case R600::INTERP_VEC_LOAD:
96 case R600::COPY:
97 case R600::DOT_4:
98 return true;
99 default:
100 return false;
104 bool IsTrivialInst(MachineInstr &MI) const {
105 switch (MI.getOpcode()) {
106 case R600::KILL:
107 case R600::RETURN:
108 case R600::IMPLICIT_DEF:
109 return true;
110 default:
111 return false;
115 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
116 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
117 // (See also R600ISelLowering.cpp)
118 // ConstIndex value is in [0, 4095];
119 return std::pair<unsigned, unsigned>(
120 ((Sel >> 2) - 512) >> 12, // KC_BANK
121 // Line Number of ConstIndex
122 // A line contains 16 constant registers however KCX bank can lock
123 // two line at the same time ; thus we want to get an even line number.
124 // Line number can be retrieved with (>>4), using (>>5) <<1 generates
125 // an even number.
126 ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
129 bool
130 SubstituteKCacheBank(MachineInstr &MI,
131 std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
132 bool UpdateInstr = true) const {
133 std::vector<std::pair<unsigned, unsigned>> UsedKCache;
135 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
136 return true;
138 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
139 TII->getSrcs(MI);
140 assert(
141 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
142 "Can't assign Const");
143 for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
144 if (Consts[i].first->getReg() != R600::ALU_CONST)
145 continue;
146 unsigned Sel = Consts[i].second;
147 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
148 unsigned KCacheIndex = Index * 4 + Chan;
149 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
150 if (CachedConsts.empty()) {
151 CachedConsts.push_back(BankLine);
152 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
153 continue;
155 if (CachedConsts[0] == BankLine) {
156 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
157 continue;
159 if (CachedConsts.size() == 1) {
160 CachedConsts.push_back(BankLine);
161 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
162 continue;
164 if (CachedConsts[1] == BankLine) {
165 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
166 continue;
168 return false;
171 if (!UpdateInstr)
172 return true;
174 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
175 if (Consts[i].first->getReg() != R600::ALU_CONST)
176 continue;
177 switch(UsedKCache[j].first) {
178 case 0:
179 Consts[i].first->setReg(
180 R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
181 break;
182 case 1:
183 Consts[i].first->setReg(
184 R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
185 break;
186 default:
187 llvm_unreachable("Wrong Cache Line");
189 j++;
191 return true;
194 bool canClauseLocalKillFitInClause(
195 unsigned AluInstCount,
196 std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
197 MachineBasicBlock::iterator Def,
198 MachineBasicBlock::iterator BBEnd) {
199 const R600RegisterInfo &TRI = TII->getRegisterInfo();
200 //TODO: change this to defs?
201 for (MachineInstr::const_mop_iterator
202 MOI = Def->operands_begin(),
203 MOE = Def->operands_end(); MOI != MOE; ++MOI) {
204 if (!MOI->isReg() || !MOI->isDef() ||
205 TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
206 continue;
208 // Def defines a clause local register, so check that its use will fit
209 // in the clause.
210 unsigned LastUseCount = 0;
211 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
212 AluInstCount += OccupiedDwords(*UseI);
213 // Make sure we won't need to end the clause due to KCache limitations.
214 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
215 return false;
217 // We have reached the maximum instruction limit before finding the
218 // use that kills this register, so we cannot use this def in the
219 // current clause.
220 if (AluInstCount >= TII->getMaxAlusPerClause())
221 return false;
223 // TODO: Is this true? kill flag appears to work OK below
224 // Register kill flags have been cleared by the time we get to this
225 // pass, but it is safe to assume that all uses of this register
226 // occur in the same basic block as its definition, because
227 // it is illegal for the scheduler to schedule them in
228 // different blocks.
229 if (UseI->readsRegister(MOI->getReg()))
230 LastUseCount = AluInstCount;
232 // Exit early if the current use kills the register
233 if (UseI != Def && UseI->killsRegister(MOI->getReg()))
234 break;
236 if (LastUseCount)
237 return LastUseCount <= TII->getMaxAlusPerClause();
238 llvm_unreachable("Clause local register live at end of clause.");
240 return true;
243 MachineBasicBlock::iterator
244 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
245 MachineBasicBlock::iterator ClauseHead = I;
246 std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
247 bool PushBeforeModifier = false;
248 unsigned AluInstCount = 0;
249 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
250 if (IsTrivialInst(*I))
251 continue;
252 if (!isALU(*I))
253 break;
254 if (AluInstCount > TII->getMaxAlusPerClause())
255 break;
256 if (I->getOpcode() == R600::PRED_X) {
257 // We put PRED_X in its own clause to ensure that ifcvt won't create
258 // clauses with more than 128 insts.
259 // IfCvt is indeed checking that "then" and "else" branches of an if
260 // statement have less than ~60 insts thus converted clauses can't be
261 // bigger than ~121 insts (predicate setter needs to be in the same
262 // clause as predicated alus).
263 if (AluInstCount > 0)
264 break;
265 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
266 PushBeforeModifier = true;
267 AluInstCount ++;
268 continue;
270 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
272 // * KILL or INTERP instructions
273 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
274 // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
276 // XXX: These checks have not been implemented yet.
277 if (TII->mustBeLastInClause(I->getOpcode())) {
278 I++;
279 break;
282 // If this instruction defines a clause local register, make sure
283 // its use can fit in this clause.
284 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
285 break;
287 if (!SubstituteKCacheBank(*I, KCacheBanks))
288 break;
289 AluInstCount += OccupiedDwords(*I);
291 unsigned Opcode = PushBeforeModifier ?
292 R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
293 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
294 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
295 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
296 // pass may assume that identical ALU clause starter at the beginning of a
297 // true and false branch can be factorized which is not the case.
298 .addImm(Address++) // ADDR
299 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
300 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
301 .addImm(KCacheBanks.empty()?0:2) // KM0
302 .addImm((KCacheBanks.size() < 2)?0:2) // KM1
303 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
304 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
305 .addImm(AluInstCount) // COUNT
306 .addImm(1); // Enabled
307 return I;
310 public:
311 static char ID;
313 R600EmitClauseMarkers() : MachineFunctionPass(ID) {
314 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
317 bool runOnMachineFunction(MachineFunction &MF) override {
318 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
319 TII = ST.getInstrInfo();
321 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
322 BB != BB_E; ++BB) {
323 MachineBasicBlock &MBB = *BB;
324 MachineBasicBlock::iterator I = MBB.begin();
325 if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
326 continue; // BB was already parsed
327 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
328 if (isALU(*I)) {
329 auto next = MakeALUClause(MBB, I);
330 assert(next != I);
331 I = next;
332 } else
333 ++I;
336 return false;
339 StringRef getPassName() const override {
340 return "R600 Emit Clause Markers Pass";
344 char R600EmitClauseMarkers::ID = 0;
346 } // end anonymous namespace
348 INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
349 "R600 Emit Clause Markters", false, false)
350 INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
351 "R600 Emit Clause Markters", false, false)
353 FunctionPass *llvm::createR600EmitClauseMarkers() {
354 return new R600EmitClauseMarkers();