[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / R600ClauseMergePass.cpp
blob290a960ae9017ea04a061da2a86ca5efa5037369
1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
11 /// This pass is merging consecutive CFAlus where applicable.
12 /// It needs to be called after IfCvt for best results.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
19 #include "R600MachineFunctionInfo.h"
20 #include "R600RegisterInfo.h"
21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
28 using namespace llvm;
30 #define DEBUG_TYPE "r600mergeclause"
32 namespace {
34 static bool isCFAlu(const MachineInstr &MI) {
35 switch (MI.getOpcode()) {
36 case R600::CF_ALU:
37 case R600::CF_ALU_PUSH_BEFORE:
38 return true;
39 default:
40 return false;
44 class R600ClauseMergePass : public MachineFunctionPass {
46 private:
47 const R600InstrInfo *TII;
49 unsigned getCFAluSize(const MachineInstr &MI) const;
50 bool isCFAluEnabled(const MachineInstr &MI) const;
52 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
53 /// removed and their content affected to the previous alu clause.
54 /// This function parse instructions after CFAlu until it find a disabled
55 /// CFAlu and merge the content, or an enabled CFAlu.
56 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
59 /// it is the case.
60 bool mergeIfPossible(MachineInstr &RootCFAlu,
61 const MachineInstr &LatrCFAlu) const;
63 public:
64 static char ID;
66 R600ClauseMergePass() : MachineFunctionPass(ID) { }
68 bool runOnMachineFunction(MachineFunction &MF) override;
70 StringRef getPassName() const override;
73 } // end anonymous namespace
75 INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
76 "R600 Clause Merge", false, false)
77 INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
78 "R600 Clause Merge", false, false)
80 char R600ClauseMergePass::ID = 0;
82 char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
84 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
85 assert(isCFAlu(MI));
86 return MI
87 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
88 .getImm();
91 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
92 assert(isCFAlu(MI));
93 return MI
94 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
95 .getImm();
98 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
99 MachineInstr &CFAlu) const {
100 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
101 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
102 I++;
103 do {
104 while (I != E && !isCFAlu(*I))
105 I++;
106 if (I == E)
107 return;
108 MachineInstr &MI = *I++;
109 if (isCFAluEnabled(MI))
110 break;
111 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
112 MI.eraseFromParent();
113 } while (I != E);
116 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
117 const MachineInstr &LatrCFAlu) const {
118 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
119 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
120 unsigned RootInstCount = getCFAluSize(RootCFAlu),
121 LaterInstCount = getCFAluSize(LatrCFAlu);
122 unsigned CumuledInsts = RootInstCount + LaterInstCount;
123 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
124 LLVM_DEBUG(dbgs() << "Excess inst counts\n");
125 return false;
127 if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
128 return false;
129 // Is KCache Bank 0 compatible ?
130 int Mode0Idx =
131 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
132 int KBank0Idx =
133 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
134 int KBank0LineIdx =
135 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
136 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
137 RootCFAlu.getOperand(Mode0Idx).getImm() &&
138 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
139 RootCFAlu.getOperand(KBank0Idx).getImm() ||
140 LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
141 RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
142 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
143 return false;
145 // Is KCache Bank 1 compatible ?
146 int Mode1Idx =
147 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
148 int KBank1Idx =
149 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
150 int KBank1LineIdx =
151 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
152 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
153 RootCFAlu.getOperand(Mode1Idx).getImm() &&
154 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
155 RootCFAlu.getOperand(KBank1Idx).getImm() ||
156 LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
157 RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
158 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
159 return false;
161 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
162 RootCFAlu.getOperand(Mode0Idx).setImm(
163 LatrCFAlu.getOperand(Mode0Idx).getImm());
164 RootCFAlu.getOperand(KBank0Idx).setImm(
165 LatrCFAlu.getOperand(KBank0Idx).getImm());
166 RootCFAlu.getOperand(KBank0LineIdx)
167 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
169 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
170 RootCFAlu.getOperand(Mode1Idx).setImm(
171 LatrCFAlu.getOperand(Mode1Idx).getImm());
172 RootCFAlu.getOperand(KBank1Idx).setImm(
173 LatrCFAlu.getOperand(KBank1Idx).getImm());
174 RootCFAlu.getOperand(KBank1LineIdx)
175 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
177 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
178 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
179 return true;
182 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
183 if (skipFunction(MF.getFunction()))
184 return false;
186 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
187 TII = ST.getInstrInfo();
189 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
190 BB != BB_E; ++BB) {
191 MachineBasicBlock &MBB = *BB;
192 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
193 MachineBasicBlock::iterator LatestCFAlu = E;
194 while (I != E) {
195 MachineInstr &MI = *I++;
196 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
197 TII->mustBeLastInClause(MI.getOpcode()))
198 LatestCFAlu = E;
199 if (!isCFAlu(MI))
200 continue;
201 cleanPotentialDisabledCFAlu(MI);
203 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
204 MI.eraseFromParent();
205 } else {
206 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
207 LatestCFAlu = MI;
211 return false;
214 StringRef R600ClauseMergePass::getPassName() const {
215 return "R600 Merge Clause Markers Pass";
218 llvm::FunctionPass *llvm::createR600ClauseMergePass() {
219 return new R600ClauseMergePass();