[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Target / AMDGPU / AMDGPURegBankCombiner.cpp
blob4e12e5cd8f65673f8f0a85f96b08fd33b9f75bfe
//=== lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after register banks are known.
//
//===----------------------------------------------------------------------===//
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
23 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/TargetPassConfig.h"
26 #include "llvm/Target/TargetMachine.h"
27 #define DEBUG_TYPE "amdgpu-regbank-combiner"
29 using namespace llvm;
30 using namespace MIPatternMatch;
32 class AMDGPURegBankCombinerHelper {
33 protected:
34 MachineIRBuilder &B;
35 MachineFunction &MF;
36 MachineRegisterInfo &MRI;
37 const RegisterBankInfo &RBI;
38 const TargetRegisterInfo &TRI;
39 CombinerHelper &Helper;
41 public:
42 AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
43 : B(B), MF(B.getMF()), MRI(*B.getMRI()),
44 RBI(*MF.getSubtarget().getRegBankInfo()),
45 TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
47 bool isVgprRegBank(Register Reg);
49 struct MinMaxMedOpc {
50 unsigned Min, Max, Med;
53 struct Med3MatchInfo {
54 unsigned Opc;
55 Register Val0, Val1, Val2;
58 MinMaxMedOpc getMinMaxPair(unsigned Opc);
60 template <class m_Cst>
61 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
62 Register &Val, Register &K0, Register &K1);
64 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
65 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
68 bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
69 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
72 AMDGPURegBankCombinerHelper::MinMaxMedOpc
73 AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
74 switch (Opc) {
75 default:
76 llvm_unreachable("Unsupported opcode");
77 case AMDGPU::G_SMAX:
78 case AMDGPU::G_SMIN:
79 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
80 case AMDGPU::G_UMAX:
81 case AMDGPU::G_UMIN:
82 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
86 template <class m_Cst>
87 bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
88 MachineRegisterInfo &MRI,
89 MinMaxMedOpc MMMOpc, Register &Val,
90 Register &K0, Register &K1) {
91 // 4 operand commutes of: min(max(Val, K0), K1).
92 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
93 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
94 // 4 operand commutes of: max(min(Val, K1), K0).
95 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
96 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
97 return mi_match(
98 MI, MRI,
99 m_any_of(
100 m_CommutativeBinOp(
101 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
102 m_Cst(K1)),
103 m_CommutativeBinOp(
104 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
105 m_Cst(K0))));
108 bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
109 MachineInstr &MI, Med3MatchInfo &MatchInfo) {
110 Register Dst = MI.getOperand(0).getReg();
111 if (!isVgprRegBank(Dst))
112 return false;
114 if (MRI.getType(Dst).isVector())
115 return false;
117 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
118 Register Val, K0, K1;
119 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
120 if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
121 return false;
123 const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
124 const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
125 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
126 return false;
127 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
128 return false;
130 MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
131 return true;
134 void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
135 Med3MatchInfo &MatchInfo) {
136 B.setInstrAndDebugLoc(MI);
137 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
138 {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
139 MI.eraseFromParent();
142 class AMDGPURegBankCombinerHelperState {
143 protected:
144 CombinerHelper &Helper;
145 AMDGPURegBankCombinerHelper &RegBankHelper;
147 public:
148 AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
149 AMDGPURegBankCombinerHelper &RegBankHelper)
150 : Helper(Helper), RegBankHelper(RegBankHelper) {}
153 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
154 #include "AMDGPUGenRegBankGICombiner.inc"
155 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
157 namespace {
158 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
159 #include "AMDGPUGenRegBankGICombiner.inc"
160 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
162 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
163 GISelKnownBits *KB;
164 MachineDominatorTree *MDT;
166 public:
167 AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
169 AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
170 const AMDGPULegalizerInfo *LI,
171 GISelKnownBits *KB, MachineDominatorTree *MDT)
172 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
173 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
174 KB(KB), MDT(MDT) {
175 if (!GeneratedRuleCfg.parseCommandLineOption())
176 report_fatal_error("Invalid rule identifier");
179 bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
180 MachineIRBuilder &B) const override;
183 bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
184 MachineInstr &MI,
185 MachineIRBuilder &B) const {
186 CombinerHelper Helper(Observer, B, KB, MDT);
187 AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
188 AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
189 RegBankHelper);
191 if (Generated.tryCombineAll(Observer, MI, B))
192 return true;
194 return false;
197 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
198 #include "AMDGPUGenRegBankGICombiner.inc"
199 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
201 // Pass boilerplate
202 // ================
204 class AMDGPURegBankCombiner : public MachineFunctionPass {
205 public:
206 static char ID;
208 AMDGPURegBankCombiner(bool IsOptNone = false);
210 StringRef getPassName() const override {
211 return "AMDGPURegBankCombiner";
214 bool runOnMachineFunction(MachineFunction &MF) override;
216 void getAnalysisUsage(AnalysisUsage &AU) const override;
217 private:
218 bool IsOptNone;
220 } // end anonymous namespace
222 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
223 AU.addRequired<TargetPassConfig>();
224 AU.setPreservesCFG();
225 getSelectionDAGFallbackAnalysisUsage(AU);
226 AU.addRequired<GISelKnownBitsAnalysis>();
227 AU.addPreserved<GISelKnownBitsAnalysis>();
228 if (!IsOptNone) {
229 AU.addRequired<MachineDominatorTree>();
230 AU.addPreserved<MachineDominatorTree>();
232 MachineFunctionPass::getAnalysisUsage(AU);
235 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
236 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
237 initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
240 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
241 if (MF.getProperties().hasProperty(
242 MachineFunctionProperties::Property::FailedISel))
243 return false;
244 auto *TPC = &getAnalysis<TargetPassConfig>();
245 const Function &F = MF.getFunction();
246 bool EnableOpt =
247 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
249 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
250 const AMDGPULegalizerInfo *LI
251 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
253 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
254 MachineDominatorTree *MDT =
255 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
256 AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
257 F.hasMinSize(), LI, KB, MDT);
258 Combiner C(PCInfo, TPC);
259 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
262 char AMDGPURegBankCombiner::ID = 0;
263 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
264 "Combine AMDGPU machine instrs after regbankselect",
265 false, false)
266 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
267 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
268 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
269 "Combine AMDGPU machine instrs after regbankselect", false,
270 false)
272 namespace llvm {
273 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
274 return new AMDGPURegBankCombiner(IsOptNone);
276 } // end namespace llvm