1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass does combining of machine instructions at the generic MI level,
10 // before the legalizer.
12 //===----------------------------------------------------------------------===//
15 #include "AMDGPUCombinerHelper.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
20 #include "llvm/CodeGen/GlobalISel/Combiner.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
23 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/Target/TargetMachine.h"
30 #define GET_GICOMBINER_DEPS
31 #include "AMDGPUGenPreLegalizeGICombiner.inc"
32 #undef GET_GICOMBINER_DEPS
34 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
37 using namespace MIPatternMatch
;
40 #define GET_GICOMBINER_TYPES
41 #include "AMDGPUGenPreLegalizeGICombiner.inc"
42 #undef GET_GICOMBINER_TYPES
44 class AMDGPUPreLegalizerCombinerImpl
: public Combiner
{
46 const AMDGPUPreLegalizerCombinerImplRuleConfig
&RuleConfig
;
47 const GCNSubtarget
&STI
;
48 // TODO: Make CombinerHelper methods const.
49 mutable AMDGPUCombinerHelper Helper
;
52 AMDGPUPreLegalizerCombinerImpl(
53 MachineFunction
&MF
, CombinerInfo
&CInfo
, const TargetPassConfig
*TPC
,
54 GISelKnownBits
&KB
, GISelCSEInfo
*CSEInfo
,
55 const AMDGPUPreLegalizerCombinerImplRuleConfig
&RuleConfig
,
56 const GCNSubtarget
&STI
, MachineDominatorTree
*MDT
,
57 const LegalizerInfo
*LI
);
59 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
61 bool tryCombineAllImpl(MachineInstr
&MI
) const;
62 bool tryCombineAll(MachineInstr
&I
) const override
;
64 struct ClampI64ToI16MatchInfo
{
70 bool matchClampI64ToI16(MachineInstr
&MI
, const MachineRegisterInfo
&MRI
,
71 const MachineFunction
&MF
,
72 ClampI64ToI16MatchInfo
&MatchInfo
) const;
74 void applyClampI64ToI16(MachineInstr
&MI
,
75 const ClampI64ToI16MatchInfo
&MatchInfo
) const;
78 #define GET_GICOMBINER_CLASS_MEMBERS
79 #define AMDGPUSubtarget GCNSubtarget
80 #include "AMDGPUGenPreLegalizeGICombiner.inc"
81 #undef GET_GICOMBINER_CLASS_MEMBERS
82 #undef AMDGPUSubtarget
85 #define GET_GICOMBINER_IMPL
86 #define AMDGPUSubtarget GCNSubtarget
87 #include "AMDGPUGenPreLegalizeGICombiner.inc"
88 #undef AMDGPUSubtarget
89 #undef GET_GICOMBINER_IMPL
91 AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92 MachineFunction
&MF
, CombinerInfo
&CInfo
, const TargetPassConfig
*TPC
,
93 GISelKnownBits
&KB
, GISelCSEInfo
*CSEInfo
,
94 const AMDGPUPreLegalizerCombinerImplRuleConfig
&RuleConfig
,
95 const GCNSubtarget
&STI
, MachineDominatorTree
*MDT
, const LegalizerInfo
*LI
)
96 : Combiner(MF
, CInfo
, TPC
, &KB
, CSEInfo
), RuleConfig(RuleConfig
), STI(STI
),
97 Helper(Observer
, B
, /*IsPreLegalize*/ true, &KB
, MDT
, LI
),
98 #define GET_GICOMBINER_CONSTRUCTOR_INITS
99 #include "AMDGPUGenPreLegalizeGICombiner.inc"
100 #undef GET_GICOMBINER_CONSTRUCTOR_INITS
104 bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr
&MI
) const {
105 if (tryCombineAllImpl(MI
))
108 switch (MI
.getOpcode()) {
109 case TargetOpcode::G_SHUFFLE_VECTOR
:
110 return Helper
.tryCombineShuffleVector(MI
);
116 bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
117 MachineInstr
&MI
, const MachineRegisterInfo
&MRI
, const MachineFunction
&MF
,
118 ClampI64ToI16MatchInfo
&MatchInfo
) const {
119 assert(MI
.getOpcode() == TargetOpcode::G_TRUNC
&& "Invalid instruction!");
121 // Try to find a pattern where an i64 value should get clamped to short.
122 const LLT SrcType
= MRI
.getType(MI
.getOperand(1).getReg());
123 if (SrcType
!= LLT::scalar(64))
126 const LLT DstType
= MRI
.getType(MI
.getOperand(0).getReg());
127 if (DstType
!= LLT::scalar(16))
132 auto IsApplicableForCombine
= [&MatchInfo
]() -> bool {
133 const auto Cmp1
= MatchInfo
.Cmp1
;
134 const auto Cmp2
= MatchInfo
.Cmp2
;
135 const auto Diff
= std::abs(Cmp2
- Cmp1
);
137 // If the difference between both comparison values is 0 or 1, there is no
139 if (Diff
== 0 || Diff
== 1)
142 const int64_t Min
= std::numeric_limits
<int16_t>::min();
143 const int64_t Max
= std::numeric_limits
<int16_t>::max();
145 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
146 return ((Cmp2
>= Cmp1
&& Cmp1
>= Min
&& Cmp2
<= Max
) ||
147 (Cmp1
>= Cmp2
&& Cmp1
<= Max
&& Cmp2
>= Min
));
150 // Try to match a combination of min / max MIR opcodes.
151 if (mi_match(MI
.getOperand(1).getReg(), MRI
,
152 m_GSMin(m_Reg(Base
), m_ICst(MatchInfo
.Cmp1
)))) {
153 if (mi_match(Base
, MRI
,
154 m_GSMax(m_Reg(MatchInfo
.Origin
), m_ICst(MatchInfo
.Cmp2
)))) {
155 return IsApplicableForCombine();
159 if (mi_match(MI
.getOperand(1).getReg(), MRI
,
160 m_GSMax(m_Reg(Base
), m_ICst(MatchInfo
.Cmp1
)))) {
161 if (mi_match(Base
, MRI
,
162 m_GSMin(m_Reg(MatchInfo
.Origin
), m_ICst(MatchInfo
.Cmp2
)))) {
163 return IsApplicableForCombine();
170 // We want to find a combination of instructions that
171 // gets generated when an i64 gets clamped to i16.
172 // The corresponding pattern is:
173 // G_MAX / G_MAX for i16 <= G_TRUNC i64.
174 // This can be efficiently written as following:
175 // v_cvt_pk_i16_i32 v0, v0, v1
176 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
177 void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
178 MachineInstr
&MI
, const ClampI64ToI16MatchInfo
&MatchInfo
) const {
180 Register Src
= MatchInfo
.Origin
;
181 assert(MI
.getParent()->getParent()->getRegInfo().getType(Src
) ==
183 const LLT S32
= LLT::scalar(32);
185 auto Unmerge
= B
.buildUnmerge(S32
, Src
);
187 assert(MI
.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32
);
189 const LLT V2S16
= LLT::fixed_vector(2, 16);
191 B
.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32
, {V2S16
},
192 {Unmerge
.getReg(0), Unmerge
.getReg(1)}, MI
.getFlags());
194 auto MinBoundary
= std::min(MatchInfo
.Cmp1
, MatchInfo
.Cmp2
);
195 auto MaxBoundary
= std::max(MatchInfo
.Cmp1
, MatchInfo
.Cmp2
);
196 auto MinBoundaryDst
= B
.buildConstant(S32
, MinBoundary
);
197 auto MaxBoundaryDst
= B
.buildConstant(S32
, MaxBoundary
);
199 auto Bitcast
= B
.buildBitcast({S32
}, CvtPk
);
201 auto Med3
= B
.buildInstr(
202 AMDGPU::G_AMDGPU_SMED3
, {S32
},
203 {MinBoundaryDst
.getReg(0), Bitcast
.getReg(0), MaxBoundaryDst
.getReg(0)},
206 B
.buildTrunc(MI
.getOperand(0).getReg(), Med3
);
208 MI
.eraseFromParent();
214 class AMDGPUPreLegalizerCombiner
: public MachineFunctionPass
{
218 AMDGPUPreLegalizerCombiner(bool IsOptNone
= false);
220 StringRef
getPassName() const override
{
221 return "AMDGPUPreLegalizerCombiner";
224 bool runOnMachineFunction(MachineFunction
&MF
) override
;
226 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
230 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig
;
232 } // end anonymous namespace
234 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage
&AU
) const {
235 AU
.addRequired
<TargetPassConfig
>();
236 AU
.setPreservesCFG();
237 getSelectionDAGFallbackAnalysisUsage(AU
);
238 AU
.addRequired
<GISelKnownBitsAnalysis
>();
239 AU
.addPreserved
<GISelKnownBitsAnalysis
>();
241 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
242 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
245 AU
.addRequired
<GISelCSEAnalysisWrapperPass
>();
246 AU
.addPreserved
<GISelCSEAnalysisWrapperPass
>();
247 MachineFunctionPass::getAnalysisUsage(AU
);
250 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone
)
251 : MachineFunctionPass(ID
), IsOptNone(IsOptNone
) {
252 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
254 if (!RuleConfig
.parseCommandLineOption())
255 report_fatal_error("Invalid rule identifier");
258 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction
&MF
) {
259 if (MF
.getProperties().hasProperty(
260 MachineFunctionProperties::Property::FailedISel
))
262 auto *TPC
= &getAnalysis
<TargetPassConfig
>();
263 const Function
&F
= MF
.getFunction();
265 MF
.getTarget().getOptLevel() != CodeGenOptLevel::None
&& !skipFunction(F
);
266 GISelKnownBits
*KB
= &getAnalysis
<GISelKnownBitsAnalysis
>().get(MF
);
269 GISelCSEAnalysisWrapper
&Wrapper
=
270 getAnalysis
<GISelCSEAnalysisWrapperPass
>().getCSEWrapper();
271 auto *CSEInfo
= &Wrapper
.get(TPC
->getCSEConfig());
273 const GCNSubtarget
&STI
= MF
.getSubtarget
<GCNSubtarget
>();
274 MachineDominatorTree
*MDT
=
276 : &getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
277 CombinerInfo
CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
278 nullptr, EnableOpt
, F
.hasOptSize(), F
.hasMinSize());
279 // Disable fixed-point iteration to reduce compile-time
280 CInfo
.MaxIterations
= 1;
281 CInfo
.ObserverLvl
= CombinerInfo::ObserverLevel::SinglePass
;
282 // This is the first Combiner, so the input IR might contain dead
284 CInfo
.EnableFullDCE
= true;
285 AMDGPUPreLegalizerCombinerImpl
Impl(MF
, CInfo
, TPC
, *KB
, CSEInfo
, RuleConfig
,
286 STI
, MDT
, STI
.getLegalizerInfo());
287 return Impl
.combineMachineInstrs();
290 char AMDGPUPreLegalizerCombiner::ID
= 0;
291 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner
, DEBUG_TYPE
,
292 "Combine AMDGPU machine instrs before legalization",
294 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig
)
295 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis
)
296 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner
, DEBUG_TYPE
,
297 "Combine AMDGPU machine instrs before legalization", false,
301 FunctionPass
*createAMDGPUPreLegalizeCombiner(bool IsOptNone
) {
302 return new AMDGPUPreLegalizerCombiner(IsOptNone
);
304 } // end namespace llvm