//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//
15 #include "AMDGPUCombinerHelper.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "llvm/CodeGen/GlobalISel/Combiner.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
21 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
23 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
24 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/TargetPassConfig.h"
28 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #include "llvm/Target/TargetMachine.h"
#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, MachineInstr &FCmp,
                           FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinFMaxLegacy(MachineInstr &MI,
                                       const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine an unsigned buffer load and a sign extension instruction to
  // generate a signed buffer load instruction.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
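    // Illustrative example (assumed MIR, not taken from a test): for
    //   %d:_(s64) = G_LSHR %x:_(s64), %c
    // with a large enough constant %c, only the high 32-bit half of %x can
    // reach the result, so the shift can be rewritten around an unmerge and a
    // 32-bit shift.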
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}
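
// Illustrative example of the pattern matched below (assumed MIR, not from a
// test):
//   %c:_(s1) = G_FCMP floatpred(olt), %a:_(s32), %b:_(s32)
//   %r:_(s32) = G_SELECT %c, %a, %b
// i.e. a select whose operands are the same two values compared by the fcmp,
// in either order.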
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, MachineInstr &FCmp, FMinFMaxLegacyInfo &Info) const {
  if (!MRI.hasOneNonDBGUse(FCmp.getOperand(0).getReg()))
    return false;

  Info.Pred =
      static_cast<CmpInst::Predicate>(FCmp.getOperand(1).getPredicate());
  Info.LHS = FCmp.getOperand(2).getReg();
  Info.RHS = FCmp.getOperand(3).getReg();
  Register True = MI.getOperand(2).getReg();
  Register False = MI.getOperand(3).getReg();

  // TODO: Handle case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if ((Info.LHS != True || Info.RHS != False) &&
      (Info.LHS != False || Info.RHS != True))
    return false;

  // Invert the predicate if necessary so that the apply function can assume
  // that the select operands are the same as the fcmp operands.
  // (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
  if (Info.LHS != True)
    Info.Pred = CmpInst::getInversePredicate(Info.Pred);

  // Only match </<=/>=/> not ==/!= etc.
  return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
}
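
// Illustrative result of the rewrite (assumed MIR, not from a test): the
// select/fcmp pair above becomes
//   %r:_(s32) = G_AMDGPU_FMIN_LEGACY %a, %b
// with G_AMDGPU_FMAX_LEGACY chosen instead for greater-than style predicates.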
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
                                                 : AMDGPU::G_AMDGPU_FMIN_LEGACY;
  Register X = Info.LHS;
  Register Y = Info.RHS;
  if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    std::swap(X, Y);
  }

  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());

  MI.eraseFromParent();
}
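
// Illustrative example (assumed MIR, not from a test):
//   %b:_(s32) = G_AND %x, 255       ; upper 24 bits known to be zero
//   %f:_(s32) = G_UITOFP %b
// is a candidate for G_AMDGPU_CVT_F32_UBYTE0, which converts the low byte of
// its source to float.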
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}
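
// Illustrative example (assumed MIR, not from a test): with contract fast-math
// flags on both instructions,
//   %s:_(s32) = G_FSQRT %x
//   %r:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %s
// (or the same composition with rcp and sqrt in the opposite order) folds to
//   %r:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x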
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}
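
// Illustrative rewrite (assumed MIR, not from a test): for f16 values,
//   %s:_(s16) = G_FSQRT %x
//   %d:_(s16) = G_FDIV %y, %s
// becomes
//   %r:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x
//   %d:_(s16) = G_FMUL %r, %y
// provided the sqrt result has no other users.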
void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}
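
// Illustrative example (assumed MIR, not from a test): a constant shift
// feeding a byte conversion can be folded into the byte index, e.g.
//   %s:_(s32) = G_LSHR %x, 8
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %x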
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}
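
// Illustrative example (assumed MIR, not from a test):
//   %c:_(s32) = G_FCANONICALIZE %x
// can be replaced by %x directly when the target can prove %x is already
// canonical.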
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as buffer_load_{u8,
// u16} instructions. Here, the buffer_load_{u8, u16} instructions are combined
// with sign extension instructions in order to generate buffer_load_{i8, i16}
// instructions.

// Identify buffer_load_{u8, u16}.
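// Illustrative example (assumed MIR, not from a test):
//   %v:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %w:_(s32) = G_SEXT_INREG %v, 8
// becomes a single
//   %w:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE ...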
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}
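
// Illustrative examples (assumed MIR, not from tests): for
//   %p:_(s64) = G_MUL %a:_(s64), %b:_(s64)
// the multiply becomes G_AMDGPU_S_MUL_U64_U32 when known-bits analysis shows
// the top 32 bits of both operands are zero, and G_AMDGPU_S_MUL_I64_I32 when
// both operands have at least 33 sign bits.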
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }

  return false;
}

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile-time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // Legalizer performs DCE, so a full DCE pass is unnecessary.
  CInfo.EnableFullDCE = false;
  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm