//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//
#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "aarch64-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;
/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.
static bool matchFConstantToConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 32 && DstSize != 64)
    return false;

  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating point constants can be materialized using a fmov,
  // it makes more sense to just use a GPR.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) { return Use.mayStore(); });
}
/// Change a G_FCONSTANT into a G_CONSTANT.
static void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}
/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    GISelKnownBits *KB, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);

  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (!ICmpInst::isEquality(Pred))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  LLT LHSTy = MRI.getType(LHS);
  if (!LHSTy.isScalar())
    return false;

  Register RHS = MI.getOperand(3).getReg();
  Register WideReg;

  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
      !mi_match(RHS, MRI, m_SpecificICst(0)))
    return false;

  LLT WideTy = MRI.getType(WideReg);
  if (KB->computeNumSignBits(WideReg) <=
      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
    return false;

  MatchInfo = WideReg;
  return true;
}
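
/// Rewrite the matched G_ICMP so it compares the wide source register against
/// an equivalent-width zero, leaving the G_TRUNC dead.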
static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    GISelChangeObserver &Observer,
                                    Register &WideReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  LLT WideTy = MRI.getType(WideReg);
  // We're going to directly use the wide register as the LHS, and then use an
  // equivalent size zero for RHS.
  Builder.setInstrAndDebugLoc(MI);
  auto WideZero = Builder.buildConstant(WideTy, 0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(WideReg);
  MI.getOperand(3).setReg(WideZero.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return false;

  // Don't allow anything that could represent offsets etc.
  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
    return false;

  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  Register Dst = MI.getOperand(0).getReg();
  uint64_t MinOffset = -1ull;
  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    auto Cst = getIConstantVRegValWithLookThrough(
        UseInstr.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
  }

  // Require that the new offset is larger than the existing one to avoid
  // infinite loops.
  uint64_t CurrOffset = GlobalOp.getOffset();
  uint64_t NewOffset = MinOffset + CurrOffset;
  if (NewOffset <= CurrOffset)
    return false;

  // Check whether folding this offset is legal. It must not go out of bounds of
  // the referenced object to avoid violating the code model, and must be
  // smaller than 2^21 because this is the largest offset expressible in all
  // object formats.
  //
  // This check also prevents us from folding negative offsets, which will end
  // up being treated in the same way as large positive ones. They could also
  // cause code model violations, and aren't really common enough to matter.
  if (NewOffset >= (1 << 21))
    return false;

  Type *T = GV->getValueType();
  if (!T->isSized() ||
      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
    return false;
  MatchInfo = std::make_pair(NewOffset, MinOffset);
  return true;
}
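
/// Fold the smallest constant offset into the G_GLOBAL_VALUE itself and
/// re-derive the original address with a negating G_PTR_ADD.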
static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B,
                                  GISelChangeObserver &Observer,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;
  B.setInstrAndDebugLoc(MI);
  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
  return true;
}
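
/// Try to rewrite a narrow (8 or 16 bit) G_UADDO whose inputs are known to be
/// zero-extended into a wide G_ADD plus an explicit test of the carry bit.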
static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                               CombinerHelper &Helper,
                               GISelChangeObserver &Observer) {
  // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
  // result is only used in the no-overflow case. It is restricted to cases
  // where we know that the high-bits of the operands are 0. If there's an
  // overflow, then the 9th or 17th bit must be set, which can be checked
  // using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %op0 = G_TRUNC %z0
  //   %z1 = G_ASSERT_ZEXT _
  //   %op1 = G_TRUNC %z1
  //   %val, %cond = G_UADDO %op0, %op1
  //   G_BRCOND %cond, %error.bb
  //
  // error.bb:
  //   (no successors and no uses of %val)
  //
  // To:
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %z1 = G_ASSERT_ZEXT _
  //   %add = G_ADD %z0, %z1
  //   %val = G_TRUNC %add
  //   %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  //   %cond = G_ICMP NE, %bit, 0
  //   G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDO with either 8 or 16 bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;
  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // All conditions are met; remove the G_UADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit the wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZEXts users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}
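
// Shared state (currently just the CombinerHelper) that the tablegen-erated
// combine rules below can reference.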
class AArch64PreLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;

public:
  AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
      : Helper(Helper) {}
};
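
// Pull in the tablegen-erated combiner boilerplate: the rule dependencies
// first, then (inside the anonymous namespace) the generated helper
// declaration.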
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
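
// CombinerInfo implementation that wires the generated rules and the manually
// written combines above into the generic Combiner driver.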
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

public:
  AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }
  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_UADDO:
    return tryToSimplifyUADDO(MI, B, Helper, Observer);
  case TargetOpcode::G_MEMCPY_INLINE:
    return Helper.tryEmitMemcpyInline(MI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
    // heuristics decide.
    unsigned MaxLen = EnableOpt ? 0 : 32;
    // Try to inline memcpy type calls if optimizations are enabled.
    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
      return true;
    if (Opc == TargetOpcode::G_MEMSET)
      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
    return false;
  }
  }

  return false;
}
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
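
// The MachineFunctionPass wrapper that runs the combiner over each function.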
class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombiner();

  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace
void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
    : MachineFunctionPass(ID) {
  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());

  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
  AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), KB, MDT);
  Combiner C(PCInfo, &TPC);
  return C.combineMachineInstrs(MF, CSEInfo);
}
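
// Pass registration and the factory function the AArch64 target uses to add
// this combiner to its GlobalISel pipeline.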
char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)
namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
  return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm